aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/Kconfig5
-rw-r--r--fs/9p/vfs_inode_dotl.c11
-rw-r--r--fs/Kconfig17
-rw-r--r--fs/binfmt_flat.c8
-rw-r--r--fs/block_dev.c45
-rw-r--r--fs/btrfs/acl.c5
-rw-r--r--fs/btrfs/extent-tree.c37
-rw-r--r--fs/btrfs/extent_io.c1
-rw-r--r--fs/btrfs/ioctl.c24
-rw-r--r--fs/btrfs/relocation.c2
-rw-r--r--fs/ceph/addr.c12
-rw-r--r--fs/ceph/caps.c91
-rw-r--r--fs/ceph/dir.c7
-rw-r--r--fs/ceph/export.c25
-rw-r--r--fs/ceph/file.c5
-rw-r--r--fs/ceph/inode.c7
-rw-r--r--fs/ceph/mds_client.c9
-rw-r--r--fs/ceph/mds_client.h1
-rw-r--r--fs/ceph/snap.c2
-rw-r--r--fs/ceph/super.h4
-rw-r--r--fs/ceph/xattr.c12
-rw-r--r--fs/cifs/Kconfig35
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/README12
-rw-r--r--fs/cifs/cifs_debug.c2
-rw-r--r--fs/cifs/cifs_fs_sb.h4
-rw-r--r--fs/cifs/cifs_unicode.c14
-rw-r--r--fs/cifs/cifs_unicode.h3
-rw-r--r--fs/cifs/cifsacl.c483
-rw-r--r--fs/cifs/cifsacl.h25
-rw-r--r--fs/cifs/cifsencrypt.c12
-rw-r--r--fs/cifs/cifsfs.c119
-rw-r--r--fs/cifs/cifsfs.h20
-rw-r--r--fs/cifs/cifsglob.h10
-rw-r--r--fs/cifs/cifspdu.h37
-rw-r--r--fs/cifs/cifsproto.h30
-rw-r--r--fs/cifs/cifssmb.c377
-rw-r--r--fs/cifs/connect.c402
-rw-r--r--fs/cifs/export.c4
-rw-r--r--fs/cifs/file.c167
-rw-r--r--fs/cifs/inode.c129
-rw-r--r--fs/cifs/misc.c12
-rw-r--r--fs/cifs/netmisc.c7
-rw-r--r--fs/cifs/sess.c28
-rw-r--r--fs/cifs/smbdes.c418
-rw-r--r--fs/cifs/smbencrypt.c124
-rw-r--r--fs/cifs/transport.c66
-rw-r--r--fs/cifs/xattr.c20
-rw-r--r--fs/compat.c235
-rw-r--r--fs/configfs/dir.c39
-rw-r--r--fs/dcache.c9
-rw-r--r--fs/debugfs/file.c19
-rw-r--r--fs/dlm/config.c9
-rw-r--r--fs/dlm/config.h1
-rw-r--r--fs/dlm/dlm_internal.h3
-rw-r--r--fs/dlm/lock.c182
-rw-r--r--fs/dlm/lock.h1
-rw-r--r--fs/dlm/lockspace.c6
-rw-r--r--fs/dlm/plock.c65
-rw-r--r--fs/dlm/user.c1
-rw-r--r--fs/drop_caches.c5
-rw-r--r--fs/exec.c139
-rw-r--r--fs/ext2/super.c3
-rw-r--r--fs/ext3/namei.c80
-rw-r--r--fs/fat/cache.c7
-rw-r--r--fs/fat/dir.c32
-rw-r--r--fs/fat/fat.h15
-rw-r--r--fs/fat/fatent.c4
-rw-r--r--fs/fat/inode.c74
-rw-r--r--fs/fat/misc.c44
-rw-r--r--fs/fat/namei_msdos.c4
-rw-r--r--fs/fat/namei_vfat.c4
-rw-r--r--fs/freevxfs/vxfs_inode.c2
-rw-r--r--fs/fscache/operation.c10
-rw-r--r--fs/fscache/page.c13
-rw-r--r--fs/fuse/dir.c2
-rw-r--r--fs/gfs2/Makefile4
-rw-r--r--fs/gfs2/aops.c8
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/dir.c197
-rw-r--r--fs/gfs2/dir.h4
-rw-r--r--fs/gfs2/export.c2
-rw-r--r--fs/gfs2/file.c46
-rw-r--r--fs/gfs2/glock.c99
-rw-r--r--fs/gfs2/glock.h3
-rw-r--r--fs/gfs2/glops.c172
-rw-r--r--fs/gfs2/glops.h2
-rw-r--r--fs/gfs2/incore.h8
-rw-r--r--fs/gfs2/inode.c1510
-rw-r--r--fs/gfs2/inode.h8
-rw-r--r--fs/gfs2/log.c208
-rw-r--r--fs/gfs2/log.h2
-rw-r--r--fs/gfs2/lops.c39
-rw-r--r--fs/gfs2/main.c1
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/meta_io.h2
-rw-r--r--fs/gfs2/ops_fstype.c32
-rw-r--r--fs/gfs2/ops_inode.c1344
-rw-r--r--fs/gfs2/quota.c12
-rw-r--r--fs/gfs2/quota.h4
-rw-r--r--fs/gfs2/rgrp.c24
-rw-r--r--fs/gfs2/super.c138
-rw-r--r--fs/gfs2/sys.c6
-rw-r--r--fs/gfs2/trace_gfs2.h38
-rw-r--r--fs/hpfs/Kconfig1
-rw-r--r--fs/hpfs/alloc.c118
-rw-r--r--fs/hpfs/anode.c138
-rw-r--r--fs/hpfs/buffer.c24
-rw-r--r--fs/hpfs/dir.c22
-rw-r--r--fs/hpfs/dnode.c174
-rw-r--r--fs/hpfs/ea.c136
-rw-r--r--fs/hpfs/file.c31
-rw-r--r--fs/hpfs/hpfs.h439
-rw-r--r--fs/hpfs/hpfs_fn.h80
-rw-r--r--fs/hpfs/inode.c47
-rw-r--r--fs/hpfs/map.c56
-rw-r--r--fs/hpfs/name.c33
-rw-r--r--fs/hpfs/namei.c106
-rw-r--r--fs/hpfs/super.c118
-rw-r--r--fs/hugetlbfs/inode.c7
-rw-r--r--fs/inode.c10
-rw-r--r--fs/jbd/commit.c15
-rw-r--r--fs/jbd/journal.c16
-rw-r--r--fs/jbd/transaction.c3
-rw-r--r--fs/jbd2/commit.c6
-rw-r--r--fs/logfs/dev_bdev.c1
-rw-r--r--fs/logfs/readwrite.c2
-rw-r--r--fs/logfs/super.c8
-rw-r--r--fs/mbcache.c10
-rw-r--r--fs/namei.c4
-rw-r--r--fs/ncpfs/inode.c4
-rw-r--r--fs/nfs/dir.c5
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/nfs/nfs4filelayout.c27
-rw-r--r--fs/nfs/nfs4filelayout.h2
-rw-r--r--fs/nfs/nfs4filelayoutdev.c34
-rw-r--r--fs/nfs/nfs4proc.c6
-rw-r--r--fs/nfs/pnfs.c34
-rw-r--r--fs/nfs/pnfs.h6
-rw-r--r--fs/nfs/read.c4
-rw-r--r--fs/nfs/write.c4
-rw-r--r--fs/nfsd/stats.c2
-rw-r--r--fs/nilfs2/alloc.c14
-rw-r--r--fs/nilfs2/bmap.c4
-rw-r--r--fs/nilfs2/btnode.c19
-rw-r--r--fs/nilfs2/btnode.h4
-rw-r--r--fs/nilfs2/btree.c38
-rw-r--r--fs/nilfs2/cpfile.c24
-rw-r--r--fs/nilfs2/dat.c4
-rw-r--r--fs/nilfs2/file.c1
-rw-r--r--fs/nilfs2/gcinode.c25
-rw-r--r--fs/nilfs2/ifile.c4
-rw-r--r--fs/nilfs2/inode.c23
-rw-r--r--fs/nilfs2/ioctl.c61
-rw-r--r--fs/nilfs2/mdt.c8
-rw-r--r--fs/nilfs2/mdt.h9
-rw-r--r--fs/nilfs2/nilfs.h7
-rw-r--r--fs/nilfs2/page.c79
-rw-r--r--fs/nilfs2/page.h7
-rw-r--r--fs/nilfs2/recovery.c12
-rw-r--r--fs/nilfs2/segbuf.c17
-rw-r--r--fs/nilfs2/segment.c190
-rw-r--r--fs/nilfs2/segment.h2
-rw-r--r--fs/nilfs2/sufile.c274
-rw-r--r--fs/nilfs2/sufile.h4
-rw-r--r--fs/nilfs2/super.c131
-rw-r--r--fs/nilfs2/the_nilfs.c24
-rw-r--r--fs/nilfs2/the_nilfs.h2
-rw-r--r--fs/ocfs2/cluster/heartbeat.c61
-rw-r--r--fs/ocfs2/dir.c2
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c3
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c3
-rw-r--r--fs/ocfs2/file.c12
-rw-r--r--fs/ocfs2/journal.c3
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/partitions/check.c8
-rw-r--r--fs/partitions/efi.c6
-rw-r--r--fs/partitions/ldm.c7
-rw-r--r--fs/proc/Makefile1
-rw-r--r--fs/proc/base.c20
-rw-r--r--fs/proc/generic.c1
-rw-r--r--fs/proc/inode.c7
-rw-r--r--fs/proc/internal.h26
-rw-r--r--fs/proc/namespaces.c198
-rw-r--r--fs/proc/task_mmu.c218
-rw-r--r--fs/pstore/platform.c12
-rw-r--r--fs/quota/dquot.c5
-rw-r--r--fs/splice.c33
-rw-r--r--fs/squashfs/Kconfig4
-rw-r--r--fs/squashfs/cache.c2
-rw-r--r--fs/super.c3
-rw-r--r--fs/sysfs/file.c12
-rw-r--r--fs/sysfs/group.c6
-rw-r--r--fs/timerfd.c102
-rw-r--r--fs/ubifs/budget.c104
-rw-r--r--fs/ubifs/commit.c2
-rw-r--r--fs/ubifs/debug.c167
-rw-r--r--fs/ubifs/debug.h178
-rw-r--r--fs/ubifs/dir.c4
-rw-r--r--fs/ubifs/file.c28
-rw-r--r--fs/ubifs/find.c10
-rw-r--r--fs/ubifs/gc.c71
-rw-r--r--fs/ubifs/io.c33
-rw-r--r--fs/ubifs/journal.c29
-rw-r--r--fs/ubifs/log.c48
-rw-r--r--fs/ubifs/lprops.c115
-rw-r--r--fs/ubifs/lpt_commit.c55
-rw-r--r--fs/ubifs/master.c8
-rw-r--r--fs/ubifs/misc.h17
-rw-r--r--fs/ubifs/orphan.c3
-rw-r--r--fs/ubifs/recovery.c354
-rw-r--r--fs/ubifs/replay.c468
-rw-r--r--fs/ubifs/sb.c153
-rw-r--r--fs/ubifs/super.c61
-rw-r--r--fs/ubifs/tnc.c10
-rw-r--r--fs/ubifs/tnc_commit.c18
-rw-r--r--fs/ubifs/ubifs-media.h30
-rw-r--r--fs/ubifs/ubifs.h86
-rw-r--r--fs/ubifs/xattr.c8
-rw-r--r--fs/ufs/inode.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c26
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_message.c20
-rw-r--r--fs/xfs/linux-2.6/xfs_message.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c16
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h76
-rw-r--r--fs/xfs/quota/xfs_qm.c6
-rw-r--r--fs/xfs/xfs_ag.h1
-rw-r--r--fs/xfs/xfs_alloc.c844
-rw-r--r--fs/xfs/xfs_alloc.h15
-rw-r--r--fs/xfs/xfs_alloc_btree.c13
-rw-r--r--fs/xfs/xfs_dfrag.c6
-rw-r--r--fs/xfs/xfs_inode.c4
-rw-r--r--fs/xfs/xfs_inode_item.c1
-rw-r--r--fs/xfs/xfs_log.c15
-rw-r--r--fs/xfs/xfs_log.h2
-rw-r--r--fs/xfs/xfs_log_cil.c5
-rw-r--r--fs/xfs/xfs_log_priv.h2
-rw-r--r--fs/xfs/xfs_log_recover.c75
-rw-r--r--fs/xfs/xfs_mount.c4
-rw-r--r--fs/xfs/xfs_trans.c6
-rw-r--r--fs/xfs/xfs_trans_ail.c47
-rw-r--r--fs/xfs/xfs_types.h2
247 files changed, 8024 insertions, 6537 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 814ac4e213a8..0a93dc1cb4ac 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -1,6 +1,6 @@
1config 9P_FS 1config 9P_FS
2 tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)" 2 tristate "Plan 9 Resource Sharing Support (9P2000)"
3 depends on INET && NET_9P && EXPERIMENTAL 3 depends on INET && NET_9P
4 help 4 help
5 If you say Y here, you will get experimental support for 5 If you say Y here, you will get experimental support for
6 Plan 9 resource sharing via the 9P2000 protocol. 6 Plan 9 resource sharing via the 9P2000 protocol.
@@ -10,7 +10,6 @@ config 9P_FS
10 If unsure, say N. 10 If unsure, say N.
11 11
12if 9P_FS 12if 9P_FS
13
14config 9P_FSCACHE 13config 9P_FSCACHE
15 bool "Enable 9P client caching support (EXPERIMENTAL)" 14 bool "Enable 9P client caching support (EXPERIMENTAL)"
16 depends on EXPERIMENTAL 15 depends on EXPERIMENTAL
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 82a7c38ddad0..691c78f58bef 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -259,7 +259,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
259 if (IS_ERR(inode_fid)) { 259 if (IS_ERR(inode_fid)) {
260 err = PTR_ERR(inode_fid); 260 err = PTR_ERR(inode_fid);
261 mutex_unlock(&v9inode->v_mutex); 261 mutex_unlock(&v9inode->v_mutex);
262 goto error; 262 goto err_clunk_old_fid;
263 } 263 }
264 v9inode->writeback_fid = (void *) inode_fid; 264 v9inode->writeback_fid = (void *) inode_fid;
265 } 265 }
@@ -267,8 +267,8 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
267 /* Since we are opening a file, assign the open fid to the file */ 267 /* Since we are opening a file, assign the open fid to the file */
268 filp = lookup_instantiate_filp(nd, dentry, generic_file_open); 268 filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
269 if (IS_ERR(filp)) { 269 if (IS_ERR(filp)) {
270 p9_client_clunk(ofid); 270 err = PTR_ERR(filp);
271 return PTR_ERR(filp); 271 goto err_clunk_old_fid;
272 } 272 }
273 filp->private_data = ofid; 273 filp->private_data = ofid;
274#ifdef CONFIG_9P_FSCACHE 274#ifdef CONFIG_9P_FSCACHE
@@ -278,10 +278,11 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
278 return 0; 278 return 0;
279 279
280error: 280error:
281 if (ofid)
282 p9_client_clunk(ofid);
283 if (fid) 281 if (fid)
284 p9_client_clunk(fid); 282 p9_client_clunk(fid);
283err_clunk_old_fid:
284 if (ofid)
285 p9_client_clunk(ofid);
285 return err; 286 return err;
286} 287}
287 288
diff --git a/fs/Kconfig b/fs/Kconfig
index efb7d4ec6fcf..19891aab9c6e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -124,6 +124,7 @@ config TMPFS
124config TMPFS_POSIX_ACL 124config TMPFS_POSIX_ACL
125 bool "Tmpfs POSIX Access Control Lists" 125 bool "Tmpfs POSIX Access Control Lists"
126 depends on TMPFS 126 depends on TMPFS
127 select TMPFS_XATTR
127 select GENERIC_ACL 128 select GENERIC_ACL
128 help 129 help
129 POSIX Access Control Lists (ACLs) support permissions for users and 130 POSIX Access Control Lists (ACLs) support permissions for users and
@@ -134,6 +135,22 @@ config TMPFS_POSIX_ACL
134 135
135 If you don't know what Access Control Lists are, say N. 136 If you don't know what Access Control Lists are, say N.
136 137
138config TMPFS_XATTR
139 bool "Tmpfs extended attributes"
140 depends on TMPFS
141 default n
142 help
143 Extended attributes are name:value pairs associated with inodes by
144 the kernel or by users (see the attr(5) manual page, or visit
145 <http://acl.bestbits.at/> for details).
146
147 Currently this enables support for the trusted.* and
148 security.* namespaces.
149
150 You need this for POSIX ACL support on tmpfs.
151
152 If unsure, say N.
153
137config HUGETLBFS 154config HUGETLBFS
138 bool "HugeTLB file system support" 155 bool "HugeTLB file system support"
139 depends on X86 || IA64 || SPARC64 || (S390 && 64BIT) || \ 156 depends on X86 || IA64 || SPARC64 || (S390 && 64BIT) || \
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 397d3057d336..1bffbe0ed778 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -820,6 +820,8 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
820 int res; 820 int res;
821 char buf[16]; 821 char buf[16];
822 822
823 memset(&bprm, 0, sizeof(bprm));
824
823 /* Create the file name */ 825 /* Create the file name */
824 sprintf(buf, "/lib/lib%d.so", id); 826 sprintf(buf, "/lib/lib%d.so", id);
825 827
@@ -835,6 +837,12 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
835 if (!bprm.cred) 837 if (!bprm.cred)
836 goto out; 838 goto out;
837 839
840 /* We don't really care about recalculating credentials at this point
841 * as we're past the point of no return and are dealing with shared
842 * libraries.
843 */
844 bprm.cred_prepared = 1;
845
838 res = prepare_binprm(&bprm); 846 res = prepare_binprm(&bprm);
839 847
840 if (!IS_ERR_VALUE(res)) 848 if (!IS_ERR_VALUE(res))
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 5147bdd3b8e1..1f2b19978333 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1102,6 +1102,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1102 if (!bdev->bd_part) 1102 if (!bdev->bd_part)
1103 goto out_clear; 1103 goto out_clear;
1104 1104
1105 ret = 0;
1105 if (disk->fops->open) { 1106 if (disk->fops->open) {
1106 ret = disk->fops->open(bdev, mode); 1107 ret = disk->fops->open(bdev, mode);
1107 if (ret == -ERESTARTSYS) { 1108 if (ret == -ERESTARTSYS) {
@@ -1118,18 +1119,26 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1118 put_disk(disk); 1119 put_disk(disk);
1119 goto restart; 1120 goto restart;
1120 } 1121 }
1121 if (ret)
1122 goto out_clear;
1123 } 1122 }
1124 if (!bdev->bd_openers) { 1123
1124 if (!ret && !bdev->bd_openers) {
1125 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); 1125 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
1126 bdi = blk_get_backing_dev_info(bdev); 1126 bdi = blk_get_backing_dev_info(bdev);
1127 if (bdi == NULL) 1127 if (bdi == NULL)
1128 bdi = &default_backing_dev_info; 1128 bdi = &default_backing_dev_info;
1129 bdev_inode_switch_bdi(bdev->bd_inode, bdi); 1129 bdev_inode_switch_bdi(bdev->bd_inode, bdi);
1130 } 1130 }
1131 if (bdev->bd_invalidated) 1131
1132 /*
1133 * If the device is invalidated, rescan partition
1134 * if open succeeded or failed with -ENOMEDIUM.
1135 * The latter is necessary to prevent ghost
1136 * partitions on a removed medium.
1137 */
1138 if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
1132 rescan_partitions(disk, bdev); 1139 rescan_partitions(disk, bdev);
1140 if (ret)
1141 goto out_clear;
1133 } else { 1142 } else {
1134 struct block_device *whole; 1143 struct block_device *whole;
1135 whole = bdget_disk(disk, 0); 1144 whole = bdget_disk(disk, 0);
@@ -1153,13 +1162,14 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1153 } 1162 }
1154 } else { 1163 } else {
1155 if (bdev->bd_contains == bdev) { 1164 if (bdev->bd_contains == bdev) {
1156 if (bdev->bd_disk->fops->open) { 1165 ret = 0;
1166 if (bdev->bd_disk->fops->open)
1157 ret = bdev->bd_disk->fops->open(bdev, mode); 1167 ret = bdev->bd_disk->fops->open(bdev, mode);
1158 if (ret) 1168 /* the same as first opener case, read comment there */
1159 goto out_unlock_bdev; 1169 if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
1160 }
1161 if (bdev->bd_invalidated)
1162 rescan_partitions(bdev->bd_disk, bdev); 1170 rescan_partitions(bdev->bd_disk, bdev);
1171 if (ret)
1172 goto out_unlock_bdev;
1163 } 1173 }
1164 /* only one opener holds refs to the module and disk */ 1174 /* only one opener holds refs to the module and disk */
1165 module_put(disk->fops->owner); 1175 module_put(disk->fops->owner);
@@ -1228,6 +1238,8 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1228 res = __blkdev_get(bdev, mode, 0); 1238 res = __blkdev_get(bdev, mode, 0);
1229 1239
1230 if (whole) { 1240 if (whole) {
1241 struct gendisk *disk = whole->bd_disk;
1242
1231 /* finish claiming */ 1243 /* finish claiming */
1232 mutex_lock(&bdev->bd_mutex); 1244 mutex_lock(&bdev->bd_mutex);
1233 spin_lock(&bdev_lock); 1245 spin_lock(&bdev_lock);
@@ -1254,15 +1266,16 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1254 spin_unlock(&bdev_lock); 1266 spin_unlock(&bdev_lock);
1255 1267
1256 /* 1268 /*
1257 * Block event polling for write claims. Any write 1269 * Block event polling for write claims if requested. Any
1258 * holder makes the write_holder state stick until all 1270 * write holder makes the write_holder state stick until
1259 * are released. This is good enough and tracking 1271 * all are released. This is good enough and tracking
1260 * individual writeable reference is too fragile given 1272 * individual writeable reference is too fragile given the
1261 * the way @mode is used in blkdev_get/put(). 1273 * way @mode is used in blkdev_get/put().
1262 */ 1274 */
1263 if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) { 1275 if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) &&
1276 !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
1264 bdev->bd_write_holder = true; 1277 bdev->bd_write_holder = true;
1265 disk_block_events(bdev->bd_disk); 1278 disk_block_events(disk);
1266 } 1279 }
1267 1280
1268 mutex_unlock(&bdev->bd_mutex); 1281 mutex_unlock(&bdev->bd_mutex);
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 5d505aaa72fb..44ea5b92e1ba 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -178,12 +178,13 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
178 178
179 if (value) { 179 if (value) {
180 acl = posix_acl_from_xattr(value, size); 180 acl = posix_acl_from_xattr(value, size);
181 if (IS_ERR(acl))
182 return PTR_ERR(acl);
183
181 if (acl) { 184 if (acl) {
182 ret = posix_acl_valid(acl); 185 ret = posix_acl_valid(acl);
183 if (ret) 186 if (ret)
184 goto out; 187 goto out;
185 } else if (IS_ERR(acl)) {
186 return PTR_ERR(acl);
187 } 188 }
188 } 189 }
189 190
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index cd52f7f556ef..9ee6bd55e16c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8856,23 +8856,38 @@ out:
8856int btrfs_init_space_info(struct btrfs_fs_info *fs_info) 8856int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
8857{ 8857{
8858 struct btrfs_space_info *space_info; 8858 struct btrfs_space_info *space_info;
8859 struct btrfs_super_block *disk_super;
8860 u64 features;
8861 u64 flags;
8862 int mixed = 0;
8859 int ret; 8863 int ret;
8860 8864
8861 ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM, 0, 0, 8865 disk_super = &fs_info->super_copy;
8862 &space_info); 8866 if (!btrfs_super_root(disk_super))
8863 if (ret) 8867 return 1;
8864 return ret;
8865 8868
8866 ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA, 0, 0, 8869 features = btrfs_super_incompat_flags(disk_super);
8867 &space_info); 8870 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
8868 if (ret) 8871 mixed = 1;
8869 return ret;
8870 8872
8871 ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA, 0, 0, 8873 flags = BTRFS_BLOCK_GROUP_SYSTEM;
8872 &space_info); 8874 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
8873 if (ret) 8875 if (ret)
8874 return ret; 8876 goto out;
8875 8877
8878 if (mixed) {
8879 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA;
8880 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
8881 } else {
8882 flags = BTRFS_BLOCK_GROUP_METADATA;
8883 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
8884 if (ret)
8885 goto out;
8886
8887 flags = BTRFS_BLOCK_GROUP_DATA;
8888 ret = update_space_info(fs_info, flags, 0, 0, &space_info);
8889 }
8890out:
8876 return ret; 8891 return ret;
8877} 8892}
8878 8893
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ba41da59e31b..96fcfa522dab 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -10,6 +10,7 @@
10#include <linux/swap.h> 10#include <linux/swap.h>
11#include <linux/writeback.h> 11#include <linux/writeback.h>
12#include <linux/pagevec.h> 12#include <linux/pagevec.h>
13#include <linux/prefetch.h>
13#include "extent_io.h" 14#include "extent_io.h"
14#include "extent_map.h" 15#include "extent_map.h"
15#include "compat.h" 16#include "compat.h"
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ffb48d6c5433..2616f7ed4799 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -81,6 +81,13 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
81 iflags |= FS_NOATIME_FL; 81 iflags |= FS_NOATIME_FL;
82 if (flags & BTRFS_INODE_DIRSYNC) 82 if (flags & BTRFS_INODE_DIRSYNC)
83 iflags |= FS_DIRSYNC_FL; 83 iflags |= FS_DIRSYNC_FL;
84 if (flags & BTRFS_INODE_NODATACOW)
85 iflags |= FS_NOCOW_FL;
86
87 if ((flags & BTRFS_INODE_COMPRESS) && !(flags & BTRFS_INODE_NOCOMPRESS))
88 iflags |= FS_COMPR_FL;
89 else if (flags & BTRFS_INODE_NOCOMPRESS)
90 iflags |= FS_NOCOMP_FL;
84 91
85 return iflags; 92 return iflags;
86} 93}
@@ -144,16 +151,13 @@ static int check_flags(unsigned int flags)
144 if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ 151 if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
145 FS_NOATIME_FL | FS_NODUMP_FL | \ 152 FS_NOATIME_FL | FS_NODUMP_FL | \
146 FS_SYNC_FL | FS_DIRSYNC_FL | \ 153 FS_SYNC_FL | FS_DIRSYNC_FL | \
147 FS_NOCOMP_FL | FS_COMPR_FL | \ 154 FS_NOCOMP_FL | FS_COMPR_FL |
148 FS_NOCOW_FL | FS_COW_FL)) 155 FS_NOCOW_FL))
149 return -EOPNOTSUPP; 156 return -EOPNOTSUPP;
150 157
151 if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL)) 158 if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
152 return -EINVAL; 159 return -EINVAL;
153 160
154 if ((flags & FS_NOCOW_FL) && (flags & FS_COW_FL))
155 return -EINVAL;
156
157 return 0; 161 return 0;
158} 162}
159 163
@@ -218,6 +222,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
218 ip->flags |= BTRFS_INODE_DIRSYNC; 222 ip->flags |= BTRFS_INODE_DIRSYNC;
219 else 223 else
220 ip->flags &= ~BTRFS_INODE_DIRSYNC; 224 ip->flags &= ~BTRFS_INODE_DIRSYNC;
225 if (flags & FS_NOCOW_FL)
226 ip->flags |= BTRFS_INODE_NODATACOW;
227 else
228 ip->flags &= ~BTRFS_INODE_NODATACOW;
221 229
222 /* 230 /*
223 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS 231 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
@@ -230,11 +238,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
230 } else if (flags & FS_COMPR_FL) { 238 } else if (flags & FS_COMPR_FL) {
231 ip->flags |= BTRFS_INODE_COMPRESS; 239 ip->flags |= BTRFS_INODE_COMPRESS;
232 ip->flags &= ~BTRFS_INODE_NOCOMPRESS; 240 ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
241 } else {
242 ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
233 } 243 }
234 if (flags & FS_NOCOW_FL)
235 ip->flags |= BTRFS_INODE_NODATACOW;
236 else if (flags & FS_COW_FL)
237 ip->flags &= ~BTRFS_INODE_NODATACOW;
238 244
239 trans = btrfs_join_transaction(root, 1); 245 trans = btrfs_join_transaction(root, 1);
240 BUG_ON(IS_ERR(trans)); 246 BUG_ON(IS_ERR(trans));
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 199a80134312..f340f7c99d09 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -709,7 +709,7 @@ again:
709 WARN_ON(cur->checked); 709 WARN_ON(cur->checked);
710 if (!list_empty(&cur->upper)) { 710 if (!list_empty(&cur->upper)) {
711 /* 711 /*
712 * the backref was added previously when processsing 712 * the backref was added previously when processing
713 * backref of type BTRFS_TREE_BLOCK_REF_KEY 713 * backref of type BTRFS_TREE_BLOCK_REF_KEY
714 */ 714 */
715 BUG_ON(!list_is_singular(&cur->upper)); 715 BUG_ON(!list_is_singular(&cur->upper));
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index e159c529fd2b..33da49dc3cc6 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -775,6 +775,13 @@ get_more_pages:
775 ci->i_truncate_seq, 775 ci->i_truncate_seq,
776 ci->i_truncate_size, 776 ci->i_truncate_size,
777 &inode->i_mtime, true, 1, 0); 777 &inode->i_mtime, true, 1, 0);
778
779 if (!req) {
780 rc = -ENOMEM;
781 unlock_page(page);
782 break;
783 }
784
778 max_pages = req->r_num_pages; 785 max_pages = req->r_num_pages;
779 786
780 alloc_page_vec(fsc, req); 787 alloc_page_vec(fsc, req);
@@ -841,7 +848,8 @@ get_more_pages:
841 op->payload_len = cpu_to_le32(len); 848 op->payload_len = cpu_to_le32(len);
842 req->r_request->hdr.data_len = cpu_to_le32(len); 849 req->r_request->hdr.data_len = cpu_to_le32(len);
843 850
844 ceph_osdc_start_request(&fsc->client->osdc, req, true); 851 rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
852 BUG_ON(rc);
845 req = NULL; 853 req = NULL;
846 854
847 /* continue? */ 855 /* continue? */
@@ -873,8 +881,6 @@ release_pvec_pages:
873out: 881out:
874 if (req) 882 if (req)
875 ceph_osdc_put_request(req); 883 ceph_osdc_put_request(req);
876 if (rc > 0)
877 rc = 0; /* vfs expects us to return 0 */
878 ceph_put_snap_context(snapc); 884 ceph_put_snap_context(snapc);
879 dout("writepages done, rc = %d\n", rc); 885 dout("writepages done, rc = %d\n", rc);
880 return rc; 886 return rc;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 5323c330bbf3..1f72b00447c4 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -569,7 +569,8 @@ retry:
569 list_add_tail(&cap->session_caps, &session->s_caps); 569 list_add_tail(&cap->session_caps, &session->s_caps);
570 session->s_nr_caps++; 570 session->s_nr_caps++;
571 spin_unlock(&session->s_cap_lock); 571 spin_unlock(&session->s_cap_lock);
572 } 572 } else if (new_cap)
573 ceph_put_cap(mdsc, new_cap);
573 574
574 if (!ci->i_snap_realm) { 575 if (!ci->i_snap_realm) {
575 /* 576 /*
@@ -819,7 +820,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
819 used |= CEPH_CAP_FILE_CACHE; 820 used |= CEPH_CAP_FILE_CACHE;
820 if (ci->i_wr_ref) 821 if (ci->i_wr_ref)
821 used |= CEPH_CAP_FILE_WR; 822 used |= CEPH_CAP_FILE_WR;
822 if (ci->i_wrbuffer_ref) 823 if (ci->i_wb_ref || ci->i_wrbuffer_ref)
823 used |= CEPH_CAP_FILE_BUFFER; 824 used |= CEPH_CAP_FILE_BUFFER;
824 return used; 825 return used;
825} 826}
@@ -1331,10 +1332,11 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
1331} 1332}
1332 1333
1333/* 1334/*
1334 * Mark caps dirty. If inode is newly dirty, add to the global dirty 1335 * Mark caps dirty. If inode is newly dirty, return the dirty flags.
1335 * list. 1336 * Caller is then responsible for calling __mark_inode_dirty with the
1337 * returned flags value.
1336 */ 1338 */
1337void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) 1339int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1338{ 1340{
1339 struct ceph_mds_client *mdsc = 1341 struct ceph_mds_client *mdsc =
1340 ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; 1342 ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
@@ -1357,7 +1359,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1357 list_add(&ci->i_dirty_item, &mdsc->cap_dirty); 1359 list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
1358 spin_unlock(&mdsc->cap_dirty_lock); 1360 spin_unlock(&mdsc->cap_dirty_lock);
1359 if (ci->i_flushing_caps == 0) { 1361 if (ci->i_flushing_caps == 0) {
1360 igrab(inode); 1362 ihold(inode);
1361 dirty |= I_DIRTY_SYNC; 1363 dirty |= I_DIRTY_SYNC;
1362 } 1364 }
1363 } 1365 }
@@ -1365,9 +1367,8 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1365 if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) && 1367 if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
1366 (mask & CEPH_CAP_FILE_BUFFER)) 1368 (mask & CEPH_CAP_FILE_BUFFER))
1367 dirty |= I_DIRTY_DATASYNC; 1369 dirty |= I_DIRTY_DATASYNC;
1368 if (dirty)
1369 __mark_inode_dirty(inode, dirty);
1370 __cap_delay_requeue(mdsc, ci); 1370 __cap_delay_requeue(mdsc, ci);
1371 return dirty;
1371} 1372}
1372 1373
1373/* 1374/*
@@ -1990,11 +1991,11 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got)
1990 if (got & CEPH_CAP_FILE_WR) 1991 if (got & CEPH_CAP_FILE_WR)
1991 ci->i_wr_ref++; 1992 ci->i_wr_ref++;
1992 if (got & CEPH_CAP_FILE_BUFFER) { 1993 if (got & CEPH_CAP_FILE_BUFFER) {
1993 if (ci->i_wrbuffer_ref == 0) 1994 if (ci->i_wb_ref == 0)
1994 igrab(&ci->vfs_inode); 1995 ihold(&ci->vfs_inode);
1995 ci->i_wrbuffer_ref++; 1996 ci->i_wb_ref++;
1996 dout("__take_cap_refs %p wrbuffer %d -> %d (?)\n", 1997 dout("__take_cap_refs %p wb %d -> %d (?)\n",
1997 &ci->vfs_inode, ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref); 1998 &ci->vfs_inode, ci->i_wb_ref-1, ci->i_wb_ref);
1998 } 1999 }
1999} 2000}
2000 2001
@@ -2169,12 +2170,12 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
2169 if (--ci->i_rdcache_ref == 0) 2170 if (--ci->i_rdcache_ref == 0)
2170 last++; 2171 last++;
2171 if (had & CEPH_CAP_FILE_BUFFER) { 2172 if (had & CEPH_CAP_FILE_BUFFER) {
2172 if (--ci->i_wrbuffer_ref == 0) { 2173 if (--ci->i_wb_ref == 0) {
2173 last++; 2174 last++;
2174 put++; 2175 put++;
2175 } 2176 }
2176 dout("put_cap_refs %p wrbuffer %d -> %d (?)\n", 2177 dout("put_cap_refs %p wb %d -> %d (?)\n",
2177 inode, ci->i_wrbuffer_ref+1, ci->i_wrbuffer_ref); 2178 inode, ci->i_wb_ref+1, ci->i_wb_ref);
2178 } 2179 }
2179 if (had & CEPH_CAP_FILE_WR) 2180 if (had & CEPH_CAP_FILE_WR)
2180 if (--ci->i_wr_ref == 0) { 2181 if (--ci->i_wr_ref == 0) {
@@ -2634,6 +2635,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2634 struct ceph_mds_session *session, 2635 struct ceph_mds_session *session,
2635 int *open_target_sessions) 2636 int *open_target_sessions)
2636{ 2637{
2638 struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
2637 struct ceph_inode_info *ci = ceph_inode(inode); 2639 struct ceph_inode_info *ci = ceph_inode(inode);
2638 int mds = session->s_mds; 2640 int mds = session->s_mds;
2639 unsigned mseq = le32_to_cpu(ex->migrate_seq); 2641 unsigned mseq = le32_to_cpu(ex->migrate_seq);
@@ -2670,6 +2672,19 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2670 * export targets, so that we get the matching IMPORT 2672 * export targets, so that we get the matching IMPORT
2671 */ 2673 */
2672 *open_target_sessions = 1; 2674 *open_target_sessions = 1;
2675
2676 /*
2677 * we can't flush dirty caps that we've seen the
2678 * EXPORT but no IMPORT for
2679 */
2680 spin_lock(&mdsc->cap_dirty_lock);
2681 if (!list_empty(&ci->i_dirty_item)) {
2682 dout(" moving %p to cap_dirty_migrating\n",
2683 inode);
2684 list_move(&ci->i_dirty_item,
2685 &mdsc->cap_dirty_migrating);
2686 }
2687 spin_unlock(&mdsc->cap_dirty_lock);
2673 } 2688 }
2674 __ceph_remove_cap(cap); 2689 __ceph_remove_cap(cap);
2675 } 2690 }
@@ -2707,6 +2722,13 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
2707 ci->i_cap_exporting_issued = 0; 2722 ci->i_cap_exporting_issued = 0;
2708 ci->i_cap_exporting_mseq = 0; 2723 ci->i_cap_exporting_mseq = 0;
2709 ci->i_cap_exporting_mds = -1; 2724 ci->i_cap_exporting_mds = -1;
2725
2726 spin_lock(&mdsc->cap_dirty_lock);
2727 if (!list_empty(&ci->i_dirty_item)) {
2728 dout(" moving %p back to cap_dirty\n", inode);
2729 list_move(&ci->i_dirty_item, &mdsc->cap_dirty);
2730 }
2731 spin_unlock(&mdsc->cap_dirty_lock);
2710 } else { 2732 } else {
2711 dout("handle_cap_import inode %p ci %p mds%d mseq %d\n", 2733 dout("handle_cap_import inode %p ci %p mds%d mseq %d\n",
2712 inode, ci, mds, mseq); 2734 inode, ci, mds, mseq);
@@ -2910,38 +2932,16 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
2910 */ 2932 */
2911void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) 2933void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
2912{ 2934{
2913 struct ceph_inode_info *ci, *nci = NULL; 2935 struct ceph_inode_info *ci;
2914 struct inode *inode, *ninode = NULL; 2936 struct inode *inode;
2915 struct list_head *p, *n;
2916 2937
2917 dout("flush_dirty_caps\n"); 2938 dout("flush_dirty_caps\n");
2918 spin_lock(&mdsc->cap_dirty_lock); 2939 spin_lock(&mdsc->cap_dirty_lock);
2919 list_for_each_safe(p, n, &mdsc->cap_dirty) { 2940 while (!list_empty(&mdsc->cap_dirty)) {
2920 if (nci) { 2941 ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info,
2921 ci = nci; 2942 i_dirty_item);
2922 inode = ninode; 2943 inode = igrab(&ci->vfs_inode);
2923 ci->i_ceph_flags &= ~CEPH_I_NOFLUSH; 2944 dout("flush_dirty_caps %p\n", inode);
2924 dout("flush_dirty_caps inode %p (was next inode)\n",
2925 inode);
2926 } else {
2927 ci = list_entry(p, struct ceph_inode_info,
2928 i_dirty_item);
2929 inode = igrab(&ci->vfs_inode);
2930 BUG_ON(!inode);
2931 dout("flush_dirty_caps inode %p\n", inode);
2932 }
2933 if (n != &mdsc->cap_dirty) {
2934 nci = list_entry(n, struct ceph_inode_info,
2935 i_dirty_item);
2936 ninode = igrab(&nci->vfs_inode);
2937 BUG_ON(!ninode);
2938 nci->i_ceph_flags |= CEPH_I_NOFLUSH;
2939 dout("flush_dirty_caps next inode %p, noflush\n",
2940 ninode);
2941 } else {
2942 nci = NULL;
2943 ninode = NULL;
2944 }
2945 spin_unlock(&mdsc->cap_dirty_lock); 2945 spin_unlock(&mdsc->cap_dirty_lock);
2946 if (inode) { 2946 if (inode) {
2947 ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, 2947 ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH,
@@ -2951,6 +2951,7 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
2951 spin_lock(&mdsc->cap_dirty_lock); 2951 spin_lock(&mdsc->cap_dirty_lock);
2952 } 2952 }
2953 spin_unlock(&mdsc->cap_dirty_lock); 2953 spin_unlock(&mdsc->cap_dirty_lock);
2954 dout("flush_dirty_caps done\n");
2954} 2955}
2955 2956
2956/* 2957/*
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 1a867a3601ae..33729e822bb9 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -360,7 +360,7 @@ more:
360 rinfo = &fi->last_readdir->r_reply_info; 360 rinfo = &fi->last_readdir->r_reply_info;
361 dout("readdir frag %x num %d off %d chunkoff %d\n", frag, 361 dout("readdir frag %x num %d off %d chunkoff %d\n", frag,
362 rinfo->dir_nr, off, fi->offset); 362 rinfo->dir_nr, off, fi->offset);
363 while (off - fi->offset >= 0 && off - fi->offset < rinfo->dir_nr) { 363 while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) {
364 u64 pos = ceph_make_fpos(frag, off); 364 u64 pos = ceph_make_fpos(frag, off);
365 struct ceph_mds_reply_inode *in = 365 struct ceph_mds_reply_inode *in =
366 rinfo->dir_in[off - fi->offset].in; 366 rinfo->dir_in[off - fi->offset].in;
@@ -1066,16 +1066,17 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
1066 struct inode *inode = file->f_dentry->d_inode; 1066 struct inode *inode = file->f_dentry->d_inode;
1067 struct ceph_inode_info *ci = ceph_inode(inode); 1067 struct ceph_inode_info *ci = ceph_inode(inode);
1068 int left; 1068 int left;
1069 const int bufsize = 1024;
1069 1070
1070 if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) 1071 if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
1071 return -EISDIR; 1072 return -EISDIR;
1072 1073
1073 if (!cf->dir_info) { 1074 if (!cf->dir_info) {
1074 cf->dir_info = kmalloc(1024, GFP_NOFS); 1075 cf->dir_info = kmalloc(bufsize, GFP_NOFS);
1075 if (!cf->dir_info) 1076 if (!cf->dir_info)
1076 return -ENOMEM; 1077 return -ENOMEM;
1077 cf->dir_info_len = 1078 cf->dir_info_len =
1078 sprintf(cf->dir_info, 1079 snprintf(cf->dir_info, bufsize,
1079 "entries: %20lld\n" 1080 "entries: %20lld\n"
1080 " files: %20lld\n" 1081 " files: %20lld\n"
1081 " subdirs: %20lld\n" 1082 " subdirs: %20lld\n"
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index e41056174bf8..a610d3d67488 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -86,6 +86,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
86static struct dentry *__fh_to_dentry(struct super_block *sb, 86static struct dentry *__fh_to_dentry(struct super_block *sb,
87 struct ceph_nfs_fh *fh) 87 struct ceph_nfs_fh *fh)
88{ 88{
89 struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
89 struct inode *inode; 90 struct inode *inode;
90 struct dentry *dentry; 91 struct dentry *dentry;
91 struct ceph_vino vino; 92 struct ceph_vino vino;
@@ -95,8 +96,24 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
95 vino.ino = fh->ino; 96 vino.ino = fh->ino;
96 vino.snap = CEPH_NOSNAP; 97 vino.snap = CEPH_NOSNAP;
97 inode = ceph_find_inode(sb, vino); 98 inode = ceph_find_inode(sb, vino);
98 if (!inode) 99 if (!inode) {
99 return ERR_PTR(-ESTALE); 100 struct ceph_mds_request *req;
101
102 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
103 USE_ANY_MDS);
104 if (IS_ERR(req))
105 return ERR_CAST(req);
106
107 req->r_ino1 = vino;
108 req->r_num_caps = 1;
109 err = ceph_mdsc_do_request(mdsc, NULL, req);
110 inode = req->r_target_inode;
111 if (inode)
112 igrab(inode);
113 ceph_mdsc_put_request(req);
114 if (!inode)
115 return ERR_PTR(-ESTALE);
116 }
100 117
101 dentry = d_obtain_alias(inode); 118 dentry = d_obtain_alias(inode);
102 if (IS_ERR(dentry)) { 119 if (IS_ERR(dentry)) {
@@ -148,8 +165,10 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb,
148 snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash); 165 snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash);
149 req->r_num_caps = 1; 166 req->r_num_caps = 1;
150 err = ceph_mdsc_do_request(mdsc, NULL, req); 167 err = ceph_mdsc_do_request(mdsc, NULL, req);
168 inode = req->r_target_inode;
169 if (inode)
170 igrab(inode);
151 ceph_mdsc_put_request(req); 171 ceph_mdsc_put_request(req);
152 inode = ceph_find_inode(sb, vino);
153 if (!inode) 172 if (!inode)
154 return ERR_PTR(err ? err : -ESTALE); 173 return ERR_PTR(err ? err : -ESTALE);
155 } 174 }
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 159b512d5a27..203252d88d9f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -734,9 +734,12 @@ retry_snap:
734 } 734 }
735 } 735 }
736 if (ret >= 0) { 736 if (ret >= 0) {
737 int dirty;
737 spin_lock(&inode->i_lock); 738 spin_lock(&inode->i_lock);
738 __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); 739 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
739 spin_unlock(&inode->i_lock); 740 spin_unlock(&inode->i_lock);
741 if (dirty)
742 __mark_inode_dirty(inode, dirty);
740 } 743 }
741 744
742out: 745out:
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index b54c97da1c43..70b6a4839c38 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -355,6 +355,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
355 ci->i_rd_ref = 0; 355 ci->i_rd_ref = 0;
356 ci->i_rdcache_ref = 0; 356 ci->i_rdcache_ref = 0;
357 ci->i_wr_ref = 0; 357 ci->i_wr_ref = 0;
358 ci->i_wb_ref = 0;
358 ci->i_wrbuffer_ref = 0; 359 ci->i_wrbuffer_ref = 0;
359 ci->i_wrbuffer_ref_head = 0; 360 ci->i_wrbuffer_ref_head = 0;
360 ci->i_shared_gen = 0; 361 ci->i_shared_gen = 0;
@@ -1567,6 +1568,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1567 int release = 0, dirtied = 0; 1568 int release = 0, dirtied = 0;
1568 int mask = 0; 1569 int mask = 0;
1569 int err = 0; 1570 int err = 0;
1571 int inode_dirty_flags = 0;
1570 1572
1571 if (ceph_snap(inode) != CEPH_NOSNAP) 1573 if (ceph_snap(inode) != CEPH_NOSNAP)
1572 return -EROFS; 1574 return -EROFS;
@@ -1725,13 +1727,16 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1725 dout("setattr %p ATTR_FILE ... hrm!\n", inode); 1727 dout("setattr %p ATTR_FILE ... hrm!\n", inode);
1726 1728
1727 if (dirtied) { 1729 if (dirtied) {
1728 __ceph_mark_dirty_caps(ci, dirtied); 1730 inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied);
1729 inode->i_ctime = CURRENT_TIME; 1731 inode->i_ctime = CURRENT_TIME;
1730 } 1732 }
1731 1733
1732 release &= issued; 1734 release &= issued;
1733 spin_unlock(&inode->i_lock); 1735 spin_unlock(&inode->i_lock);
1734 1736
1737 if (inode_dirty_flags)
1738 __mark_inode_dirty(inode, inode_dirty_flags);
1739
1735 if (mask) { 1740 if (mask) {
1736 req->r_inode = igrab(inode); 1741 req->r_inode = igrab(inode);
1737 req->r_inode_drop = release; 1742 req->r_inode_drop = release;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f60b07b0feb0..79743d146be6 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -578,6 +578,7 @@ static void __register_request(struct ceph_mds_client *mdsc,
578 if (dir) { 578 if (dir) {
579 struct ceph_inode_info *ci = ceph_inode(dir); 579 struct ceph_inode_info *ci = ceph_inode(dir);
580 580
581 ihold(dir);
581 spin_lock(&ci->i_unsafe_lock); 582 spin_lock(&ci->i_unsafe_lock);
582 req->r_unsafe_dir = dir; 583 req->r_unsafe_dir = dir;
583 list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops); 584 list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops);
@@ -598,6 +599,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
598 spin_lock(&ci->i_unsafe_lock); 599 spin_lock(&ci->i_unsafe_lock);
599 list_del_init(&req->r_unsafe_dir_item); 600 list_del_init(&req->r_unsafe_dir_item);
600 spin_unlock(&ci->i_unsafe_lock); 601 spin_unlock(&ci->i_unsafe_lock);
602
603 iput(req->r_unsafe_dir);
604 req->r_unsafe_dir = NULL;
601 } 605 }
602 606
603 ceph_mdsc_put_request(req); 607 ceph_mdsc_put_request(req);
@@ -2691,7 +2695,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
2691{ 2695{
2692 struct super_block *sb = mdsc->fsc->sb; 2696 struct super_block *sb = mdsc->fsc->sb;
2693 struct inode *inode; 2697 struct inode *inode;
2694 struct ceph_inode_info *ci;
2695 struct dentry *parent, *dentry; 2698 struct dentry *parent, *dentry;
2696 struct ceph_dentry_info *di; 2699 struct ceph_dentry_info *di;
2697 int mds = session->s_mds; 2700 int mds = session->s_mds;
@@ -2728,7 +2731,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
2728 dout("handle_lease no inode %llx\n", vino.ino); 2731 dout("handle_lease no inode %llx\n", vino.ino);
2729 goto release; 2732 goto release;
2730 } 2733 }
2731 ci = ceph_inode(inode);
2732 2734
2733 /* dentry */ 2735 /* dentry */
2734 parent = d_find_alias(inode); 2736 parent = d_find_alias(inode);
@@ -3002,6 +3004,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
3002 spin_lock_init(&mdsc->snap_flush_lock); 3004 spin_lock_init(&mdsc->snap_flush_lock);
3003 mdsc->cap_flush_seq = 0; 3005 mdsc->cap_flush_seq = 0;
3004 INIT_LIST_HEAD(&mdsc->cap_dirty); 3006 INIT_LIST_HEAD(&mdsc->cap_dirty);
3007 INIT_LIST_HEAD(&mdsc->cap_dirty_migrating);
3005 mdsc->num_cap_flushing = 0; 3008 mdsc->num_cap_flushing = 0;
3006 spin_lock_init(&mdsc->cap_dirty_lock); 3009 spin_lock_init(&mdsc->cap_dirty_lock);
3007 init_waitqueue_head(&mdsc->cap_flushing_wq); 3010 init_waitqueue_head(&mdsc->cap_flushing_wq);
@@ -3304,8 +3307,8 @@ static void con_put(struct ceph_connection *con)
3304{ 3307{
3305 struct ceph_mds_session *s = con->private; 3308 struct ceph_mds_session *s = con->private;
3306 3309
3310 dout("mdsc con_put %p (%d)\n", s, atomic_read(&s->s_ref) - 1);
3307 ceph_put_mds_session(s); 3311 ceph_put_mds_session(s);
3308 dout("mdsc con_put %p (%d)\n", s, atomic_read(&s->s_ref));
3309} 3312}
3310 3313
3311/* 3314/*
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 4e3a9cc0bba6..7d8a0d662d56 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -278,6 +278,7 @@ struct ceph_mds_client {
278 278
279 u64 cap_flush_seq; 279 u64 cap_flush_seq;
280 struct list_head cap_dirty; /* inodes with dirty caps */ 280 struct list_head cap_dirty; /* inodes with dirty caps */
281 struct list_head cap_dirty_migrating; /* ...that are migration... */
281 int num_cap_flushing; /* # caps we are flushing */ 282 int num_cap_flushing; /* # caps we are flushing */
282 spinlock_t cap_dirty_lock; /* protects above items */ 283 spinlock_t cap_dirty_lock; /* protects above items */
283 wait_queue_head_t cap_flushing_wq; 284 wait_queue_head_t cap_flushing_wq;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index e86ec1155f8f..24067d68a554 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -206,7 +206,7 @@ void ceph_put_snap_realm(struct ceph_mds_client *mdsc,
206 up_write(&mdsc->snap_rwsem); 206 up_write(&mdsc->snap_rwsem);
207 } else { 207 } else {
208 spin_lock(&mdsc->snap_empty_lock); 208 spin_lock(&mdsc->snap_empty_lock);
209 list_add(&mdsc->snap_empty, &realm->empty_item); 209 list_add(&realm->empty_item, &mdsc->snap_empty);
210 spin_unlock(&mdsc->snap_empty_lock); 210 spin_unlock(&mdsc->snap_empty_lock);
211 } 211 }
212} 212}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 619fe719968f..f5cabefa98dc 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -293,7 +293,7 @@ struct ceph_inode_info {
293 293
294 /* held references to caps */ 294 /* held references to caps */
295 int i_pin_ref; 295 int i_pin_ref;
296 int i_rd_ref, i_rdcache_ref, i_wr_ref; 296 int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wb_ref;
297 int i_wrbuffer_ref, i_wrbuffer_ref_head; 297 int i_wrbuffer_ref, i_wrbuffer_ref_head;
298 u32 i_shared_gen; /* increment each time we get FILE_SHARED */ 298 u32 i_shared_gen; /* increment each time we get FILE_SHARED */
299 u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */ 299 u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */
@@ -506,7 +506,7 @@ static inline int __ceph_caps_dirty(struct ceph_inode_info *ci)
506{ 506{
507 return ci->i_dirty_caps | ci->i_flushing_caps; 507 return ci->i_dirty_caps | ci->i_flushing_caps;
508} 508}
509extern void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask); 509extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask);
510 510
511extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask); 511extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask);
512extern int __ceph_caps_used(struct ceph_inode_info *ci); 512extern int __ceph_caps_used(struct ceph_inode_info *ci);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 8c9eba6ef9df..f2b628696180 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -703,6 +703,7 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
703 struct ceph_inode_xattr *xattr = NULL; 703 struct ceph_inode_xattr *xattr = NULL;
704 int issued; 704 int issued;
705 int required_blob_size; 705 int required_blob_size;
706 int dirty;
706 707
707 if (ceph_snap(inode) != CEPH_NOSNAP) 708 if (ceph_snap(inode) != CEPH_NOSNAP)
708 return -EROFS; 709 return -EROFS;
@@ -763,11 +764,12 @@ retry:
763 dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued)); 764 dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
764 err = __set_xattr(ci, newname, name_len, newval, 765 err = __set_xattr(ci, newname, name_len, newval,
765 val_len, 1, 1, 1, &xattr); 766 val_len, 1, 1, 1, &xattr);
766 __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); 767 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
767 ci->i_xattrs.dirty = true; 768 ci->i_xattrs.dirty = true;
768 inode->i_ctime = CURRENT_TIME; 769 inode->i_ctime = CURRENT_TIME;
769 spin_unlock(&inode->i_lock); 770 spin_unlock(&inode->i_lock);
770 771 if (dirty)
772 __mark_inode_dirty(inode, dirty);
771 return err; 773 return err;
772 774
773do_sync: 775do_sync:
@@ -810,6 +812,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
810 struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode); 812 struct ceph_vxattr_cb *vxattrs = ceph_inode_vxattrs(inode);
811 int issued; 813 int issued;
812 int err; 814 int err;
815 int dirty;
813 816
814 if (ceph_snap(inode) != CEPH_NOSNAP) 817 if (ceph_snap(inode) != CEPH_NOSNAP)
815 return -EROFS; 818 return -EROFS;
@@ -833,12 +836,13 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
833 goto do_sync; 836 goto do_sync;
834 837
835 err = __remove_xattr_by_name(ceph_inode(inode), name); 838 err = __remove_xattr_by_name(ceph_inode(inode), name);
836 __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); 839 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
837 ci->i_xattrs.dirty = true; 840 ci->i_xattrs.dirty = true;
838 inode->i_ctime = CURRENT_TIME; 841 inode->i_ctime = CURRENT_TIME;
839 842
840 spin_unlock(&inode->i_lock); 843 spin_unlock(&inode->i_lock);
841 844 if (dirty)
845 __mark_inode_dirty(inode, dirty);
842 return err; 846 return err;
843do_sync: 847do_sync:
844 spin_unlock(&inode->i_lock); 848 spin_unlock(&inode->i_lock);
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 7cb0f7f847e4..75c47cd8d086 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -7,6 +7,7 @@ config CIFS
7 select CRYPTO_MD5 7 select CRYPTO_MD5
8 select CRYPTO_HMAC 8 select CRYPTO_HMAC
9 select CRYPTO_ARC4 9 select CRYPTO_ARC4
10 select CRYPTO_DES
10 help 11 help
11 This is the client VFS module for the Common Internet File System 12 This is the client VFS module for the Common Internet File System
12 (CIFS) protocol which is the successor to the Server Message Block 13 (CIFS) protocol which is the successor to the Server Message Block
@@ -152,16 +153,28 @@ config CIFS_ACL
152 Allows to fetch CIFS/NTFS ACL from the server. The DACL blob 153 Allows to fetch CIFS/NTFS ACL from the server. The DACL blob
153 is handed over to the application/caller. 154 is handed over to the application/caller.
154 155
155config CIFS_EXPERIMENTAL 156config CIFS_SMB2
156 bool "CIFS Experimental Features (EXPERIMENTAL)" 157 bool "SMB2 network file system support (EXPERIMENTAL)"
158 depends on EXPERIMENTAL && INET && BROKEN
159 select NLS
160 select KEYS
161 select FSCACHE
162 select DNS_RESOLVER
163
164 help
165 This enables experimental support for the SMB2 (Server Message Block
166 version 2) protocol. The SMB2 protocol is the successor to the
167 popular CIFS and SMB network file sharing protocols. SMB2 is the
168 native file sharing mechanism for recent versions of Windows
169 operating systems (since Vista). SMB2 enablement will eventually
170 allow users better performance, security and features, than would be
171 possible with cifs. Note that smb2 mount options also are simpler
172 (compared to cifs) due to protocol improvements.
173
174 Unless you are a developer or tester, say N.
175
176config CIFS_NFSD_EXPORT
177 bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)"
157 depends on CIFS && EXPERIMENTAL 178 depends on CIFS && EXPERIMENTAL
158 help 179 help
159 Enables cifs features under testing. These features are 180 Allows NFS server to export a CIFS mounted share (nfsd over cifs)
160 experimental and currently include DFS support and directory
161 change notification ie fcntl(F_DNOTIFY), as well as the upcall
162 mechanism which will be used for Kerberos session negotiation
163 and uid remapping. Some of these features also may depend on
164 setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
165 (which is disabled by default). See the file fs/cifs/README
166 for more details. If unsure, say N.
167
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index d87558448e3d..005d524c3a4a 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -4,7 +4,7 @@
4obj-$(CONFIG_CIFS) += cifs.o 4obj-$(CONFIG_CIFS) += cifs.o
5 5
6cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ 6cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
7 link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \ 7 link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
8 cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ 8 cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
9 readdir.o ioctl.o sess.o export.o 9 readdir.o ioctl.o sess.o export.o
10 10
diff --git a/fs/cifs/README b/fs/cifs/README
index 74ab165fc646..4a3ca0e5ca24 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -704,18 +704,6 @@ the start of smb requests and responses can be enabled via:
704 704
705 echo 1 > /proc/fs/cifs/traceSMB 705 echo 1 > /proc/fs/cifs/traceSMB
706 706
707Two other experimental features are under development. To test these
708requires enabling CONFIG_CIFS_EXPERIMENTAL
709
710 cifsacl support needed to retrieve approximated mode bits based on
711 the contents on the CIFS ACL.
712
713 lease support: cifs will check the oplock state before calling into
714 the vfs to see if we can grant a lease on a file.
715
716 DNOTIFY fcntl: needed for support of directory change
717 notification and perhaps later for file leases)
718
719Per share (per client mount) statistics are available in /proc/fs/cifs/Stats 707Per share (per client mount) statistics are available in /proc/fs/cifs/Stats
720if the kernel was configured with cifs statistics enabled. The statistics 708if the kernel was configured with cifs statistics enabled. The statistics
721represent the number of successful (ie non-zero return code from the server) 709represent the number of successful (ie non-zero return code from the server)
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 30d01bc90855..18f4272d9047 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -63,7 +63,7 @@ void cifs_dump_detail(struct smb_hdr *smb)
63 cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d", 63 cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d",
64 smb->Command, smb->Status.CifsError, 64 smb->Command, smb->Status.CifsError,
65 smb->Flags, smb->Flags2, smb->Mid, smb->Pid); 65 smb->Flags, smb->Flags2, smb->Mid, smb->Pid);
66 cERROR(1, "smb buf %p len %d", smb, smbCalcSize_LE(smb)); 66 cERROR(1, "smb buf %p len %d", smb, smbCalcSize(smb));
67} 67}
68 68
69 69
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index ac51cd2d33ae..a9d5692e0c20 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -58,9 +58,7 @@ struct cifs_sb_info {
58 unsigned int mnt_cifs_flags; 58 unsigned int mnt_cifs_flags;
59 int prepathlen; 59 int prepathlen;
60 char *prepath; /* relative path under the share to mount to */ 60 char *prepath; /* relative path under the share to mount to */
61#ifdef CONFIG_CIFS_DFS_UPCALL 61 char *mountdata; /* options received at mount time or via DFS refs */
62 char *mountdata; /* mount options received at mount time */
63#endif
64 struct backing_dev_info bdi; 62 struct backing_dev_info bdi;
65 struct delayed_work prune_tlinks; 63 struct delayed_work prune_tlinks;
66}; 64};
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 23d43cde4306..1b2e180b018d 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -277,6 +277,7 @@ cifsConvertToUCS(__le16 *target, const char *source, int srclen,
277 277
278 for (i = 0, j = 0; i < srclen; j++) { 278 for (i = 0, j = 0; i < srclen; j++) {
279 src_char = source[i]; 279 src_char = source[i];
280 charlen = 1;
280 switch (src_char) { 281 switch (src_char) {
281 case 0: 282 case 0:
282 put_unaligned(0, &target[j]); 283 put_unaligned(0, &target[j]);
@@ -316,16 +317,13 @@ cifsConvertToUCS(__le16 *target, const char *source, int srclen,
316 dst_char = cpu_to_le16(0x003f); 317 dst_char = cpu_to_le16(0x003f);
317 charlen = 1; 318 charlen = 1;
318 } 319 }
319 /*
320 * character may take more than one byte in the source
321 * string, but will take exactly two bytes in the
322 * target string
323 */
324 i += charlen;
325 continue;
326 } 320 }
321 /*
322 * character may take more than one byte in the source string,
323 * but will take exactly two bytes in the target string
324 */
325 i += charlen;
327 put_unaligned(dst_char, &target[j]); 326 put_unaligned(dst_char, &target[j]);
328 i++; /* move to next char in source string */
329 } 327 }
330 328
331ctoUCS_out: 329ctoUCS_out:
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 644dd882a560..6d02fd560566 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -82,6 +82,9 @@ int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *);
82char *cifs_strndup_from_ucs(const char *src, const int maxlen, 82char *cifs_strndup_from_ucs(const char *src, const int maxlen,
83 const bool is_unicode, 83 const bool is_unicode,
84 const struct nls_table *codepage); 84 const struct nls_table *codepage);
85extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
86 const struct nls_table *cp, int mapChars);
87
85#endif 88#endif
86 89
87/* 90/*
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index beeebf194234..f3c6fb9942ac 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -23,24 +23,16 @@
23 23
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/string.h>
27#include <linux/keyctl.h>
28#include <linux/key-type.h>
29#include <keys/user-type.h>
26#include "cifspdu.h" 30#include "cifspdu.h"
27#include "cifsglob.h" 31#include "cifsglob.h"
28#include "cifsacl.h" 32#include "cifsacl.h"
29#include "cifsproto.h" 33#include "cifsproto.h"
30#include "cifs_debug.h" 34#include "cifs_debug.h"
31 35
32
33static struct cifs_wksid wksidarr[NUM_WK_SIDS] = {
34 {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"},
35 {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"},
36 {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"},
37 {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(18), 0, 0, 0, 0} }, "sys"},
38 {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(544), 0, 0, 0} }, "root"},
39 {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(545), 0, 0, 0} }, "users"},
40 {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(546), 0, 0, 0} }, "guest"} }
41;
42
43
44/* security id for everyone/world system group */ 36/* security id for everyone/world system group */
45static const struct cifs_sid sid_everyone = { 37static const struct cifs_sid sid_everyone = {
46 1, 1, {0, 0, 0, 0, 0, 1}, {0} }; 38 1, 1, {0, 0, 0, 0, 0, 1}, {0} };
@@ -50,50 +42,385 @@ static const struct cifs_sid sid_authusers = {
50/* group users */ 42/* group users */
51static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} }; 43static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
52 44
45const struct cred *root_cred;
53 46
54int match_sid(struct cifs_sid *ctsid) 47static void
48shrink_idmap_tree(struct rb_root *root, int nr_to_scan, int *nr_rem,
49 int *nr_del)
55{ 50{
56 int i, j; 51 struct rb_node *node;
57 int num_subauth, num_sat, num_saw; 52 struct rb_node *tmp;
58 struct cifs_sid *cwsid; 53 struct cifs_sid_id *psidid;
54
55 node = rb_first(root);
56 while (node) {
57 tmp = node;
58 node = rb_next(tmp);
59 psidid = rb_entry(tmp, struct cifs_sid_id, rbnode);
60 if (nr_to_scan == 0 || *nr_del == nr_to_scan)
61 ++(*nr_rem);
62 else {
63 if (time_after(jiffies, psidid->time + SID_MAP_EXPIRE)
64 && psidid->refcount == 0) {
65 rb_erase(tmp, root);
66 ++(*nr_del);
67 } else
68 ++(*nr_rem);
69 }
70 }
71}
72
73/*
74 * Run idmap cache shrinker.
75 */
76static int
77cifs_idmap_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
78{
79 int nr_del = 0;
80 int nr_rem = 0;
81 struct rb_root *root;
82
83 root = &uidtree;
84 spin_lock(&siduidlock);
85 shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del);
86 spin_unlock(&siduidlock);
87
88 root = &gidtree;
89 spin_lock(&sidgidlock);
90 shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del);
91 spin_unlock(&sidgidlock);
92
93 return nr_rem;
94}
95
96static struct shrinker cifs_shrinker = {
97 .shrink = cifs_idmap_shrinker,
98 .seeks = DEFAULT_SEEKS,
99};
100
101static int
102cifs_idmap_key_instantiate(struct key *key, const void *data, size_t datalen)
103{
104 char *payload;
105
106 payload = kmalloc(datalen, GFP_KERNEL);
107 if (!payload)
108 return -ENOMEM;
109
110 memcpy(payload, data, datalen);
111 key->payload.data = payload;
112 return 0;
113}
114
115static inline void
116cifs_idmap_key_destroy(struct key *key)
117{
118 kfree(key->payload.data);
119}
59 120
60 if (!ctsid) 121struct key_type cifs_idmap_key_type = {
61 return -1; 122 .name = "cifs.idmap",
123 .instantiate = cifs_idmap_key_instantiate,
124 .destroy = cifs_idmap_key_destroy,
125 .describe = user_describe,
126 .match = user_match,
127};
128
129static void
130sid_to_str(struct cifs_sid *sidptr, char *sidstr)
131{
132 int i;
133 unsigned long saval;
134 char *strptr;
62 135
63 for (i = 0; i < NUM_WK_SIDS; ++i) { 136 strptr = sidstr;
64 cwsid = &(wksidarr[i].cifssid);
65 137
66 /* compare the revision */ 138 sprintf(strptr, "%s", "S");
67 if (ctsid->revision != cwsid->revision) 139 strptr = sidstr + strlen(sidstr);
68 continue;
69 140
70 /* compare all of the six auth values */ 141 sprintf(strptr, "-%d", sidptr->revision);
71 for (j = 0; j < 6; ++j) { 142 strptr = sidstr + strlen(sidstr);
72 if (ctsid->authority[j] != cwsid->authority[j]) 143
73 break; 144 for (i = 0; i < 6; ++i) {
145 if (sidptr->authority[i]) {
146 sprintf(strptr, "-%d", sidptr->authority[i]);
147 strptr = sidstr + strlen(sidstr);
74 } 148 }
75 if (j < 6) 149 }
76 continue; /* all of the auth values did not match */ 150
77 151 for (i = 0; i < sidptr->num_subauth; ++i) {
78 /* compare all of the subauth values if any */ 152 saval = le32_to_cpu(sidptr->sub_auth[i]);
79 num_sat = ctsid->num_subauth; 153 sprintf(strptr, "-%ld", saval);
80 num_saw = cwsid->num_subauth; 154 strptr = sidstr + strlen(sidstr);
81 num_subauth = num_sat < num_saw ? num_sat : num_saw; 155 }
82 if (num_subauth) { 156}
83 for (j = 0; j < num_subauth; ++j) { 157
84 if (ctsid->sub_auth[j] != cwsid->sub_auth[j]) 158static void
85 break; 159id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
86 } 160 struct cifs_sid_id **psidid, char *typestr)
87 if (j < num_subauth) 161{
88 continue; /* all sub_auth values do not match */ 162 int rc;
163 char *strptr;
164 struct rb_node *node = root->rb_node;
165 struct rb_node *parent = NULL;
166 struct rb_node **linkto = &(root->rb_node);
167 struct cifs_sid_id *lsidid;
168
169 while (node) {
170 lsidid = rb_entry(node, struct cifs_sid_id, rbnode);
171 parent = node;
172 rc = compare_sids(sidptr, &((lsidid)->sid));
173 if (rc > 0) {
174 linkto = &(node->rb_left);
175 node = node->rb_left;
176 } else if (rc < 0) {
177 linkto = &(node->rb_right);
178 node = node->rb_right;
179 }
180 }
181
182 memcpy(&(*psidid)->sid, sidptr, sizeof(struct cifs_sid));
183 (*psidid)->time = jiffies - (SID_MAP_RETRY + 1);
184 (*psidid)->refcount = 0;
185
186 sprintf((*psidid)->sidstr, "%s", typestr);
187 strptr = (*psidid)->sidstr + strlen((*psidid)->sidstr);
188 sid_to_str(&(*psidid)->sid, strptr);
189
190 clear_bit(SID_ID_PENDING, &(*psidid)->state);
191 clear_bit(SID_ID_MAPPED, &(*psidid)->state);
192
193 rb_link_node(&(*psidid)->rbnode, parent, linkto);
194 rb_insert_color(&(*psidid)->rbnode, root);
195}
196
197static struct cifs_sid_id *
198id_rb_search(struct rb_root *root, struct cifs_sid *sidptr)
199{
200 int rc;
201 struct rb_node *node = root->rb_node;
202 struct cifs_sid_id *lsidid;
203
204 while (node) {
205 lsidid = rb_entry(node, struct cifs_sid_id, rbnode);
206 rc = compare_sids(sidptr, &((lsidid)->sid));
207 if (rc > 0) {
208 node = node->rb_left;
209 } else if (rc < 0) {
210 node = node->rb_right;
211 } else /* node found */
212 return lsidid;
213 }
214
215 return NULL;
216}
217
218static int
219sidid_pending_wait(void *unused)
220{
221 schedule();
222 return signal_pending(current) ? -ERESTARTSYS : 0;
223}
224
225static int
226sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
227 struct cifs_fattr *fattr, uint sidtype)
228{
229 int rc;
230 unsigned long cid;
231 struct key *idkey;
232 const struct cred *saved_cred;
233 struct cifs_sid_id *psidid, *npsidid;
234 struct rb_root *cidtree;
235 spinlock_t *cidlock;
236
237 if (sidtype == SIDOWNER) {
238 cid = cifs_sb->mnt_uid; /* default uid, in case upcall fails */
239 cidlock = &siduidlock;
240 cidtree = &uidtree;
241 } else if (sidtype == SIDGROUP) {
242 cid = cifs_sb->mnt_gid; /* default gid, in case upcall fails */
243 cidlock = &sidgidlock;
244 cidtree = &gidtree;
245 } else
246 return -ENOENT;
247
248 spin_lock(cidlock);
249 psidid = id_rb_search(cidtree, psid);
250
251 if (!psidid) { /* node does not exist, allocate one & attempt adding */
252 spin_unlock(cidlock);
253 npsidid = kzalloc(sizeof(struct cifs_sid_id), GFP_KERNEL);
254 if (!npsidid)
255 return -ENOMEM;
256
257 npsidid->sidstr = kmalloc(SIDLEN, GFP_KERNEL);
258 if (!npsidid->sidstr) {
259 kfree(npsidid);
260 return -ENOMEM;
261 }
262
263 spin_lock(cidlock);
264 psidid = id_rb_search(cidtree, psid);
265 if (psidid) { /* node happened to get inserted meanwhile */
266 ++psidid->refcount;
267 spin_unlock(cidlock);
268 kfree(npsidid->sidstr);
269 kfree(npsidid);
270 } else {
271 psidid = npsidid;
272 id_rb_insert(cidtree, psid, &psidid,
273 sidtype == SIDOWNER ? "os:" : "gs:");
274 ++psidid->refcount;
275 spin_unlock(cidlock);
89 } 276 }
277 } else {
278 ++psidid->refcount;
279 spin_unlock(cidlock);
280 }
281
282 /*
283 * If we are here, it is safe to access psidid and its fields
284 * since a reference was taken earlier while holding the spinlock.
285 * A reference on the node is put without holding the spinlock
286 * and it is OK to do so in this case, shrinker will not erase
287 * this node until all references are put and we do not access
288 * any fields of the node after a reference is put .
289 */
290 if (test_bit(SID_ID_MAPPED, &psidid->state)) {
291 cid = psidid->id;
292 psidid->time = jiffies; /* update ts for accessing */
293 goto sid_to_id_out;
294 }
90 295
91 cFYI(1, "matching sid: %s\n", wksidarr[i].sidname); 296 if (time_after(psidid->time + SID_MAP_RETRY, jiffies))
92 return 0; /* sids compare/match */ 297 goto sid_to_id_out;
298
299 if (!test_and_set_bit(SID_ID_PENDING, &psidid->state)) {
300 saved_cred = override_creds(root_cred);
301 idkey = request_key(&cifs_idmap_key_type, psidid->sidstr, "");
302 if (IS_ERR(idkey))
303 cFYI(1, "%s: Can't map SID to an id", __func__);
304 else {
305 cid = *(unsigned long *)idkey->payload.value;
306 psidid->id = cid;
307 set_bit(SID_ID_MAPPED, &psidid->state);
308 key_put(idkey);
309 kfree(psidid->sidstr);
310 }
311 revert_creds(saved_cred);
312 psidid->time = jiffies; /* update ts for accessing */
313 clear_bit(SID_ID_PENDING, &psidid->state);
314 wake_up_bit(&psidid->state, SID_ID_PENDING);
315 } else {
316 rc = wait_on_bit(&psidid->state, SID_ID_PENDING,
317 sidid_pending_wait, TASK_INTERRUPTIBLE);
318 if (rc) {
319 cFYI(1, "%s: sidid_pending_wait interrupted %d",
320 __func__, rc);
321 --psidid->refcount; /* decremented without spinlock */
322 return rc;
323 }
324 if (test_bit(SID_ID_MAPPED, &psidid->state))
325 cid = psidid->id;
93 } 326 }
94 327
95 cFYI(1, "No matching sid"); 328sid_to_id_out:
96 return -1; 329 --psidid->refcount; /* decremented without spinlock */
330 if (sidtype == SIDOWNER)
331 fattr->cf_uid = cid;
332 else
333 fattr->cf_gid = cid;
334
335 return 0;
336}
337
338int
339init_cifs_idmap(void)
340{
341 struct cred *cred;
342 struct key *keyring;
343 int ret;
344
345 cFYI(1, "Registering the %s key type\n", cifs_idmap_key_type.name);
346
347 /* create an override credential set with a special thread keyring in
348 * which requests are cached
349 *
350 * this is used to prevent malicious redirections from being installed
351 * with add_key().
352 */
353 cred = prepare_kernel_cred(NULL);
354 if (!cred)
355 return -ENOMEM;
356
357 keyring = key_alloc(&key_type_keyring, ".cifs_idmap", 0, 0, cred,
358 (KEY_POS_ALL & ~KEY_POS_SETATTR) |
359 KEY_USR_VIEW | KEY_USR_READ,
360 KEY_ALLOC_NOT_IN_QUOTA);
361 if (IS_ERR(keyring)) {
362 ret = PTR_ERR(keyring);
363 goto failed_put_cred;
364 }
365
366 ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
367 if (ret < 0)
368 goto failed_put_key;
369
370 ret = register_key_type(&cifs_idmap_key_type);
371 if (ret < 0)
372 goto failed_put_key;
373
374 /* instruct request_key() to use this special keyring as a cache for
375 * the results it looks up */
376 cred->thread_keyring = keyring;
377 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
378 root_cred = cred;
379
380 spin_lock_init(&siduidlock);
381 uidtree = RB_ROOT;
382 spin_lock_init(&sidgidlock);
383 gidtree = RB_ROOT;
384
385 register_shrinker(&cifs_shrinker);
386
387 cFYI(1, "cifs idmap keyring: %d\n", key_serial(keyring));
388 return 0;
389
390failed_put_key:
391 key_put(keyring);
392failed_put_cred:
393 put_cred(cred);
394 return ret;
395}
396
397void
398exit_cifs_idmap(void)
399{
400 key_revoke(root_cred->thread_keyring);
401 unregister_key_type(&cifs_idmap_key_type);
402 put_cred(root_cred);
403 unregister_shrinker(&cifs_shrinker);
404 cFYI(1, "Unregistered %s key type\n", cifs_idmap_key_type.name);
405}
406
407void
408cifs_destroy_idmaptrees(void)
409{
410 struct rb_root *root;
411 struct rb_node *node;
412
413 root = &uidtree;
414 spin_lock(&siduidlock);
415 while ((node = rb_first(root)))
416 rb_erase(node, root);
417 spin_unlock(&siduidlock);
418
419 root = &gidtree;
420 spin_lock(&sidgidlock);
421 while ((node = rb_first(root)))
422 rb_erase(node, root);
423 spin_unlock(&sidgidlock);
97} 424}
98 425
99/* if the two SIDs (roughly equivalent to a UUID for a user or group) are 426/* if the two SIDs (roughly equivalent to a UUID for a user or group) are
@@ -104,16 +431,24 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
104 int num_subauth, num_sat, num_saw; 431 int num_subauth, num_sat, num_saw;
105 432
106 if ((!ctsid) || (!cwsid)) 433 if ((!ctsid) || (!cwsid))
107 return 0; 434 return 1;
108 435
109 /* compare the revision */ 436 /* compare the revision */
110 if (ctsid->revision != cwsid->revision) 437 if (ctsid->revision != cwsid->revision) {
111 return 0; 438 if (ctsid->revision > cwsid->revision)
439 return 1;
440 else
441 return -1;
442 }
112 443
113 /* compare all of the six auth values */ 444 /* compare all of the six auth values */
114 for (i = 0; i < 6; ++i) { 445 for (i = 0; i < 6; ++i) {
115 if (ctsid->authority[i] != cwsid->authority[i]) 446 if (ctsid->authority[i] != cwsid->authority[i]) {
116 return 0; 447 if (ctsid->authority[i] > cwsid->authority[i])
448 return 1;
449 else
450 return -1;
451 }
117 } 452 }
118 453
119 /* compare all of the subauth values if any */ 454 /* compare all of the subauth values if any */
@@ -122,12 +457,16 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
122 num_subauth = num_sat < num_saw ? num_sat : num_saw; 457 num_subauth = num_sat < num_saw ? num_sat : num_saw;
123 if (num_subauth) { 458 if (num_subauth) {
124 for (i = 0; i < num_subauth; ++i) { 459 for (i = 0; i < num_subauth; ++i) {
125 if (ctsid->sub_auth[i] != cwsid->sub_auth[i]) 460 if (ctsid->sub_auth[i] != cwsid->sub_auth[i]) {
126 return 0; 461 if (ctsid->sub_auth[i] > cwsid->sub_auth[i])
462 return 1;
463 else
464 return -1;
465 }
127 } 466 }
128 } 467 }
129 468
130 return 1; /* sids compare/match */ 469 return 0; /* sids compare/match */
131} 470}
132 471
133 472
@@ -382,22 +721,22 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
382#ifdef CONFIG_CIFS_DEBUG2 721#ifdef CONFIG_CIFS_DEBUG2
383 dump_ace(ppace[i], end_of_acl); 722 dump_ace(ppace[i], end_of_acl);
384#endif 723#endif
385 if (compare_sids(&(ppace[i]->sid), pownersid)) 724 if (compare_sids(&(ppace[i]->sid), pownersid) == 0)
386 access_flags_to_mode(ppace[i]->access_req, 725 access_flags_to_mode(ppace[i]->access_req,
387 ppace[i]->type, 726 ppace[i]->type,
388 &fattr->cf_mode, 727 &fattr->cf_mode,
389 &user_mask); 728 &user_mask);
390 if (compare_sids(&(ppace[i]->sid), pgrpsid)) 729 if (compare_sids(&(ppace[i]->sid), pgrpsid) == 0)
391 access_flags_to_mode(ppace[i]->access_req, 730 access_flags_to_mode(ppace[i]->access_req,
392 ppace[i]->type, 731 ppace[i]->type,
393 &fattr->cf_mode, 732 &fattr->cf_mode,
394 &group_mask); 733 &group_mask);
395 if (compare_sids(&(ppace[i]->sid), &sid_everyone)) 734 if (compare_sids(&(ppace[i]->sid), &sid_everyone) == 0)
396 access_flags_to_mode(ppace[i]->access_req, 735 access_flags_to_mode(ppace[i]->access_req,
397 ppace[i]->type, 736 ppace[i]->type,
398 &fattr->cf_mode, 737 &fattr->cf_mode,
399 &other_mask); 738 &other_mask);
400 if (compare_sids(&(ppace[i]->sid), &sid_authusers)) 739 if (compare_sids(&(ppace[i]->sid), &sid_authusers) == 0)
401 access_flags_to_mode(ppace[i]->access_req, 740 access_flags_to_mode(ppace[i]->access_req,
402 ppace[i]->type, 741 ppace[i]->type,
403 &fattr->cf_mode, 742 &fattr->cf_mode,
@@ -475,10 +814,10 @@ static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
475 814
476 815
477/* Convert CIFS ACL to POSIX form */ 816/* Convert CIFS ACL to POSIX form */
478static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len, 817static int parse_sec_desc(struct cifs_sb_info *cifs_sb,
479 struct cifs_fattr *fattr) 818 struct cifs_ntsd *pntsd, int acl_len, struct cifs_fattr *fattr)
480{ 819{
481 int rc; 820 int rc = 0;
482 struct cifs_sid *owner_sid_ptr, *group_sid_ptr; 821 struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
483 struct cifs_acl *dacl_ptr; /* no need for SACL ptr */ 822 struct cifs_acl *dacl_ptr; /* no need for SACL ptr */
484 char *end_of_acl = ((char *)pntsd) + acl_len; 823 char *end_of_acl = ((char *)pntsd) + acl_len;
@@ -500,12 +839,26 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
500 le32_to_cpu(pntsd->sacloffset), dacloffset); 839 le32_to_cpu(pntsd->sacloffset), dacloffset);
501/* cifs_dump_mem("owner_sid: ", owner_sid_ptr, 64); */ 840/* cifs_dump_mem("owner_sid: ", owner_sid_ptr, 64); */
502 rc = parse_sid(owner_sid_ptr, end_of_acl); 841 rc = parse_sid(owner_sid_ptr, end_of_acl);
503 if (rc) 842 if (rc) {
843 cFYI(1, "%s: Error %d parsing Owner SID", __func__, rc);
844 return rc;
845 }
846 rc = sid_to_id(cifs_sb, owner_sid_ptr, fattr, SIDOWNER);
847 if (rc) {
848 cFYI(1, "%s: Error %d mapping Owner SID to uid", __func__, rc);
504 return rc; 849 return rc;
850 }
505 851
506 rc = parse_sid(group_sid_ptr, end_of_acl); 852 rc = parse_sid(group_sid_ptr, end_of_acl);
507 if (rc) 853 if (rc) {
854 cFYI(1, "%s: Error %d mapping Owner SID to gid", __func__, rc);
508 return rc; 855 return rc;
856 }
857 rc = sid_to_id(cifs_sb, group_sid_ptr, fattr, SIDGROUP);
858 if (rc) {
859 cFYI(1, "%s: Error %d mapping Group SID to gid", __func__, rc);
860 return rc;
861 }
509 862
510 if (dacloffset) 863 if (dacloffset)
511 parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr, 864 parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr,
@@ -520,7 +873,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
520 memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr, 873 memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr,
521 sizeof(struct cifs_sid)); */ 874 sizeof(struct cifs_sid)); */
522 875
523 return 0; 876 return rc;
524} 877}
525 878
526 879
@@ -688,7 +1041,7 @@ out:
688} 1041}
689 1042
690/* Set an ACL on the server */ 1043/* Set an ACL on the server */
691static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, 1044int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
692 struct inode *inode, const char *path) 1045 struct inode *inode, const char *path)
693{ 1046{
694 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1047 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -727,7 +1080,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
727 rc = PTR_ERR(pntsd); 1080 rc = PTR_ERR(pntsd);
728 cERROR(1, "%s: error %d getting sec desc", __func__, rc); 1081 cERROR(1, "%s: error %d getting sec desc", __func__, rc);
729 } else { 1082 } else {
730 rc = parse_sec_desc(pntsd, acllen, fattr); 1083 rc = parse_sec_desc(cifs_sb, pntsd, acllen, fattr);
731 kfree(pntsd); 1084 kfree(pntsd);
732 if (rc) 1085 if (rc)
733 cERROR(1, "parse sec desc failed rc = %d", rc); 1086 cERROR(1, "parse sec desc failed rc = %d", rc);
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index c4ae7d036563..5c902c7ce524 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -39,6 +39,15 @@
39#define ACCESS_ALLOWED 0 39#define ACCESS_ALLOWED 0
40#define ACCESS_DENIED 1 40#define ACCESS_DENIED 1
41 41
42#define SIDOWNER 1
43#define SIDGROUP 2
44#define SIDLEN 150 /* S- 1 revision- 6 authorities- max 5 sub authorities */
45
46#define SID_ID_MAPPED 0
47#define SID_ID_PENDING 1
48#define SID_MAP_EXPIRE (3600 * HZ) /* map entry expires after one hour */
49#define SID_MAP_RETRY (300 * HZ) /* wait 5 minutes for next attempt to map */
50
42struct cifs_ntsd { 51struct cifs_ntsd {
43 __le16 revision; /* revision level */ 52 __le16 revision; /* revision level */
44 __le16 type; 53 __le16 type;
@@ -74,7 +83,21 @@ struct cifs_wksid {
74 char sidname[SIDNAMELENGTH]; 83 char sidname[SIDNAMELENGTH];
75} __attribute__((packed)); 84} __attribute__((packed));
76 85
77extern int match_sid(struct cifs_sid *); 86struct cifs_sid_id {
87 unsigned int refcount; /* increment with spinlock, decrement without */
88 unsigned long id;
89 unsigned long time;
90 unsigned long state;
91 char *sidstr;
92 struct rb_node rbnode;
93 struct cifs_sid sid;
94};
95
96#ifdef __KERNEL__
97extern struct key_type cifs_idmap_key_type;
98extern const struct cred *root_cred;
99#endif /* KERNEL */
100
78extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *); 101extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *);
79 102
80#endif /* _CIFSACL_H */ 103#endif /* _CIFSACL_H */
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index d1a016be73ba..45c3f78c8f81 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -60,7 +60,7 @@ static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
60 server->session_key.response, server->session_key.len); 60 server->session_key.response, server->session_key.len);
61 61
62 crypto_shash_update(&server->secmech.sdescmd5->shash, 62 crypto_shash_update(&server->secmech.sdescmd5->shash,
63 cifs_pdu->Protocol, cifs_pdu->smb_buf_length); 63 cifs_pdu->Protocol, be32_to_cpu(cifs_pdu->smb_buf_length));
64 64
65 rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature); 65 rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
66 66
@@ -268,10 +268,11 @@ int setup_ntlm_response(struct cifsSesInfo *ses)
268} 268}
269 269
270#ifdef CONFIG_CIFS_WEAK_PW_HASH 270#ifdef CONFIG_CIFS_WEAK_PW_HASH
271void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, 271int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
272 char *lnm_session_key) 272 char *lnm_session_key)
273{ 273{
274 int i; 274 int i;
275 int rc;
275 char password_with_pad[CIFS_ENCPWD_SIZE]; 276 char password_with_pad[CIFS_ENCPWD_SIZE];
276 277
277 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE); 278 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
@@ -282,7 +283,7 @@ void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
282 memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE); 283 memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE);
283 memcpy(lnm_session_key, password_with_pad, 284 memcpy(lnm_session_key, password_with_pad,
284 CIFS_ENCPWD_SIZE); 285 CIFS_ENCPWD_SIZE);
285 return; 286 return 0;
286 } 287 }
287 288
288 /* calculate old style session key */ 289 /* calculate old style session key */
@@ -299,10 +300,9 @@ void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
299 for (i = 0; i < CIFS_ENCPWD_SIZE; i++) 300 for (i = 0; i < CIFS_ENCPWD_SIZE; i++)
300 password_with_pad[i] = toupper(password_with_pad[i]); 301 password_with_pad[i] = toupper(password_with_pad[i]);
301 302
302 SMBencrypt(password_with_pad, cryptkey, lnm_session_key); 303 rc = SMBencrypt(password_with_pad, cryptkey, lnm_session_key);
303 304
304 /* clear password before we return/free memory */ 305 return rc;
305 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
306} 306}
307#endif /* CIFS_WEAK_PW_HASH */ 307#endif /* CIFS_WEAK_PW_HASH */
308 308
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5c412b33cd7c..493b74ca5648 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -128,29 +128,22 @@ cifs_read_super(struct super_block *sb, void *data,
128 } 128 }
129 cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages; 129 cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages;
130 130
131#ifdef CONFIG_CIFS_DFS_UPCALL 131 /*
132 /* copy mount params to sb for use in submounts */ 132 * Copy mount params to sb for use in submounts. Better to do
133 /* BB: should we move this after the mount so we 133 * the copy here and deal with the error before cleanup gets
134 * do not have to do the copy on failed mounts? 134 * complicated post-mount.
135 * BB: May be it is better to do simple copy before 135 */
136 * complex operation (mount), and in case of fail
137 * just exit instead of doing mount and attempting
138 * undo it if this copy fails?*/
139 if (data) { 136 if (data) {
140 int len = strlen(data); 137 cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL);
141 cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL);
142 if (cifs_sb->mountdata == NULL) { 138 if (cifs_sb->mountdata == NULL) {
143 bdi_destroy(&cifs_sb->bdi); 139 bdi_destroy(&cifs_sb->bdi);
144 kfree(sb->s_fs_info); 140 kfree(sb->s_fs_info);
145 sb->s_fs_info = NULL; 141 sb->s_fs_info = NULL;
146 return -ENOMEM; 142 return -ENOMEM;
147 } 143 }
148 strncpy(cifs_sb->mountdata, data, len + 1);
149 cifs_sb->mountdata[len] = '\0';
150 } 144 }
151#endif
152 145
153 rc = cifs_mount(sb, cifs_sb, data, devname); 146 rc = cifs_mount(sb, cifs_sb, devname);
154 147
155 if (rc) { 148 if (rc) {
156 if (!silent) 149 if (!silent)
@@ -163,7 +156,7 @@ cifs_read_super(struct super_block *sb, void *data,
163 sb->s_bdi = &cifs_sb->bdi; 156 sb->s_bdi = &cifs_sb->bdi;
164 sb->s_blocksize = CIFS_MAX_MSGSIZE; 157 sb->s_blocksize = CIFS_MAX_MSGSIZE;
165 sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */ 158 sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */
166 inode = cifs_root_iget(sb, ROOT_I); 159 inode = cifs_root_iget(sb);
167 160
168 if (IS_ERR(inode)) { 161 if (IS_ERR(inode)) {
169 rc = PTR_ERR(inode); 162 rc = PTR_ERR(inode);
@@ -184,12 +177,12 @@ cifs_read_super(struct super_block *sb, void *data,
184 else 177 else
185 sb->s_d_op = &cifs_dentry_ops; 178 sb->s_d_op = &cifs_dentry_ops;
186 179
187#ifdef CONFIG_CIFS_EXPERIMENTAL 180#ifdef CIFS_NFSD_EXPORT
188 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { 181 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
189 cFYI(1, "export ops supported"); 182 cFYI(1, "export ops supported");
190 sb->s_export_op = &cifs_export_ops; 183 sb->s_export_op = &cifs_export_ops;
191 } 184 }
192#endif /* EXPERIMENTAL */ 185#endif /* CIFS_NFSD_EXPORT */
193 186
194 return 0; 187 return 0;
195 188
@@ -202,12 +195,10 @@ out_no_root:
202 195
203out_mount_failed: 196out_mount_failed:
204 if (cifs_sb) { 197 if (cifs_sb) {
205#ifdef CONFIG_CIFS_DFS_UPCALL
206 if (cifs_sb->mountdata) { 198 if (cifs_sb->mountdata) {
207 kfree(cifs_sb->mountdata); 199 kfree(cifs_sb->mountdata);
208 cifs_sb->mountdata = NULL; 200 cifs_sb->mountdata = NULL;
209 } 201 }
210#endif
211 unload_nls(cifs_sb->local_nls); 202 unload_nls(cifs_sb->local_nls);
212 bdi_destroy(&cifs_sb->bdi); 203 bdi_destroy(&cifs_sb->bdi);
213 kfree(cifs_sb); 204 kfree(cifs_sb);
@@ -231,12 +222,10 @@ cifs_put_super(struct super_block *sb)
231 rc = cifs_umount(sb, cifs_sb); 222 rc = cifs_umount(sb, cifs_sb);
232 if (rc) 223 if (rc)
233 cERROR(1, "cifs_umount failed with return code %d", rc); 224 cERROR(1, "cifs_umount failed with return code %d", rc);
234#ifdef CONFIG_CIFS_DFS_UPCALL
235 if (cifs_sb->mountdata) { 225 if (cifs_sb->mountdata) {
236 kfree(cifs_sb->mountdata); 226 kfree(cifs_sb->mountdata);
237 cifs_sb->mountdata = NULL; 227 cifs_sb->mountdata = NULL;
238 } 228 }
239#endif
240 229
241 unload_nls(cifs_sb->local_nls); 230 unload_nls(cifs_sb->local_nls);
242 bdi_destroy(&cifs_sb->bdi); 231 bdi_destroy(&cifs_sb->bdi);
@@ -618,16 +607,31 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
618{ 607{
619 /* origin == SEEK_END => we must revalidate the cached file length */ 608 /* origin == SEEK_END => we must revalidate the cached file length */
620 if (origin == SEEK_END) { 609 if (origin == SEEK_END) {
621 int retval; 610 int rc;
622 611 struct inode *inode = file->f_path.dentry->d_inode;
623 /* some applications poll for the file length in this strange 612
624 way so we must seek to end on non-oplocked files by 613 /*
625 setting the revalidate time to zero */ 614 * We need to be sure that all dirty pages are written and the
626 CIFS_I(file->f_path.dentry->d_inode)->time = 0; 615 * server has the newest file length.
627 616 */
628 retval = cifs_revalidate_file(file); 617 if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping &&
629 if (retval < 0) 618 inode->i_mapping->nrpages != 0) {
630 return (loff_t)retval; 619 rc = filemap_fdatawait(inode->i_mapping);
620 if (rc) {
621 mapping_set_error(inode->i_mapping, rc);
622 return rc;
623 }
624 }
625 /*
626 * Some applications poll for the file length in this strange
627 * way so we must seek to end on non-oplocked files by
628 * setting the revalidate time to zero.
629 */
630 CIFS_I(inode)->time = 0;
631
632 rc = cifs_revalidate_file_attr(file);
633 if (rc < 0)
634 return (loff_t)rc;
631 } 635 }
632 return generic_file_llseek_unlocked(file, offset, origin); 636 return generic_file_llseek_unlocked(file, offset, origin);
633} 637}
@@ -760,10 +764,11 @@ const struct file_operations cifs_file_strict_ops = {
760}; 764};
761 765
762const struct file_operations cifs_file_direct_ops = { 766const struct file_operations cifs_file_direct_ops = {
763 /* no aio, no readv - 767 /* BB reevaluate whether they can be done with directio, no cache */
764 BB reevaluate whether they can be done with directio, no cache */ 768 .read = do_sync_read,
765 .read = cifs_user_read, 769 .write = do_sync_write,
766 .write = cifs_user_write, 770 .aio_read = cifs_user_readv,
771 .aio_write = cifs_user_writev,
767 .open = cifs_open, 772 .open = cifs_open,
768 .release = cifs_close, 773 .release = cifs_close,
769 .lock = cifs_lock, 774 .lock = cifs_lock,
@@ -815,10 +820,11 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
815}; 820};
816 821
817const struct file_operations cifs_file_direct_nobrl_ops = { 822const struct file_operations cifs_file_direct_nobrl_ops = {
818 /* no mmap, no aio, no readv - 823 /* BB reevaluate whether they can be done with directio, no cache */
819 BB reevaluate whether they can be done with directio, no cache */ 824 .read = do_sync_read,
820 .read = cifs_user_read, 825 .write = do_sync_write,
821 .write = cifs_user_write, 826 .aio_read = cifs_user_readv,
827 .aio_write = cifs_user_writev,
822 .open = cifs_open, 828 .open = cifs_open,
823 .release = cifs_close, 829 .release = cifs_close,
824 .fsync = cifs_fsync, 830 .fsync = cifs_fsync,
@@ -981,10 +987,10 @@ init_cifs(void)
981 int rc = 0; 987 int rc = 0;
982 cifs_proc_init(); 988 cifs_proc_init();
983 INIT_LIST_HEAD(&cifs_tcp_ses_list); 989 INIT_LIST_HEAD(&cifs_tcp_ses_list);
984#ifdef CONFIG_CIFS_EXPERIMENTAL 990#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
985 INIT_LIST_HEAD(&GlobalDnotifyReqList); 991 INIT_LIST_HEAD(&GlobalDnotifyReqList);
986 INIT_LIST_HEAD(&GlobalDnotifyRsp_Q); 992 INIT_LIST_HEAD(&GlobalDnotifyRsp_Q);
987#endif 993#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
988/* 994/*
989 * Initialize Global counters 995 * Initialize Global counters
990 */ 996 */
@@ -1033,22 +1039,33 @@ init_cifs(void)
1033 if (rc) 1039 if (rc)
1034 goto out_destroy_mids; 1040 goto out_destroy_mids;
1035 1041
1036 rc = register_filesystem(&cifs_fs_type);
1037 if (rc)
1038 goto out_destroy_request_bufs;
1039#ifdef CONFIG_CIFS_UPCALL 1042#ifdef CONFIG_CIFS_UPCALL
1040 rc = register_key_type(&cifs_spnego_key_type); 1043 rc = register_key_type(&cifs_spnego_key_type);
1041 if (rc) 1044 if (rc)
1042 goto out_unregister_filesystem; 1045 goto out_destroy_request_bufs;
1043#endif 1046#endif /* CONFIG_CIFS_UPCALL */
1047
1048#ifdef CONFIG_CIFS_ACL
1049 rc = init_cifs_idmap();
1050 if (rc)
1051 goto out_register_key_type;
1052#endif /* CONFIG_CIFS_ACL */
1053
1054 rc = register_filesystem(&cifs_fs_type);
1055 if (rc)
1056 goto out_init_cifs_idmap;
1044 1057
1045 return 0; 1058 return 0;
1046 1059
1047#ifdef CONFIG_CIFS_UPCALL 1060out_init_cifs_idmap:
1048out_unregister_filesystem: 1061#ifdef CONFIG_CIFS_ACL
1049 unregister_filesystem(&cifs_fs_type); 1062 exit_cifs_idmap();
1063out_register_key_type:
1050#endif 1064#endif
1065#ifdef CONFIG_CIFS_UPCALL
1066 unregister_key_type(&cifs_spnego_key_type);
1051out_destroy_request_bufs: 1067out_destroy_request_bufs:
1068#endif
1052 cifs_destroy_request_bufs(); 1069 cifs_destroy_request_bufs();
1053out_destroy_mids: 1070out_destroy_mids:
1054 cifs_destroy_mids(); 1071 cifs_destroy_mids();
@@ -1070,6 +1087,10 @@ exit_cifs(void)
1070#ifdef CONFIG_CIFS_DFS_UPCALL 1087#ifdef CONFIG_CIFS_DFS_UPCALL
1071 cifs_dfs_release_automount_timer(); 1088 cifs_dfs_release_automount_timer();
1072#endif 1089#endif
1090#ifdef CONFIG_CIFS_ACL
1091 cifs_destroy_idmaptrees();
1092 exit_cifs_idmap();
1093#endif
1073#ifdef CONFIG_CIFS_UPCALL 1094#ifdef CONFIG_CIFS_UPCALL
1074 unregister_key_type(&cifs_spnego_key_type); 1095 unregister_key_type(&cifs_spnego_key_type);
1075#endif 1096#endif
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index a9371b6578c0..64313f778ebf 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -47,7 +47,7 @@ extern void cifs_sb_deactive(struct super_block *sb);
47 47
48/* Functions related to inodes */ 48/* Functions related to inodes */
49extern const struct inode_operations cifs_dir_inode_ops; 49extern const struct inode_operations cifs_dir_inode_ops;
50extern struct inode *cifs_root_iget(struct super_block *, unsigned long); 50extern struct inode *cifs_root_iget(struct super_block *);
51extern int cifs_create(struct inode *, struct dentry *, int, 51extern int cifs_create(struct inode *, struct dentry *, int,
52 struct nameidata *); 52 struct nameidata *);
53extern struct dentry *cifs_lookup(struct inode *, struct dentry *, 53extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
@@ -59,9 +59,11 @@ extern int cifs_mkdir(struct inode *, struct dentry *, int);
59extern int cifs_rmdir(struct inode *, struct dentry *); 59extern int cifs_rmdir(struct inode *, struct dentry *);
60extern int cifs_rename(struct inode *, struct dentry *, struct inode *, 60extern int cifs_rename(struct inode *, struct dentry *, struct inode *,
61 struct dentry *); 61 struct dentry *);
62extern int cifs_revalidate_file_attr(struct file *filp);
63extern int cifs_revalidate_dentry_attr(struct dentry *);
62extern int cifs_revalidate_file(struct file *filp); 64extern int cifs_revalidate_file(struct file *filp);
63extern int cifs_revalidate_dentry(struct dentry *); 65extern int cifs_revalidate_dentry(struct dentry *);
64extern void cifs_invalidate_mapping(struct inode *inode); 66extern int cifs_invalidate_mapping(struct inode *inode);
65extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *); 67extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
66extern int cifs_setattr(struct dentry *, struct iattr *); 68extern int cifs_setattr(struct dentry *, struct iattr *);
67 69
@@ -80,12 +82,12 @@ extern const struct file_operations cifs_file_strict_nobrl_ops;
80extern int cifs_open(struct inode *inode, struct file *file); 82extern int cifs_open(struct inode *inode, struct file *file);
81extern int cifs_close(struct inode *inode, struct file *file); 83extern int cifs_close(struct inode *inode, struct file *file);
82extern int cifs_closedir(struct inode *inode, struct file *file); 84extern int cifs_closedir(struct inode *inode, struct file *file);
83extern ssize_t cifs_user_read(struct file *file, char __user *read_data, 85extern ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
84 size_t read_size, loff_t *poffset); 86 unsigned long nr_segs, loff_t pos);
85extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, 87extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
86 unsigned long nr_segs, loff_t pos); 88 unsigned long nr_segs, loff_t pos);
87extern ssize_t cifs_user_write(struct file *file, const char __user *write_data, 89extern ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
88 size_t write_size, loff_t *poffset); 90 unsigned long nr_segs, loff_t pos);
89extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, 91extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
90 unsigned long nr_segs, loff_t pos); 92 unsigned long nr_segs, loff_t pos);
91extern int cifs_lock(struct file *, int, struct file_lock *); 93extern int cifs_lock(struct file *, int, struct file_lock *);
@@ -123,9 +125,9 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
123extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); 125extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
124extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); 126extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
125 127
126#ifdef CONFIG_CIFS_EXPERIMENTAL 128#ifdef CIFS_NFSD_EXPORT
127extern const struct export_operations cifs_export_ops; 129extern const struct export_operations cifs_export_ops;
128#endif /* EXPERIMENTAL */ 130#endif /* CIFS_NFSD_EXPORT */
129 131
130#define CIFS_VERSION "1.71" 132#define CIFS_VERSION "1.72"
131#endif /* _CIFSFS_H */ 133#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a5d1106fcbde..76b4517e74b0 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -274,7 +274,8 @@ struct cifsSesInfo {
274 int capabilities; 274 int capabilities;
275 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for 275 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for
276 TCP names - will ipv6 and sctp addresses fit? */ 276 TCP names - will ipv6 and sctp addresses fit? */
277 char *user_name; 277 char *user_name; /* must not be null except during init of sess
278 and after mount option parsing we fill it */
278 char *domainName; 279 char *domainName;
279 char *password; 280 char *password;
280 struct session_key auth_key; 281 struct session_key auth_key;
@@ -780,10 +781,12 @@ GLOBAL_EXTERN spinlock_t cifs_tcp_ses_lock;
780 */ 781 */
781GLOBAL_EXTERN spinlock_t cifs_file_list_lock; 782GLOBAL_EXTERN spinlock_t cifs_file_list_lock;
782 783
784#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
783/* Outstanding dir notify requests */ 785/* Outstanding dir notify requests */
784GLOBAL_EXTERN struct list_head GlobalDnotifyReqList; 786GLOBAL_EXTERN struct list_head GlobalDnotifyReqList;
785/* DirNotify response queue */ 787/* DirNotify response queue */
786GLOBAL_EXTERN struct list_head GlobalDnotifyRsp_Q; 788GLOBAL_EXTERN struct list_head GlobalDnotifyRsp_Q;
789#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
787 790
788/* 791/*
789 * Global transaction id (XID) information 792 * Global transaction id (XID) information
@@ -830,6 +833,11 @@ GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/
830/* reconnect after this many failed echo attempts */ 833/* reconnect after this many failed echo attempts */
831GLOBAL_EXTERN unsigned short echo_retries; 834GLOBAL_EXTERN unsigned short echo_retries;
832 835
836GLOBAL_EXTERN struct rb_root uidtree;
837GLOBAL_EXTERN struct rb_root gidtree;
838GLOBAL_EXTERN spinlock_t siduidlock;
839GLOBAL_EXTERN spinlock_t sidgidlock;
840
833void cifs_oplock_break(struct work_struct *work); 841void cifs_oplock_break(struct work_struct *work);
834void cifs_oplock_break_get(struct cifsFileInfo *cfile); 842void cifs_oplock_break_get(struct cifsFileInfo *cfile);
835void cifs_oplock_break_put(struct cifsFileInfo *cfile); 843void cifs_oplock_break_put(struct cifsFileInfo *cfile);
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b5c8cc5d7a7f..de3aa285de03 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -397,9 +397,9 @@
397#define GETU32(var) (*((__u32 *)var)) /* BB check for endian issues */ 397#define GETU32(var) (*((__u32 *)var)) /* BB check for endian issues */
398 398
399struct smb_hdr { 399struct smb_hdr {
400 __u32 smb_buf_length; /* big endian on wire *//* BB length is only two 400 __be32 smb_buf_length; /* BB length is only two (rarely three) bytes,
401 or three bytes - with one or two byte type preceding it that are 401 with one or two byte "type" preceding it that will be
402 zero - we could mask the type byte off just in case BB */ 402 zero - we could mask the type byte off */
403 __u8 Protocol[4]; 403 __u8 Protocol[4];
404 __u8 Command; 404 __u8 Command;
405 union { 405 union {
@@ -428,43 +428,28 @@ struct smb_hdr {
428 __u8 WordCount; 428 __u8 WordCount;
429} __attribute__((packed)); 429} __attribute__((packed));
430 430
431/* given a pointer to an smb_hdr retrieve a char pointer to the byte count */ 431/* given a pointer to an smb_hdr, retrieve a void pointer to the ByteCount */
432#define BCC(smb_var) ((unsigned char *)(smb_var) + sizeof(struct smb_hdr) + \ 432static inline void *
433 (2 * (smb_var)->WordCount)) 433BCC(struct smb_hdr *smb)
434{
435 return (void *)smb + sizeof(*smb) + 2 * smb->WordCount;
436}
434 437
435/* given a pointer to an smb_hdr retrieve the pointer to the byte area */ 438/* given a pointer to an smb_hdr retrieve the pointer to the byte area */
436#define pByteArea(smb_var) (BCC(smb_var) + 2) 439#define pByteArea(smb_var) (BCC(smb_var) + 2)
437 440
438/* get the converted ByteCount for a SMB packet and return it */
439static inline __u16
440get_bcc(struct smb_hdr *hdr)
441{
442 __u16 *bc_ptr = (__u16 *)BCC(hdr);
443
444 return get_unaligned(bc_ptr);
445}
446
447/* get the unconverted ByteCount for a SMB packet and return it */ 441/* get the unconverted ByteCount for a SMB packet and return it */
448static inline __u16 442static inline __u16
449get_bcc_le(struct smb_hdr *hdr) 443get_bcc(struct smb_hdr *hdr)
450{ 444{
451 __le16 *bc_ptr = (__le16 *)BCC(hdr); 445 __le16 *bc_ptr = (__le16 *)BCC(hdr);
452 446
453 return get_unaligned_le16(bc_ptr); 447 return get_unaligned_le16(bc_ptr);
454} 448}
455 449
456/* set the ByteCount for a SMB packet in host-byte order */
457static inline void
458put_bcc(__u16 count, struct smb_hdr *hdr)
459{
460 __u16 *bc_ptr = (__u16 *)BCC(hdr);
461
462 put_unaligned(count, bc_ptr);
463}
464
465/* set the ByteCount for a SMB packet in little-endian */ 450/* set the ByteCount for a SMB packet in little-endian */
466static inline void 451static inline void
467put_bcc_le(__u16 count, struct smb_hdr *hdr) 452put_bcc(__u16 count, struct smb_hdr *hdr)
468{ 453{
469 __le16 *bc_ptr = (__le16 *)BCC(hdr); 454 __le16 *bc_ptr = (__le16 *)BCC(hdr);
470 455
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 8096f27ad9a8..6e69e06a30b3 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -53,6 +53,9 @@ do { \
53 cFYI(1, "CIFS VFS: leaving %s (xid = %d) rc = %d", \ 53 cFYI(1, "CIFS VFS: leaving %s (xid = %d) rc = %d", \
54 __func__, curr_xid, (int)rc); \ 54 __func__, curr_xid, (int)rc); \
55} while (0) 55} while (0)
56extern int init_cifs_idmap(void);
57extern void exit_cifs_idmap(void);
58extern void cifs_destroy_idmaptrees(void);
56extern char *build_path_from_dentry(struct dentry *); 59extern char *build_path_from_dentry(struct dentry *);
57extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb, 60extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb,
58 struct cifsTconInfo *tcon); 61 struct cifsTconInfo *tcon);
@@ -90,7 +93,6 @@ extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
90extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); 93extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
91extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); 94extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
92extern unsigned int smbCalcSize(struct smb_hdr *ptr); 95extern unsigned int smbCalcSize(struct smb_hdr *ptr);
93extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
94extern int decode_negTokenInit(unsigned char *security_blob, int length, 96extern int decode_negTokenInit(unsigned char *security_blob, int length,
95 struct TCP_Server_Info *server); 97 struct TCP_Server_Info *server);
96extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len); 98extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
@@ -143,8 +145,10 @@ extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
143extern int mode_to_cifs_acl(struct inode *inode, const char *path, __u64); 145extern int mode_to_cifs_acl(struct inode *inode, const char *path, __u64);
144extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *, 146extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *,
145 const char *, u32 *); 147 const char *, u32 *);
148extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *,
149 const char *);
146 150
147extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *, 151extern int cifs_mount(struct super_block *, struct cifs_sb_info *,
148 const char *); 152 const char *);
149extern int cifs_umount(struct super_block *, struct cifs_sb_info *); 153extern int cifs_umount(struct super_block *, struct cifs_sb_info *);
150extern void cifs_dfs_release_automount_timer(void); 154extern void cifs_dfs_release_automount_timer(void);
@@ -304,12 +308,13 @@ extern int CIFSSMBUnixQuerySymLink(const int xid,
304 struct cifsTconInfo *tcon, 308 struct cifsTconInfo *tcon,
305 const unsigned char *searchName, char **syminfo, 309 const unsigned char *searchName, char **syminfo,
306 const struct nls_table *nls_codepage); 310 const struct nls_table *nls_codepage);
311#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL
307extern int CIFSSMBQueryReparseLinkInfo(const int xid, 312extern int CIFSSMBQueryReparseLinkInfo(const int xid,
308 struct cifsTconInfo *tcon, 313 struct cifsTconInfo *tcon,
309 const unsigned char *searchName, 314 const unsigned char *searchName,
310 char *symlinkinfo, const int buflen, __u16 fid, 315 char *symlinkinfo, const int buflen, __u16 fid,
311 const struct nls_table *nls_codepage); 316 const struct nls_table *nls_codepage);
312 317#endif /* temporarily unused until cifs_symlink fixed */
313extern int CIFSSMBOpen(const int xid, struct cifsTconInfo *tcon, 318extern int CIFSSMBOpen(const int xid, struct cifsTconInfo *tcon,
314 const char *fileName, const int disposition, 319 const char *fileName, const int disposition,
315 const int access_flags, const int omode, 320 const int access_flags, const int omode,
@@ -348,8 +353,6 @@ extern int CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon,
348 const unsigned char *searchName, __u64 *inode_number, 353 const unsigned char *searchName, __u64 *inode_number,
349 const struct nls_table *nls_codepage, 354 const struct nls_table *nls_codepage,
350 int remap_special_chars); 355 int remap_special_chars);
351extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
352 const struct nls_table *cp, int mapChars);
353 356
354extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, 357extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
355 const __u16 netfid, const __u64 len, 358 const __u16 netfid, const __u64 len,
@@ -383,9 +386,15 @@ extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
383extern int calc_seckey(struct cifsSesInfo *); 386extern int calc_seckey(struct cifsSesInfo *);
384 387
385#ifdef CONFIG_CIFS_WEAK_PW_HASH 388#ifdef CONFIG_CIFS_WEAK_PW_HASH
386extern void calc_lanman_hash(const char *password, const char *cryptkey, 389extern int calc_lanman_hash(const char *password, const char *cryptkey,
387 bool encrypt, char *lnm_session_key); 390 bool encrypt, char *lnm_session_key);
388#endif /* CIFS_WEAK_PW_HASH */ 391#endif /* CIFS_WEAK_PW_HASH */
392#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
393extern int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
394 const int notify_subdirs, const __u16 netfid,
395 __u32 filter, struct file *file, int multishot,
396 const struct nls_table *nls_codepage);
397#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
389extern int CIFSSMBCopy(int xid, 398extern int CIFSSMBCopy(int xid,
390 struct cifsTconInfo *source_tcon, 399 struct cifsTconInfo *source_tcon,
391 const char *fromName, 400 const char *fromName,
@@ -393,10 +402,6 @@ extern int CIFSSMBCopy(int xid,
393 const char *toName, const int flags, 402 const char *toName, const int flags,
394 const struct nls_table *nls_codepage, 403 const struct nls_table *nls_codepage,
395 int remap_special_chars); 404 int remap_special_chars);
396extern int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
397 const int notify_subdirs, const __u16 netfid,
398 __u32 filter, struct file *file, int multishot,
399 const struct nls_table *nls_codepage);
400extern ssize_t CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon, 405extern ssize_t CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon,
401 const unsigned char *searchName, 406 const unsigned char *searchName,
402 const unsigned char *ea_name, char *EAData, 407 const unsigned char *ea_name, char *EAData,
@@ -427,9 +432,6 @@ extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr,
427 struct cifs_sb_info *cifs_sb, int xid); 432 struct cifs_sb_info *cifs_sb, int xid);
428extern int mdfour(unsigned char *, unsigned char *, int); 433extern int mdfour(unsigned char *, unsigned char *, int);
429extern int E_md4hash(const unsigned char *passwd, unsigned char *p16); 434extern int E_md4hash(const unsigned char *passwd, unsigned char *p16);
430extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8, 435extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8,
431 unsigned char *p24);
432extern void E_P16(unsigned char *p14, unsigned char *p16);
433extern void E_P24(unsigned char *p21, const unsigned char *c8,
434 unsigned char *p24); 436 unsigned char *p24);
435#endif /* _CIFSPROTO_H */ 437#endif /* _CIFSPROTO_H */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index df959bae6728..83df937b814e 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -339,12 +339,13 @@ static int validate_t2(struct smb_t2_rsp *pSMB)
339 get_unaligned_le16(&pSMB->t2_rsp.DataOffset) > 1024) 339 get_unaligned_le16(&pSMB->t2_rsp.DataOffset) > 1024)
340 goto vt2_err; 340 goto vt2_err;
341 341
342 /* check that bcc is at least as big as parms + data */
343 /* check that bcc is less than negotiated smb buffer */
344 total_size = get_unaligned_le16(&pSMB->t2_rsp.ParameterCount); 342 total_size = get_unaligned_le16(&pSMB->t2_rsp.ParameterCount);
345 if (total_size >= 512) 343 if (total_size >= 512)
346 goto vt2_err; 344 goto vt2_err;
347 345
346 /* check that bcc is at least as big as parms + data, and that it is
347 * less than negotiated smb buffer
348 */
348 total_size += get_unaligned_le16(&pSMB->t2_rsp.DataCount); 349 total_size += get_unaligned_le16(&pSMB->t2_rsp.DataCount);
349 if (total_size > get_bcc(&pSMB->hdr) || 350 if (total_size > get_bcc(&pSMB->hdr) ||
350 total_size >= CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) 351 total_size >= CIFSMaxBufSize + MAX_CIFS_HDR_SIZE)
@@ -357,6 +358,13 @@ vt2_err:
357 return -EINVAL; 358 return -EINVAL;
358} 359}
359 360
361static inline void inc_rfc1001_len(void *pSMB, int count)
362{
363 struct smb_hdr *hdr = (struct smb_hdr *)pSMB;
364
365 be32_add_cpu(&hdr->smb_buf_length, count);
366}
367
360int 368int
361CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) 369CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
362{ 370{
@@ -409,7 +417,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
409 count += strlen(protocols[i].name) + 1; 417 count += strlen(protocols[i].name) + 1;
410 /* null at end of source and target buffers anyway */ 418 /* null at end of source and target buffers anyway */
411 } 419 }
412 pSMB->hdr.smb_buf_length += count; 420 inc_rfc1001_len(pSMB, count);
413 pSMB->ByteCount = cpu_to_le16(count); 421 pSMB->ByteCount = cpu_to_le16(count);
414 422
415 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB, 423 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
@@ -541,10 +549,6 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
541 server->secType = RawNTLMSSP; 549 server->secType = RawNTLMSSP;
542 else if (secFlags & CIFSSEC_MAY_LANMAN) 550 else if (secFlags & CIFSSEC_MAY_LANMAN)
543 server->secType = LANMAN; 551 server->secType = LANMAN;
544/* #ifdef CONFIG_CIFS_EXPERIMENTAL
545 else if (secFlags & CIFSSEC_MAY_PLNTXT)
546 server->secType = ??
547#endif */
548 else { 552 else {
549 rc = -EOPNOTSUPP; 553 rc = -EOPNOTSUPP;
550 cERROR(1, "Invalid security type"); 554 cERROR(1, "Invalid security type");
@@ -578,7 +582,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
578 582
579 if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) && 583 if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) &&
580 (server->capabilities & CAP_EXTENDED_SECURITY)) { 584 (server->capabilities & CAP_EXTENDED_SECURITY)) {
581 count = pSMBr->ByteCount; 585 count = get_bcc(&pSMBr->hdr);
582 if (count < 16) { 586 if (count < 16) {
583 rc = -EIO; 587 rc = -EIO;
584 goto neg_err_exit; 588 goto neg_err_exit;
@@ -732,9 +736,9 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
732 smb->hdr.Tid = 0xffff; 736 smb->hdr.Tid = 0xffff;
733 smb->hdr.WordCount = 1; 737 smb->hdr.WordCount = 1;
734 put_unaligned_le16(1, &smb->EchoCount); 738 put_unaligned_le16(1, &smb->EchoCount);
735 put_bcc_le(1, &smb->hdr); 739 put_bcc(1, &smb->hdr);
736 smb->Data[0] = 'a'; 740 smb->Data[0] = 'a';
737 smb->hdr.smb_buf_length += 3; 741 inc_rfc1001_len(smb, 3);
738 742
739 rc = cifs_call_async(server, (struct smb_hdr *)smb, 743 rc = cifs_call_async(server, (struct smb_hdr *)smb,
740 cifs_echo_callback, server); 744 cifs_echo_callback, server);
@@ -852,7 +856,7 @@ PsxDelete:
852 pSMB->TotalParameterCount = pSMB->ParameterCount; 856 pSMB->TotalParameterCount = pSMB->ParameterCount;
853 pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_UNLINK); 857 pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_UNLINK);
854 pSMB->Reserved4 = 0; 858 pSMB->Reserved4 = 0;
855 pSMB->hdr.smb_buf_length += byte_count; 859 inc_rfc1001_len(pSMB, byte_count);
856 pSMB->ByteCount = cpu_to_le16(byte_count); 860 pSMB->ByteCount = cpu_to_le16(byte_count);
857 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 861 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
858 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 862 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -898,7 +902,7 @@ DelFileRetry:
898 pSMB->SearchAttributes = 902 pSMB->SearchAttributes =
899 cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM); 903 cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM);
900 pSMB->BufferFormat = 0x04; 904 pSMB->BufferFormat = 0x04;
901 pSMB->hdr.smb_buf_length += name_len + 1; 905 inc_rfc1001_len(pSMB, name_len + 1);
902 pSMB->ByteCount = cpu_to_le16(name_len + 1); 906 pSMB->ByteCount = cpu_to_le16(name_len + 1);
903 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 907 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
904 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 908 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -942,7 +946,7 @@ RmDirRetry:
942 } 946 }
943 947
944 pSMB->BufferFormat = 0x04; 948 pSMB->BufferFormat = 0x04;
945 pSMB->hdr.smb_buf_length += name_len + 1; 949 inc_rfc1001_len(pSMB, name_len + 1);
946 pSMB->ByteCount = cpu_to_le16(name_len + 1); 950 pSMB->ByteCount = cpu_to_le16(name_len + 1);
947 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 951 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
948 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 952 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -985,7 +989,7 @@ MkDirRetry:
985 } 989 }
986 990
987 pSMB->BufferFormat = 0x04; 991 pSMB->BufferFormat = 0x04;
988 pSMB->hdr.smb_buf_length += name_len + 1; 992 inc_rfc1001_len(pSMB, name_len + 1);
989 pSMB->ByteCount = cpu_to_le16(name_len + 1); 993 pSMB->ByteCount = cpu_to_le16(name_len + 1);
990 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 994 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
991 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 995 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -1063,7 +1067,7 @@ PsxCreat:
1063 pSMB->TotalParameterCount = pSMB->ParameterCount; 1067 pSMB->TotalParameterCount = pSMB->ParameterCount;
1064 pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_OPEN); 1068 pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_OPEN);
1065 pSMB->Reserved4 = 0; 1069 pSMB->Reserved4 = 0;
1066 pSMB->hdr.smb_buf_length += byte_count; 1070 inc_rfc1001_len(pSMB, byte_count);
1067 pSMB->ByteCount = cpu_to_le16(byte_count); 1071 pSMB->ByteCount = cpu_to_le16(byte_count);
1068 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 1072 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
1069 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 1073 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -1075,7 +1079,7 @@ PsxCreat:
1075 cFYI(1, "copying inode info"); 1079 cFYI(1, "copying inode info");
1076 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 1080 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
1077 1081
1078 if (rc || (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP))) { 1082 if (rc || get_bcc(&pSMBr->hdr) < sizeof(OPEN_PSX_RSP)) {
1079 rc = -EIO; /* bad smb */ 1083 rc = -EIO; /* bad smb */
1080 goto psx_create_err; 1084 goto psx_create_err;
1081 } 1085 }
@@ -1096,7 +1100,7 @@ PsxCreat:
1096 pRetData->Type = cpu_to_le32(-1); /* unknown */ 1100 pRetData->Type = cpu_to_le32(-1); /* unknown */
1097 cFYI(DBG2, "unknown type"); 1101 cFYI(DBG2, "unknown type");
1098 } else { 1102 } else {
1099 if (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP) 1103 if (get_bcc(&pSMBr->hdr) < sizeof(OPEN_PSX_RSP)
1100 + sizeof(FILE_UNIX_BASIC_INFO)) { 1104 + sizeof(FILE_UNIX_BASIC_INFO)) {
1101 cERROR(1, "Open response data too small"); 1105 cERROR(1, "Open response data too small");
1102 pRetData->Type = cpu_to_le32(-1); 1106 pRetData->Type = cpu_to_le32(-1);
@@ -1228,7 +1232,7 @@ OldOpenRetry:
1228 pSMB->Sattr = cpu_to_le16(ATTR_HIDDEN | ATTR_SYSTEM | ATTR_DIRECTORY); 1232 pSMB->Sattr = cpu_to_le16(ATTR_HIDDEN | ATTR_SYSTEM | ATTR_DIRECTORY);
1229 pSMB->OpenFunction = cpu_to_le16(convert_disposition(openDisposition)); 1233 pSMB->OpenFunction = cpu_to_le16(convert_disposition(openDisposition));
1230 count += name_len; 1234 count += name_len;
1231 pSMB->hdr.smb_buf_length += count; 1235 inc_rfc1001_len(pSMB, count);
1232 1236
1233 pSMB->ByteCount = cpu_to_le16(count); 1237 pSMB->ByteCount = cpu_to_le16(count);
1234 /* long_op set to 1 to allow for oplock break timeouts */ 1238 /* long_op set to 1 to allow for oplock break timeouts */
@@ -1341,7 +1345,7 @@ openRetry:
1341 SECURITY_CONTEXT_TRACKING | SECURITY_EFFECTIVE_ONLY; 1345 SECURITY_CONTEXT_TRACKING | SECURITY_EFFECTIVE_ONLY;
1342 1346
1343 count += name_len; 1347 count += name_len;
1344 pSMB->hdr.smb_buf_length += count; 1348 inc_rfc1001_len(pSMB, count);
1345 1349
1346 pSMB->ByteCount = cpu_to_le16(count); 1350 pSMB->ByteCount = cpu_to_le16(count);
1347 /* long_op set to 1 to allow for oplock break timeouts */ 1351 /* long_op set to 1 to allow for oplock break timeouts */
@@ -1426,7 +1430,7 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
1426 } 1430 }
1427 1431
1428 iov[0].iov_base = (char *)pSMB; 1432 iov[0].iov_base = (char *)pSMB;
1429 iov[0].iov_len = pSMB->hdr.smb_buf_length + 4; 1433 iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
1430 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */, 1434 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
1431 &resp_buf_type, CIFS_LOG_ERROR); 1435 &resp_buf_type, CIFS_LOG_ERROR);
1432 cifs_stats_inc(&tcon->num_reads); 1436 cifs_stats_inc(&tcon->num_reads);
@@ -1560,7 +1564,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
1560 1564
1561 pSMB->DataLengthLow = cpu_to_le16(bytes_sent & 0xFFFF); 1565 pSMB->DataLengthLow = cpu_to_le16(bytes_sent & 0xFFFF);
1562 pSMB->DataLengthHigh = cpu_to_le16(bytes_sent >> 16); 1566 pSMB->DataLengthHigh = cpu_to_le16(bytes_sent >> 16);
1563 pSMB->hdr.smb_buf_length += byte_count; 1567 inc_rfc1001_len(pSMB, byte_count);
1564 1568
1565 if (wct == 14) 1569 if (wct == 14)
1566 pSMB->ByteCount = cpu_to_le16(byte_count); 1570 pSMB->ByteCount = cpu_to_le16(byte_count);
@@ -1644,11 +1648,12 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1644 1648
1645 pSMB->DataLengthLow = cpu_to_le16(count & 0xFFFF); 1649 pSMB->DataLengthLow = cpu_to_le16(count & 0xFFFF);
1646 pSMB->DataLengthHigh = cpu_to_le16(count >> 16); 1650 pSMB->DataLengthHigh = cpu_to_le16(count >> 16);
1647 smb_hdr_len = pSMB->hdr.smb_buf_length + 1; /* hdr + 1 byte pad */ 1651 /* header + 1 byte pad */
1652 smb_hdr_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 1;
1648 if (wct == 14) 1653 if (wct == 14)
1649 pSMB->hdr.smb_buf_length += count+1; 1654 inc_rfc1001_len(pSMB, count + 1);
1650 else /* wct == 12 */ 1655 else /* wct == 12 */
1651 pSMB->hdr.smb_buf_length += count+5; /* smb data starts later */ 1656 inc_rfc1001_len(pSMB, count + 5); /* smb data starts later */
1652 if (wct == 14) 1657 if (wct == 14)
1653 pSMB->ByteCount = cpu_to_le16(count + 1); 1658 pSMB->ByteCount = cpu_to_le16(count + 1);
1654 else /* wct == 12 */ /* bigger pad, smaller smb hdr, keep offset ok */ { 1659 else /* wct == 12 */ /* bigger pad, smaller smb hdr, keep offset ok */ {
@@ -1748,7 +1753,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1748 /* oplock break */ 1753 /* oplock break */
1749 count = 0; 1754 count = 0;
1750 } 1755 }
1751 pSMB->hdr.smb_buf_length += count; 1756 inc_rfc1001_len(pSMB, count);
1752 pSMB->ByteCount = cpu_to_le16(count); 1757 pSMB->ByteCount = cpu_to_le16(count);
1753 1758
1754 if (waitFlag) { 1759 if (waitFlag) {
@@ -1839,14 +1844,14 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1839 pSMB->Fid = smb_file_id; 1844 pSMB->Fid = smb_file_id;
1840 pSMB->InformationLevel = cpu_to_le16(SMB_SET_POSIX_LOCK); 1845 pSMB->InformationLevel = cpu_to_le16(SMB_SET_POSIX_LOCK);
1841 pSMB->Reserved4 = 0; 1846 pSMB->Reserved4 = 0;
1842 pSMB->hdr.smb_buf_length += byte_count; 1847 inc_rfc1001_len(pSMB, byte_count);
1843 pSMB->ByteCount = cpu_to_le16(byte_count); 1848 pSMB->ByteCount = cpu_to_le16(byte_count);
1844 if (waitFlag) { 1849 if (waitFlag) {
1845 rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB, 1850 rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB,
1846 (struct smb_hdr *) pSMBr, &bytes_returned); 1851 (struct smb_hdr *) pSMBr, &bytes_returned);
1847 } else { 1852 } else {
1848 iov[0].iov_base = (char *)pSMB; 1853 iov[0].iov_base = (char *)pSMB;
1849 iov[0].iov_len = pSMB->hdr.smb_buf_length + 4; 1854 iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
1850 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */, 1855 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
1851 &resp_buf_type, timeout); 1856 &resp_buf_type, timeout);
1852 pSMB = NULL; /* request buf already freed by SendReceive2. Do 1857 pSMB = NULL; /* request buf already freed by SendReceive2. Do
@@ -1862,7 +1867,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1862 __u16 data_count; 1867 __u16 data_count;
1863 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 1868 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
1864 1869
1865 if (rc || (pSMBr->ByteCount < sizeof(struct cifs_posix_lock))) { 1870 if (rc || get_bcc(&pSMBr->hdr) < sizeof(*parm_data)) {
1866 rc = -EIO; /* bad smb */ 1871 rc = -EIO; /* bad smb */
1867 goto plk_err_exit; 1872 goto plk_err_exit;
1868 } 1873 }
@@ -2012,7 +2017,7 @@ renameRetry:
2012 } 2017 }
2013 2018
2014 count = 1 /* 1st signature byte */ + name_len + name_len2; 2019 count = 1 /* 1st signature byte */ + name_len + name_len2;
2015 pSMB->hdr.smb_buf_length += count; 2020 inc_rfc1001_len(pSMB, count);
2016 pSMB->ByteCount = cpu_to_le16(count); 2021 pSMB->ByteCount = cpu_to_le16(count);
2017 2022
2018 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2023 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2092,7 +2097,7 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
2092 pSMB->InformationLevel = 2097 pSMB->InformationLevel =
2093 cpu_to_le16(SMB_SET_FILE_RENAME_INFORMATION); 2098 cpu_to_le16(SMB_SET_FILE_RENAME_INFORMATION);
2094 pSMB->Reserved4 = 0; 2099 pSMB->Reserved4 = 0;
2095 pSMB->hdr.smb_buf_length += byte_count; 2100 inc_rfc1001_len(pSMB, byte_count);
2096 pSMB->ByteCount = cpu_to_le16(byte_count); 2101 pSMB->ByteCount = cpu_to_le16(byte_count);
2097 rc = SendReceive(xid, pTcon->ses, (struct smb_hdr *) pSMB, 2102 rc = SendReceive(xid, pTcon->ses, (struct smb_hdr *) pSMB,
2098 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2103 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2159,7 +2164,7 @@ copyRetry:
2159 } 2164 }
2160 2165
2161 count = 1 /* 1st signature byte */ + name_len + name_len2; 2166 count = 1 /* 1st signature byte */ + name_len + name_len2;
2162 pSMB->hdr.smb_buf_length += count; 2167 inc_rfc1001_len(pSMB, count);
2163 pSMB->ByteCount = cpu_to_le16(count); 2168 pSMB->ByteCount = cpu_to_le16(count);
2164 2169
2165 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2170 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2249,7 +2254,7 @@ createSymLinkRetry:
2249 pSMB->DataOffset = cpu_to_le16(offset); 2254 pSMB->DataOffset = cpu_to_le16(offset);
2250 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_LINK); 2255 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_LINK);
2251 pSMB->Reserved4 = 0; 2256 pSMB->Reserved4 = 0;
2252 pSMB->hdr.smb_buf_length += byte_count; 2257 inc_rfc1001_len(pSMB, byte_count);
2253 pSMB->ByteCount = cpu_to_le16(byte_count); 2258 pSMB->ByteCount = cpu_to_le16(byte_count);
2254 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2259 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2255 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2260 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2335,7 +2340,7 @@ createHardLinkRetry:
2335 pSMB->DataOffset = cpu_to_le16(offset); 2340 pSMB->DataOffset = cpu_to_le16(offset);
2336 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_HLINK); 2341 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_HLINK);
2337 pSMB->Reserved4 = 0; 2342 pSMB->Reserved4 = 0;
2338 pSMB->hdr.smb_buf_length += byte_count; 2343 inc_rfc1001_len(pSMB, byte_count);
2339 pSMB->ByteCount = cpu_to_le16(byte_count); 2344 pSMB->ByteCount = cpu_to_le16(byte_count);
2340 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2345 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2341 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2346 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2406,7 +2411,7 @@ winCreateHardLinkRetry:
2406 } 2411 }
2407 2412
2408 count = 1 /* string type byte */ + name_len + name_len2; 2413 count = 1 /* string type byte */ + name_len + name_len2;
2409 pSMB->hdr.smb_buf_length += count; 2414 inc_rfc1001_len(pSMB, count);
2410 pSMB->ByteCount = cpu_to_le16(count); 2415 pSMB->ByteCount = cpu_to_le16(count);
2411 2416
2412 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2417 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2477,7 +2482,7 @@ querySymLinkRetry:
2477 pSMB->ParameterCount = pSMB->TotalParameterCount; 2482 pSMB->ParameterCount = pSMB->TotalParameterCount;
2478 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_LINK); 2483 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_LINK);
2479 pSMB->Reserved4 = 0; 2484 pSMB->Reserved4 = 0;
2480 pSMB->hdr.smb_buf_length += byte_count; 2485 inc_rfc1001_len(pSMB, byte_count);
2481 pSMB->ByteCount = cpu_to_le16(byte_count); 2486 pSMB->ByteCount = cpu_to_le16(byte_count);
2482 2487
2483 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2488 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2489,7 +2494,7 @@ querySymLinkRetry:
2489 2494
2490 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 2495 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
2491 /* BB also check enough total bytes returned */ 2496 /* BB also check enough total bytes returned */
2492 if (rc || (pSMBr->ByteCount < 2)) 2497 if (rc || get_bcc(&pSMBr->hdr) < 2)
2493 rc = -EIO; 2498 rc = -EIO;
2494 else { 2499 else {
2495 bool is_unicode; 2500 bool is_unicode;
@@ -2516,7 +2521,17 @@ querySymLinkRetry:
2516 return rc; 2521 return rc;
2517} 2522}
2518 2523
2519#ifdef CONFIG_CIFS_EXPERIMENTAL 2524#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL
2525/*
2526 * Recent Windows versions now create symlinks more frequently
2527 * and they use the "reparse point" mechanism below. We can of course
2528 * do symlinks nicely to Samba and other servers which support the
2529 * CIFS Unix Extensions and we can also do SFU symlinks and "client only"
2530 * "MF" symlinks optionally, but for recent Windows we really need to
2531 * reenable the code below and fix the cifs_symlink callers to handle this.
2532 * In the interim this code has been moved to its own config option so
2533 * it is not compiled in by default until callers fixed up and more tested.
2534 */
2520int 2535int
2521CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon, 2536CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2522 const unsigned char *searchName, 2537 const unsigned char *searchName,
@@ -2561,14 +2576,14 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2561 } else { /* decode response */ 2576 } else { /* decode response */
2562 __u32 data_offset = le32_to_cpu(pSMBr->DataOffset); 2577 __u32 data_offset = le32_to_cpu(pSMBr->DataOffset);
2563 __u32 data_count = le32_to_cpu(pSMBr->DataCount); 2578 __u32 data_count = le32_to_cpu(pSMBr->DataCount);
2564 if ((pSMBr->ByteCount < 2) || (data_offset > 512)) { 2579 if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) {
2565 /* BB also check enough total bytes returned */ 2580 /* BB also check enough total bytes returned */
2566 rc = -EIO; /* bad smb */ 2581 rc = -EIO; /* bad smb */
2567 goto qreparse_out; 2582 goto qreparse_out;
2568 } 2583 }
2569 if (data_count && (data_count < 2048)) { 2584 if (data_count && (data_count < 2048)) {
2570 char *end_of_smb = 2 /* sizeof byte count */ + 2585 char *end_of_smb = 2 /* sizeof byte count */ +
2571 pSMBr->ByteCount + (char *)&pSMBr->ByteCount; 2586 get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount;
2572 2587
2573 struct reparse_data *reparse_buf = 2588 struct reparse_data *reparse_buf =
2574 (struct reparse_data *) 2589 (struct reparse_data *)
@@ -2618,7 +2633,7 @@ qreparse_out:
2618 2633
2619 return rc; 2634 return rc;
2620} 2635}
2621#endif /* CIFS_EXPERIMENTAL */ 2636#endif /* CIFS_SYMLINK_EXPERIMENTAL */ /* BB temporarily unused */
2622 2637
2623#ifdef CONFIG_CIFS_POSIX 2638#ifdef CONFIG_CIFS_POSIX
2624 2639
@@ -2814,7 +2829,7 @@ queryAclRetry:
2814 pSMB->ParameterCount = pSMB->TotalParameterCount; 2829 pSMB->ParameterCount = pSMB->TotalParameterCount;
2815 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_ACL); 2830 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_ACL);
2816 pSMB->Reserved4 = 0; 2831 pSMB->Reserved4 = 0;
2817 pSMB->hdr.smb_buf_length += byte_count; 2832 inc_rfc1001_len(pSMB, byte_count);
2818 pSMB->ByteCount = cpu_to_le16(byte_count); 2833 pSMB->ByteCount = cpu_to_le16(byte_count);
2819 2834
2820 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2835 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2826,8 +2841,8 @@ queryAclRetry:
2826 /* decode response */ 2841 /* decode response */
2827 2842
2828 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 2843 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
2829 if (rc || (pSMBr->ByteCount < 2))
2830 /* BB also check enough total bytes returned */ 2844 /* BB also check enough total bytes returned */
2845 if (rc || get_bcc(&pSMBr->hdr) < 2)
2831 rc = -EIO; /* bad smb */ 2846 rc = -EIO; /* bad smb */
2832 else { 2847 else {
2833 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 2848 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -2908,7 +2923,7 @@ setAclRetry:
2908 pSMB->ParameterCount = cpu_to_le16(params); 2923 pSMB->ParameterCount = cpu_to_le16(params);
2909 pSMB->TotalParameterCount = pSMB->ParameterCount; 2924 pSMB->TotalParameterCount = pSMB->ParameterCount;
2910 pSMB->Reserved4 = 0; 2925 pSMB->Reserved4 = 0;
2911 pSMB->hdr.smb_buf_length += byte_count; 2926 inc_rfc1001_len(pSMB, byte_count);
2912 pSMB->ByteCount = cpu_to_le16(byte_count); 2927 pSMB->ByteCount = cpu_to_le16(byte_count);
2913 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2928 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2914 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2929 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2966,7 +2981,7 @@ GetExtAttrRetry:
2966 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_ATTR_FLAGS); 2981 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_ATTR_FLAGS);
2967 pSMB->Pad = 0; 2982 pSMB->Pad = 0;
2968 pSMB->Fid = netfid; 2983 pSMB->Fid = netfid;
2969 pSMB->hdr.smb_buf_length += byte_count; 2984 inc_rfc1001_len(pSMB, byte_count);
2970 pSMB->t2.ByteCount = cpu_to_le16(byte_count); 2985 pSMB->t2.ByteCount = cpu_to_le16(byte_count);
2971 2986
2972 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2987 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2976,8 +2991,8 @@ GetExtAttrRetry:
2976 } else { 2991 } else {
2977 /* decode response */ 2992 /* decode response */
2978 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 2993 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
2979 if (rc || (pSMBr->ByteCount < 2))
2980 /* BB also check enough total bytes returned */ 2994 /* BB also check enough total bytes returned */
2995 if (rc || get_bcc(&pSMBr->hdr) < 2)
2981 /* If rc should we check for EOPNOSUPP and 2996 /* If rc should we check for EOPNOSUPP and
2982 disable the srvino flag? or in caller? */ 2997 disable the srvino flag? or in caller? */
2983 rc = -EIO; /* bad smb */ 2998 rc = -EIO; /* bad smb */
@@ -3052,6 +3067,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
3052 char *end_of_smb; 3067 char *end_of_smb;
3053 __u32 data_count, data_offset, parm_count, parm_offset; 3068 __u32 data_count, data_offset, parm_count, parm_offset;
3054 struct smb_com_ntransact_rsp *pSMBr; 3069 struct smb_com_ntransact_rsp *pSMBr;
3070 u16 bcc;
3055 3071
3056 *pdatalen = 0; 3072 *pdatalen = 0;
3057 *pparmlen = 0; 3073 *pparmlen = 0;
@@ -3061,8 +3077,8 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
3061 3077
3062 pSMBr = (struct smb_com_ntransact_rsp *)buf; 3078 pSMBr = (struct smb_com_ntransact_rsp *)buf;
3063 3079
3064 /* ByteCount was converted from little endian in SendReceive */ 3080 bcc = get_bcc(&pSMBr->hdr);
3065 end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount + 3081 end_of_smb = 2 /* sizeof byte count */ + bcc +
3066 (char *)&pSMBr->ByteCount; 3082 (char *)&pSMBr->ByteCount;
3067 3083
3068 data_offset = le32_to_cpu(pSMBr->DataOffset); 3084 data_offset = le32_to_cpu(pSMBr->DataOffset);
@@ -3088,7 +3104,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
3088 *ppdata, data_count, (data_count + *ppdata), 3104 *ppdata, data_count, (data_count + *ppdata),
3089 end_of_smb, pSMBr); 3105 end_of_smb, pSMBr);
3090 return -EINVAL; 3106 return -EINVAL;
3091 } else if (parm_count + data_count > pSMBr->ByteCount) { 3107 } else if (parm_count + data_count > bcc) {
3092 cFYI(1, "parm count and data count larger than SMB"); 3108 cFYI(1, "parm count and data count larger than SMB");
3093 return -EINVAL; 3109 return -EINVAL;
3094 } 3110 }
@@ -3124,9 +3140,9 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
3124 pSMB->AclFlags = cpu_to_le32(CIFS_ACL_OWNER | CIFS_ACL_GROUP | 3140 pSMB->AclFlags = cpu_to_le32(CIFS_ACL_OWNER | CIFS_ACL_GROUP |
3125 CIFS_ACL_DACL); 3141 CIFS_ACL_DACL);
3126 pSMB->ByteCount = cpu_to_le16(11); /* 3 bytes pad + 8 bytes parm */ 3142 pSMB->ByteCount = cpu_to_le16(11); /* 3 bytes pad + 8 bytes parm */
3127 pSMB->hdr.smb_buf_length += 11; 3143 inc_rfc1001_len(pSMB, 11);
3128 iov[0].iov_base = (char *)pSMB; 3144 iov[0].iov_base = (char *)pSMB;
3129 iov[0].iov_len = pSMB->hdr.smb_buf_length + 4; 3145 iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
3130 3146
3131 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type, 3147 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type,
3132 0); 3148 0);
@@ -3235,10 +3251,9 @@ setCifsAclRetry:
3235 memcpy((char *) &pSMBr->hdr.Protocol + data_offset, 3251 memcpy((char *) &pSMBr->hdr.Protocol + data_offset,
3236 (char *) pntsd, 3252 (char *) pntsd,
3237 acllen); 3253 acllen);
3238 pSMB->hdr.smb_buf_length += (byte_count + data_count); 3254 inc_rfc1001_len(pSMB, byte_count + data_count);
3239
3240 } else 3255 } else
3241 pSMB->hdr.smb_buf_length += byte_count; 3256 inc_rfc1001_len(pSMB, byte_count);
3242 3257
3243 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3258 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3244 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3259 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3289,7 +3304,7 @@ QInfRetry:
3289 } 3304 }
3290 pSMB->BufferFormat = 0x04; 3305 pSMB->BufferFormat = 0x04;
3291 name_len++; /* account for buffer type byte */ 3306 name_len++; /* account for buffer type byte */
3292 pSMB->hdr.smb_buf_length += (__u16) name_len; 3307 inc_rfc1001_len(pSMB, (__u16)name_len);
3293 pSMB->ByteCount = cpu_to_le16(name_len); 3308 pSMB->ByteCount = cpu_to_le16(name_len);
3294 3309
3295 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3310 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3364,7 +3379,7 @@ QFileInfoRetry:
3364 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO); 3379 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO);
3365 pSMB->Pad = 0; 3380 pSMB->Pad = 0;
3366 pSMB->Fid = netfid; 3381 pSMB->Fid = netfid;
3367 pSMB->hdr.smb_buf_length += byte_count; 3382 inc_rfc1001_len(pSMB, byte_count);
3368 3383
3369 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3384 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3370 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3385 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3375,7 +3390,7 @@ QFileInfoRetry:
3375 3390
3376 if (rc) /* BB add auto retry on EOPNOTSUPP? */ 3391 if (rc) /* BB add auto retry on EOPNOTSUPP? */
3377 rc = -EIO; 3392 rc = -EIO;
3378 else if (pSMBr->ByteCount < 40) 3393 else if (get_bcc(&pSMBr->hdr) < 40)
3379 rc = -EIO; /* bad smb */ 3394 rc = -EIO; /* bad smb */
3380 else if (pFindData) { 3395 else if (pFindData) {
3381 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 3396 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -3451,7 +3466,7 @@ QPathInfoRetry:
3451 else 3466 else
3452 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO); 3467 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO);
3453 pSMB->Reserved4 = 0; 3468 pSMB->Reserved4 = 0;
3454 pSMB->hdr.smb_buf_length += byte_count; 3469 inc_rfc1001_len(pSMB, byte_count);
3455 pSMB->ByteCount = cpu_to_le16(byte_count); 3470 pSMB->ByteCount = cpu_to_le16(byte_count);
3456 3471
3457 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3472 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3463,9 +3478,9 @@ QPathInfoRetry:
3463 3478
3464 if (rc) /* BB add auto retry on EOPNOTSUPP? */ 3479 if (rc) /* BB add auto retry on EOPNOTSUPP? */
3465 rc = -EIO; 3480 rc = -EIO;
3466 else if (!legacy && (pSMBr->ByteCount < 40)) 3481 else if (!legacy && get_bcc(&pSMBr->hdr) < 40)
3467 rc = -EIO; /* bad smb */ 3482 rc = -EIO; /* bad smb */
3468 else if (legacy && (pSMBr->ByteCount < 24)) 3483 else if (legacy && get_bcc(&pSMBr->hdr) < 24)
3469 rc = -EIO; /* 24 or 26 expected but we do not read 3484 rc = -EIO; /* 24 or 26 expected but we do not read
3470 last field */ 3485 last field */
3471 else if (pFindData) { 3486 else if (pFindData) {
@@ -3532,7 +3547,7 @@ UnixQFileInfoRetry:
3532 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC); 3547 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
3533 pSMB->Pad = 0; 3548 pSMB->Pad = 0;
3534 pSMB->Fid = netfid; 3549 pSMB->Fid = netfid;
3535 pSMB->hdr.smb_buf_length += byte_count; 3550 inc_rfc1001_len(pSMB, byte_count);
3536 3551
3537 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3552 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3538 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3553 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3541,7 +3556,7 @@ UnixQFileInfoRetry:
3541 } else { /* decode response */ 3556 } else { /* decode response */
3542 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3557 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
3543 3558
3544 if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) { 3559 if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) {
3545 cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n" 3560 cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n"
3546 "Unix Extensions can be disabled on mount " 3561 "Unix Extensions can be disabled on mount "
3547 "by specifying the nosfu mount option."); 3562 "by specifying the nosfu mount option.");
@@ -3617,7 +3632,7 @@ UnixQPathInfoRetry:
3617 pSMB->ParameterCount = pSMB->TotalParameterCount; 3632 pSMB->ParameterCount = pSMB->TotalParameterCount;
3618 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC); 3633 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
3619 pSMB->Reserved4 = 0; 3634 pSMB->Reserved4 = 0;
3620 pSMB->hdr.smb_buf_length += byte_count; 3635 inc_rfc1001_len(pSMB, byte_count);
3621 pSMB->ByteCount = cpu_to_le16(byte_count); 3636 pSMB->ByteCount = cpu_to_le16(byte_count);
3622 3637
3623 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3638 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3627,7 +3642,7 @@ UnixQPathInfoRetry:
3627 } else { /* decode response */ 3642 } else { /* decode response */
3628 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3643 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
3629 3644
3630 if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) { 3645 if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) {
3631 cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n" 3646 cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n"
3632 "Unix Extensions can be disabled on mount " 3647 "Unix Extensions can be disabled on mount "
3633 "by specifying the nosfu mount option."); 3648 "by specifying the nosfu mount option.");
@@ -3731,7 +3746,7 @@ findFirstRetry:
3731 3746
3732 /* BB what should we set StorageType to? Does it matter? BB */ 3747 /* BB what should we set StorageType to? Does it matter? BB */
3733 pSMB->SearchStorageType = 0; 3748 pSMB->SearchStorageType = 0;
3734 pSMB->hdr.smb_buf_length += byte_count; 3749 inc_rfc1001_len(pSMB, byte_count);
3735 pSMB->ByteCount = cpu_to_le16(byte_count); 3750 pSMB->ByteCount = cpu_to_le16(byte_count);
3736 3751
3737 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3752 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3860,7 +3875,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
3860 byte_count = params + 1 /* pad */ ; 3875 byte_count = params + 1 /* pad */ ;
3861 pSMB->TotalParameterCount = cpu_to_le16(params); 3876 pSMB->TotalParameterCount = cpu_to_le16(params);
3862 pSMB->ParameterCount = pSMB->TotalParameterCount; 3877 pSMB->ParameterCount = pSMB->TotalParameterCount;
3863 pSMB->hdr.smb_buf_length += byte_count; 3878 inc_rfc1001_len(pSMB, byte_count);
3864 pSMB->ByteCount = cpu_to_le16(byte_count); 3879 pSMB->ByteCount = cpu_to_le16(byte_count);
3865 3880
3866 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3881 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4022,7 +4037,7 @@ GetInodeNumberRetry:
4022 pSMB->ParameterCount = pSMB->TotalParameterCount; 4037 pSMB->ParameterCount = pSMB->TotalParameterCount;
4023 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_INTERNAL_INFO); 4038 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_INTERNAL_INFO);
4024 pSMB->Reserved4 = 0; 4039 pSMB->Reserved4 = 0;
4025 pSMB->hdr.smb_buf_length += byte_count; 4040 inc_rfc1001_len(pSMB, byte_count);
4026 pSMB->ByteCount = cpu_to_le16(byte_count); 4041 pSMB->ByteCount = cpu_to_le16(byte_count);
4027 4042
4028 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4043 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4032,8 +4047,8 @@ GetInodeNumberRetry:
4032 } else { 4047 } else {
4033 /* decode response */ 4048 /* decode response */
4034 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4049 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4035 if (rc || (pSMBr->ByteCount < 2))
4036 /* BB also check enough total bytes returned */ 4050 /* BB also check enough total bytes returned */
4051 if (rc || get_bcc(&pSMBr->hdr) < 2)
4037 /* If rc should we check for EOPNOSUPP and 4052 /* If rc should we check for EOPNOSUPP and
4038 disable the srvino flag? or in caller? */ 4053 disable the srvino flag? or in caller? */
4039 rc = -EIO; /* bad smb */ 4054 rc = -EIO; /* bad smb */
@@ -4246,7 +4261,7 @@ getDFSRetry:
4246 pSMB->ParameterCount = cpu_to_le16(params); 4261 pSMB->ParameterCount = cpu_to_le16(params);
4247 pSMB->TotalParameterCount = pSMB->ParameterCount; 4262 pSMB->TotalParameterCount = pSMB->ParameterCount;
4248 pSMB->MaxReferralLevel = cpu_to_le16(3); 4263 pSMB->MaxReferralLevel = cpu_to_le16(3);
4249 pSMB->hdr.smb_buf_length += byte_count; 4264 inc_rfc1001_len(pSMB, byte_count);
4250 pSMB->ByteCount = cpu_to_le16(byte_count); 4265 pSMB->ByteCount = cpu_to_le16(byte_count);
4251 4266
4252 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB, 4267 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
@@ -4258,13 +4273,13 @@ getDFSRetry:
4258 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4273 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4259 4274
4260 /* BB Also check if enough total bytes returned? */ 4275 /* BB Also check if enough total bytes returned? */
4261 if (rc || (pSMBr->ByteCount < 17)) { 4276 if (rc || get_bcc(&pSMBr->hdr) < 17) {
4262 rc = -EIO; /* bad smb */ 4277 rc = -EIO; /* bad smb */
4263 goto GetDFSRefExit; 4278 goto GetDFSRefExit;
4264 } 4279 }
4265 4280
4266 cFYI(1, "Decoding GetDFSRefer response BCC: %d Offset %d", 4281 cFYI(1, "Decoding GetDFSRefer response BCC: %d Offset %d",
4267 pSMBr->ByteCount, 4282 get_bcc(&pSMBr->hdr),
4268 le16_to_cpu(pSMBr->t2.DataOffset)); 4283 le16_to_cpu(pSMBr->t2.DataOffset));
4269 4284
4270 /* parse returned result into more usable form */ 4285 /* parse returned result into more usable form */
@@ -4320,7 +4335,7 @@ oldQFSInfoRetry:
4320 pSMB->Reserved3 = 0; 4335 pSMB->Reserved3 = 0;
4321 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4336 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4322 pSMB->InformationLevel = cpu_to_le16(SMB_INFO_ALLOCATION); 4337 pSMB->InformationLevel = cpu_to_le16(SMB_INFO_ALLOCATION);
4323 pSMB->hdr.smb_buf_length += byte_count; 4338 inc_rfc1001_len(pSMB, byte_count);
4324 pSMB->ByteCount = cpu_to_le16(byte_count); 4339 pSMB->ByteCount = cpu_to_le16(byte_count);
4325 4340
4326 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4341 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4330,12 +4345,12 @@ oldQFSInfoRetry:
4330 } else { /* decode response */ 4345 } else { /* decode response */
4331 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4346 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4332 4347
4333 if (rc || (pSMBr->ByteCount < 18)) 4348 if (rc || get_bcc(&pSMBr->hdr) < 18)
4334 rc = -EIO; /* bad smb */ 4349 rc = -EIO; /* bad smb */
4335 else { 4350 else {
4336 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4351 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
4337 cFYI(1, "qfsinf resp BCC: %d Offset %d", 4352 cFYI(1, "qfsinf resp BCC: %d Offset %d",
4338 pSMBr->ByteCount, data_offset); 4353 get_bcc(&pSMBr->hdr), data_offset);
4339 4354
4340 response_data = (FILE_SYSTEM_ALLOC_INFO *) 4355 response_data = (FILE_SYSTEM_ALLOC_INFO *)
4341 (((char *) &pSMBr->hdr.Protocol) + data_offset); 4356 (((char *) &pSMBr->hdr.Protocol) + data_offset);
@@ -4399,7 +4414,7 @@ QFSInfoRetry:
4399 pSMB->Reserved3 = 0; 4414 pSMB->Reserved3 = 0;
4400 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4415 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4401 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_SIZE_INFO); 4416 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_SIZE_INFO);
4402 pSMB->hdr.smb_buf_length += byte_count; 4417 inc_rfc1001_len(pSMB, byte_count);
4403 pSMB->ByteCount = cpu_to_le16(byte_count); 4418 pSMB->ByteCount = cpu_to_le16(byte_count);
4404 4419
4405 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4420 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4409,7 +4424,7 @@ QFSInfoRetry:
4409 } else { /* decode response */ 4424 } else { /* decode response */
4410 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4425 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4411 4426
4412 if (rc || (pSMBr->ByteCount < 24)) 4427 if (rc || get_bcc(&pSMBr->hdr) < 24)
4413 rc = -EIO; /* bad smb */ 4428 rc = -EIO; /* bad smb */
4414 else { 4429 else {
4415 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4430 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4479,7 +4494,7 @@ QFSAttributeRetry:
4479 pSMB->Reserved3 = 0; 4494 pSMB->Reserved3 = 0;
4480 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4495 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4481 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_ATTRIBUTE_INFO); 4496 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_ATTRIBUTE_INFO);
4482 pSMB->hdr.smb_buf_length += byte_count; 4497 inc_rfc1001_len(pSMB, byte_count);
4483 pSMB->ByteCount = cpu_to_le16(byte_count); 4498 pSMB->ByteCount = cpu_to_le16(byte_count);
4484 4499
4485 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4500 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4489,7 +4504,7 @@ QFSAttributeRetry:
4489 } else { /* decode response */ 4504 } else { /* decode response */
4490 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4505 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4491 4506
4492 if (rc || (pSMBr->ByteCount < 13)) { 4507 if (rc || get_bcc(&pSMBr->hdr) < 13) {
4493 /* BB also check if enough bytes returned */ 4508 /* BB also check if enough bytes returned */
4494 rc = -EIO; /* bad smb */ 4509 rc = -EIO; /* bad smb */
4495 } else { 4510 } else {
@@ -4550,7 +4565,7 @@ QFSDeviceRetry:
4550 pSMB->Reserved3 = 0; 4565 pSMB->Reserved3 = 0;
4551 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4566 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4552 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_DEVICE_INFO); 4567 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_DEVICE_INFO);
4553 pSMB->hdr.smb_buf_length += byte_count; 4568 inc_rfc1001_len(pSMB, byte_count);
4554 pSMB->ByteCount = cpu_to_le16(byte_count); 4569 pSMB->ByteCount = cpu_to_le16(byte_count);
4555 4570
4556 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4571 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4560,7 +4575,8 @@ QFSDeviceRetry:
4560 } else { /* decode response */ 4575 } else { /* decode response */
4561 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4576 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4562 4577
4563 if (rc || (pSMBr->ByteCount < sizeof(FILE_SYSTEM_DEVICE_INFO))) 4578 if (rc || get_bcc(&pSMBr->hdr) <
4579 sizeof(FILE_SYSTEM_DEVICE_INFO))
4564 rc = -EIO; /* bad smb */ 4580 rc = -EIO; /* bad smb */
4565 else { 4581 else {
4566 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4582 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4619,7 +4635,7 @@ QFSUnixRetry:
4619 pSMB->Reserved3 = 0; 4635 pSMB->Reserved3 = 0;
4620 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4636 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4621 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_CIFS_UNIX_INFO); 4637 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_CIFS_UNIX_INFO);
4622 pSMB->hdr.smb_buf_length += byte_count; 4638 inc_rfc1001_len(pSMB, byte_count);
4623 pSMB->ByteCount = cpu_to_le16(byte_count); 4639 pSMB->ByteCount = cpu_to_le16(byte_count);
4624 4640
4625 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4641 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4629,7 +4645,7 @@ QFSUnixRetry:
4629 } else { /* decode response */ 4645 } else { /* decode response */
4630 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4646 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4631 4647
4632 if (rc || (pSMBr->ByteCount < 13)) { 4648 if (rc || get_bcc(&pSMBr->hdr) < 13) {
4633 rc = -EIO; /* bad smb */ 4649 rc = -EIO; /* bad smb */
4634 } else { 4650 } else {
4635 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4651 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4702,7 +4718,7 @@ SETFSUnixRetry:
4702 pSMB->ClientUnixMinor = cpu_to_le16(CIFS_UNIX_MINOR_VERSION); 4718 pSMB->ClientUnixMinor = cpu_to_le16(CIFS_UNIX_MINOR_VERSION);
4703 pSMB->ClientUnixCap = cpu_to_le64(cap); 4719 pSMB->ClientUnixCap = cpu_to_le64(cap);
4704 4720
4705 pSMB->hdr.smb_buf_length += byte_count; 4721 inc_rfc1001_len(pSMB, byte_count);
4706 pSMB->ByteCount = cpu_to_le16(byte_count); 4722 pSMB->ByteCount = cpu_to_le16(byte_count);
4707 4723
4708 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4724 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4764,7 +4780,7 @@ QFSPosixRetry:
4764 pSMB->Reserved3 = 0; 4780 pSMB->Reserved3 = 0;
4765 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4781 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4766 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_FS_INFO); 4782 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_FS_INFO);
4767 pSMB->hdr.smb_buf_length += byte_count; 4783 inc_rfc1001_len(pSMB, byte_count);
4768 pSMB->ByteCount = cpu_to_le16(byte_count); 4784 pSMB->ByteCount = cpu_to_le16(byte_count);
4769 4785
4770 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4786 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4774,7 +4790,7 @@ QFSPosixRetry:
4774 } else { /* decode response */ 4790 } else { /* decode response */
4775 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4791 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4776 4792
4777 if (rc || (pSMBr->ByteCount < 13)) { 4793 if (rc || get_bcc(&pSMBr->hdr) < 13) {
4778 rc = -EIO; /* bad smb */ 4794 rc = -EIO; /* bad smb */
4779 } else { 4795 } else {
4780 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4796 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4890,7 +4906,7 @@ SetEOFRetry:
4890 pSMB->ParameterCount = cpu_to_le16(params); 4906 pSMB->ParameterCount = cpu_to_le16(params);
4891 pSMB->TotalParameterCount = pSMB->ParameterCount; 4907 pSMB->TotalParameterCount = pSMB->ParameterCount;
4892 pSMB->Reserved4 = 0; 4908 pSMB->Reserved4 = 0;
4893 pSMB->hdr.smb_buf_length += byte_count; 4909 inc_rfc1001_len(pSMB, byte_count);
4894 parm_data->FileSize = cpu_to_le64(size); 4910 parm_data->FileSize = cpu_to_le64(size);
4895 pSMB->ByteCount = cpu_to_le16(byte_count); 4911 pSMB->ByteCount = cpu_to_le16(byte_count);
4896 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4912 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4969,7 +4985,7 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
4969 cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO); 4985 cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO);
4970 } 4986 }
4971 pSMB->Reserved4 = 0; 4987 pSMB->Reserved4 = 0;
4972 pSMB->hdr.smb_buf_length += byte_count; 4988 inc_rfc1001_len(pSMB, byte_count);
4973 pSMB->ByteCount = cpu_to_le16(byte_count); 4989 pSMB->ByteCount = cpu_to_le16(byte_count);
4974 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 4990 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
4975 if (rc) { 4991 if (rc) {
@@ -5037,7 +5053,7 @@ CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
5037 else 5053 else
5038 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO); 5054 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO);
5039 pSMB->Reserved4 = 0; 5055 pSMB->Reserved4 = 0;
5040 pSMB->hdr.smb_buf_length += byte_count; 5056 inc_rfc1001_len(pSMB, byte_count);
5041 pSMB->ByteCount = cpu_to_le16(byte_count); 5057 pSMB->ByteCount = cpu_to_le16(byte_count);
5042 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO)); 5058 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
5043 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 5059 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
@@ -5096,7 +5112,7 @@ CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon,
5096 pSMB->Fid = fid; 5112 pSMB->Fid = fid;
5097 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_DISPOSITION_INFO); 5113 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_DISPOSITION_INFO);
5098 pSMB->Reserved4 = 0; 5114 pSMB->Reserved4 = 0;
5099 pSMB->hdr.smb_buf_length += byte_count; 5115 inc_rfc1001_len(pSMB, byte_count);
5100 pSMB->ByteCount = cpu_to_le16(byte_count); 5116 pSMB->ByteCount = cpu_to_le16(byte_count);
5101 *data_offset = delete_file ? 1 : 0; 5117 *data_offset = delete_file ? 1 : 0;
5102 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 5118 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
@@ -5169,7 +5185,7 @@ SetTimesRetry:
5169 else 5185 else
5170 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO); 5186 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO);
5171 pSMB->Reserved4 = 0; 5187 pSMB->Reserved4 = 0;
5172 pSMB->hdr.smb_buf_length += byte_count; 5188 inc_rfc1001_len(pSMB, byte_count);
5173 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO)); 5189 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
5174 pSMB->ByteCount = cpu_to_le16(byte_count); 5190 pSMB->ByteCount = cpu_to_le16(byte_count);
5175 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5191 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -5221,7 +5237,7 @@ SetAttrLgcyRetry:
5221 } 5237 }
5222 pSMB->attr = cpu_to_le16(dos_attrs); 5238 pSMB->attr = cpu_to_le16(dos_attrs);
5223 pSMB->BufferFormat = 0x04; 5239 pSMB->BufferFormat = 0x04;
5224 pSMB->hdr.smb_buf_length += name_len + 1; 5240 inc_rfc1001_len(pSMB, name_len + 1);
5225 pSMB->ByteCount = cpu_to_le16(name_len + 1); 5241 pSMB->ByteCount = cpu_to_le16(name_len + 1);
5226 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5242 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5227 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5243 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -5326,7 +5342,7 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
5326 pSMB->Fid = fid; 5342 pSMB->Fid = fid;
5327 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC); 5343 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
5328 pSMB->Reserved4 = 0; 5344 pSMB->Reserved4 = 0;
5329 pSMB->hdr.smb_buf_length += byte_count; 5345 inc_rfc1001_len(pSMB, byte_count);
5330 pSMB->ByteCount = cpu_to_le16(byte_count); 5346 pSMB->ByteCount = cpu_to_le16(byte_count);
5331 5347
5332 cifs_fill_unix_set_info(data_offset, args); 5348 cifs_fill_unix_set_info(data_offset, args);
@@ -5402,7 +5418,7 @@ setPermsRetry:
5402 pSMB->TotalDataCount = pSMB->DataCount; 5418 pSMB->TotalDataCount = pSMB->DataCount;
5403 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC); 5419 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
5404 pSMB->Reserved4 = 0; 5420 pSMB->Reserved4 = 0;
5405 pSMB->hdr.smb_buf_length += byte_count; 5421 inc_rfc1001_len(pSMB, byte_count);
5406 5422
5407 cifs_fill_unix_set_info(data_offset, args); 5423 cifs_fill_unix_set_info(data_offset, args);
5408 5424
@@ -5418,79 +5434,6 @@ setPermsRetry:
5418 return rc; 5434 return rc;
5419} 5435}
5420 5436
5421int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
5422 const int notify_subdirs, const __u16 netfid,
5423 __u32 filter, struct file *pfile, int multishot,
5424 const struct nls_table *nls_codepage)
5425{
5426 int rc = 0;
5427 struct smb_com_transaction_change_notify_req *pSMB = NULL;
5428 struct smb_com_ntransaction_change_notify_rsp *pSMBr = NULL;
5429 struct dir_notify_req *dnotify_req;
5430 int bytes_returned;
5431
5432 cFYI(1, "In CIFSSMBNotify for file handle %d", (int)netfid);
5433 rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
5434 (void **) &pSMBr);
5435 if (rc)
5436 return rc;
5437
5438 pSMB->TotalParameterCount = 0 ;
5439 pSMB->TotalDataCount = 0;
5440 pSMB->MaxParameterCount = cpu_to_le32(2);
5441 /* BB find exact data count max from sess structure BB */
5442 pSMB->MaxDataCount = 0; /* same in little endian or be */
5443/* BB VERIFY verify which is correct for above BB */
5444 pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
5445 MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
5446
5447 pSMB->MaxSetupCount = 4;
5448 pSMB->Reserved = 0;
5449 pSMB->ParameterOffset = 0;
5450 pSMB->DataCount = 0;
5451 pSMB->DataOffset = 0;
5452 pSMB->SetupCount = 4; /* single byte does not need le conversion */
5453 pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_NOTIFY_CHANGE);
5454 pSMB->ParameterCount = pSMB->TotalParameterCount;
5455 if (notify_subdirs)
5456 pSMB->WatchTree = 1; /* one byte - no le conversion needed */
5457 pSMB->Reserved2 = 0;
5458 pSMB->CompletionFilter = cpu_to_le32(filter);
5459 pSMB->Fid = netfid; /* file handle always le */
5460 pSMB->ByteCount = 0;
5461
5462 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5463 (struct smb_hdr *)pSMBr, &bytes_returned,
5464 CIFS_ASYNC_OP);
5465 if (rc) {
5466 cFYI(1, "Error in Notify = %d", rc);
5467 } else {
5468 /* Add file to outstanding requests */
5469 /* BB change to kmem cache alloc */
5470 dnotify_req = kmalloc(
5471 sizeof(struct dir_notify_req),
5472 GFP_KERNEL);
5473 if (dnotify_req) {
5474 dnotify_req->Pid = pSMB->hdr.Pid;
5475 dnotify_req->PidHigh = pSMB->hdr.PidHigh;
5476 dnotify_req->Mid = pSMB->hdr.Mid;
5477 dnotify_req->Tid = pSMB->hdr.Tid;
5478 dnotify_req->Uid = pSMB->hdr.Uid;
5479 dnotify_req->netfid = netfid;
5480 dnotify_req->pfile = pfile;
5481 dnotify_req->filter = filter;
5482 dnotify_req->multishot = multishot;
5483 spin_lock(&GlobalMid_Lock);
5484 list_add_tail(&dnotify_req->lhead,
5485 &GlobalDnotifyReqList);
5486 spin_unlock(&GlobalMid_Lock);
5487 } else
5488 rc = -ENOMEM;
5489 }
5490 cifs_buf_release(pSMB);
5491 return rc;
5492}
5493
5494#ifdef CONFIG_CIFS_XATTR 5437#ifdef CONFIG_CIFS_XATTR
5495/* 5438/*
5496 * Do a path-based QUERY_ALL_EAS call and parse the result. This is a common 5439 * Do a path-based QUERY_ALL_EAS call and parse the result. This is a common
@@ -5560,7 +5503,7 @@ QAllEAsRetry:
5560 pSMB->ParameterCount = pSMB->TotalParameterCount; 5503 pSMB->ParameterCount = pSMB->TotalParameterCount;
5561 pSMB->InformationLevel = cpu_to_le16(SMB_INFO_QUERY_ALL_EAS); 5504 pSMB->InformationLevel = cpu_to_le16(SMB_INFO_QUERY_ALL_EAS);
5562 pSMB->Reserved4 = 0; 5505 pSMB->Reserved4 = 0;
5563 pSMB->hdr.smb_buf_length += byte_count; 5506 inc_rfc1001_len(pSMB, byte_count);
5564 pSMB->ByteCount = cpu_to_le16(byte_count); 5507 pSMB->ByteCount = cpu_to_le16(byte_count);
5565 5508
5566 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5509 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -5576,7 +5519,7 @@ QAllEAsRetry:
5576 of these trans2 responses */ 5519 of these trans2 responses */
5577 5520
5578 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 5521 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
5579 if (rc || (pSMBr->ByteCount < 4)) { 5522 if (rc || get_bcc(&pSMBr->hdr) < 4) {
5580 rc = -EIO; /* bad smb */ 5523 rc = -EIO; /* bad smb */
5581 goto QAllEAsOut; 5524 goto QAllEAsOut;
5582 } 5525 }
@@ -5773,7 +5716,7 @@ SetEARetry:
5773 pSMB->ParameterCount = cpu_to_le16(params); 5716 pSMB->ParameterCount = cpu_to_le16(params);
5774 pSMB->TotalParameterCount = pSMB->ParameterCount; 5717 pSMB->TotalParameterCount = pSMB->ParameterCount;
5775 pSMB->Reserved4 = 0; 5718 pSMB->Reserved4 = 0;
5776 pSMB->hdr.smb_buf_length += byte_count; 5719 inc_rfc1001_len(pSMB, byte_count);
5777 pSMB->ByteCount = cpu_to_le16(byte_count); 5720 pSMB->ByteCount = cpu_to_le16(byte_count);
5778 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5721 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5779 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5722 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -5787,5 +5730,99 @@ SetEARetry:
5787 5730
5788 return rc; 5731 return rc;
5789} 5732}
5790
5791#endif 5733#endif
5734
5735#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* BB unused temporarily */
5736/*
5737 * Years ago the kernel added a "dnotify" function for Samba server,
5738 * to allow network clients (such as Windows) to display updated
5739 * lists of files in directory listings automatically when
5740 * files are added by one user when another user has the
5741 * same directory open on their desktop. The Linux cifs kernel
5742 * client hooked into the kernel side of this interface for
5743 * the same reason, but ironically when the VFS moved from
5744 * "dnotify" to "inotify" it became harder to plug in Linux
5745 * network file system clients (the most obvious use case
5746 * for notify interfaces is when multiple users can update
5747 * the contents of the same directory - exactly what network
5748 * file systems can do) although the server (Samba) could
5749 * still use it. For the short term we leave the worker
5750 * function ifdeffed out (below) until inotify is fixed
5751 * in the VFS to make it easier to plug in network file
5752 * system clients. If inotify turns out to be permanently
5753 * incompatible for network fs clients, we could instead simply
5754 * expose this config flag by adding a future cifs (and smb2) notify ioctl.
5755 */
5756int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
5757 const int notify_subdirs, const __u16 netfid,
5758 __u32 filter, struct file *pfile, int multishot,
5759 const struct nls_table *nls_codepage)
5760{
5761 int rc = 0;
5762 struct smb_com_transaction_change_notify_req *pSMB = NULL;
5763 struct smb_com_ntransaction_change_notify_rsp *pSMBr = NULL;
5764 struct dir_notify_req *dnotify_req;
5765 int bytes_returned;
5766
5767 cFYI(1, "In CIFSSMBNotify for file handle %d", (int)netfid);
5768 rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
5769 (void **) &pSMBr);
5770 if (rc)
5771 return rc;
5772
5773 pSMB->TotalParameterCount = 0 ;
5774 pSMB->TotalDataCount = 0;
5775 pSMB->MaxParameterCount = cpu_to_le32(2);
5776 /* BB find exact data count max from sess structure BB */
5777 pSMB->MaxDataCount = 0; /* same in little endian or be */
5778/* BB VERIFY verify which is correct for above BB */
5779 pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
5780 MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
5781
5782 pSMB->MaxSetupCount = 4;
5783 pSMB->Reserved = 0;
5784 pSMB->ParameterOffset = 0;
5785 pSMB->DataCount = 0;
5786 pSMB->DataOffset = 0;
5787 pSMB->SetupCount = 4; /* single byte does not need le conversion */
5788 pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_NOTIFY_CHANGE);
5789 pSMB->ParameterCount = pSMB->TotalParameterCount;
5790 if (notify_subdirs)
5791 pSMB->WatchTree = 1; /* one byte - no le conversion needed */
5792 pSMB->Reserved2 = 0;
5793 pSMB->CompletionFilter = cpu_to_le32(filter);
5794 pSMB->Fid = netfid; /* file handle always le */
5795 pSMB->ByteCount = 0;
5796
5797 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5798 (struct smb_hdr *)pSMBr, &bytes_returned,
5799 CIFS_ASYNC_OP);
5800 if (rc) {
5801 cFYI(1, "Error in Notify = %d", rc);
5802 } else {
5803 /* Add file to outstanding requests */
5804 /* BB change to kmem cache alloc */
5805 dnotify_req = kmalloc(
5806 sizeof(struct dir_notify_req),
5807 GFP_KERNEL);
5808 if (dnotify_req) {
5809 dnotify_req->Pid = pSMB->hdr.Pid;
5810 dnotify_req->PidHigh = pSMB->hdr.PidHigh;
5811 dnotify_req->Mid = pSMB->hdr.Mid;
5812 dnotify_req->Tid = pSMB->hdr.Tid;
5813 dnotify_req->Uid = pSMB->hdr.Uid;
5814 dnotify_req->netfid = netfid;
5815 dnotify_req->pfile = pfile;
5816 dnotify_req->filter = filter;
5817 dnotify_req->multishot = multishot;
5818 spin_lock(&GlobalMid_Lock);
5819 list_add_tail(&dnotify_req->lhead,
5820 &GlobalDnotifyReqList);
5821 spin_unlock(&GlobalMid_Lock);
5822 } else
5823 rc = -ENOMEM;
5824 }
5825 cifs_buf_release(pSMB);
5826 return rc;
5827}
5828#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 4bc862a80efa..da284e3cb653 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -102,6 +102,7 @@ struct smb_vol {
102 bool fsc:1; /* enable fscache */ 102 bool fsc:1; /* enable fscache */
103 bool mfsymlinks:1; /* use Minshall+French Symlinks */ 103 bool mfsymlinks:1; /* use Minshall+French Symlinks */
104 bool multiuser:1; 104 bool multiuser:1;
105 bool use_smb2:1; /* force smb2 use on mount instead of cifs */
105 unsigned int rsize; 106 unsigned int rsize;
106 unsigned int wsize; 107 unsigned int wsize;
107 bool sockopt_tcp_nodelay:1; 108 bool sockopt_tcp_nodelay:1;
@@ -274,7 +275,8 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
274 char *data_area_of_target; 275 char *data_area_of_target;
275 char *data_area_of_buf2; 276 char *data_area_of_buf2;
276 int remaining; 277 int remaining;
277 __u16 byte_count, total_data_size, total_in_buf, total_in_buf2; 278 unsigned int byte_count, total_in_buf;
279 __u16 total_data_size, total_in_buf2;
278 280
279 total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount); 281 total_data_size = get_unaligned_le16(&pSMBt->t2_rsp.TotalDataCount);
280 282
@@ -287,7 +289,7 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
287 remaining = total_data_size - total_in_buf; 289 remaining = total_data_size - total_in_buf;
288 290
289 if (remaining < 0) 291 if (remaining < 0)
290 return -EINVAL; 292 return -EPROTO;
291 293
292 if (remaining == 0) /* nothing to do, ignore */ 294 if (remaining == 0) /* nothing to do, ignore */
293 return 0; 295 return 0;
@@ -308,19 +310,28 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
308 data_area_of_target += total_in_buf; 310 data_area_of_target += total_in_buf;
309 311
310 /* copy second buffer into end of first buffer */ 312 /* copy second buffer into end of first buffer */
311 memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2);
312 total_in_buf += total_in_buf2; 313 total_in_buf += total_in_buf2;
314 /* is the result too big for the field? */
315 if (total_in_buf > USHRT_MAX)
316 return -EPROTO;
313 put_unaligned_le16(total_in_buf, &pSMBt->t2_rsp.DataCount); 317 put_unaligned_le16(total_in_buf, &pSMBt->t2_rsp.DataCount);
314 byte_count = get_bcc_le(pTargetSMB);
315 byte_count += total_in_buf2;
316 put_bcc_le(byte_count, pTargetSMB);
317 318
318 byte_count = pTargetSMB->smb_buf_length; 319 /* fix up the BCC */
320 byte_count = get_bcc(pTargetSMB);
319 byte_count += total_in_buf2; 321 byte_count += total_in_buf2;
322 /* is the result too big for the field? */
323 if (byte_count > USHRT_MAX)
324 return -EPROTO;
325 put_bcc(byte_count, pTargetSMB);
320 326
321 /* BB also add check that we are not beyond maximum buffer size */ 327 byte_count = be32_to_cpu(pTargetSMB->smb_buf_length);
328 byte_count += total_in_buf2;
329 /* don't allow buffer to overflow */
330 if (byte_count > CIFSMaxBufSize)
331 return -ENOBUFS;
332 pTargetSMB->smb_buf_length = cpu_to_be32(byte_count);
322 333
323 pTargetSMB->smb_buf_length = byte_count; 334 memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2);
324 335
325 if (remaining == total_in_buf2) { 336 if (remaining == total_in_buf2) {
326 cFYI(1, "found the last secondary response"); 337 cFYI(1, "found the last secondary response");
@@ -485,8 +496,7 @@ incomplete_rcv:
485 /* Note that FC 1001 length is big endian on the wire, 496 /* Note that FC 1001 length is big endian on the wire,
486 but we convert it here so it is always manipulated 497 but we convert it here so it is always manipulated
487 as host byte order */ 498 as host byte order */
488 pdu_length = be32_to_cpu((__force __be32)smb_buffer->smb_buf_length); 499 pdu_length = be32_to_cpu(smb_buffer->smb_buf_length);
489 smb_buffer->smb_buf_length = pdu_length;
490 500
491 cFYI(1, "rfc1002 length 0x%x", pdu_length+4); 501 cFYI(1, "rfc1002 length 0x%x", pdu_length+4);
492 502
@@ -607,59 +617,63 @@ incomplete_rcv:
607 list_for_each_safe(tmp, tmp2, &server->pending_mid_q) { 617 list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
608 mid_entry = list_entry(tmp, struct mid_q_entry, qhead); 618 mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
609 619
610 if ((mid_entry->mid == smb_buffer->Mid) && 620 if (mid_entry->mid != smb_buffer->Mid ||
611 (mid_entry->midState == MID_REQUEST_SUBMITTED) && 621 mid_entry->midState != MID_REQUEST_SUBMITTED ||
612 (mid_entry->command == smb_buffer->Command)) { 622 mid_entry->command != smb_buffer->Command) {
613 if (length == 0 && 623 mid_entry = NULL;
614 check2ndT2(smb_buffer, server->maxBuf) > 0) { 624 continue;
615 /* We have a multipart transact2 resp */ 625 }
616 isMultiRsp = true; 626
617 if (mid_entry->resp_buf) { 627 if (length == 0 &&
618 /* merge response - fix up 1st*/ 628 check2ndT2(smb_buffer, server->maxBuf) > 0) {
619 if (coalesce_t2(smb_buffer, 629 /* We have a multipart transact2 resp */
620 mid_entry->resp_buf)) { 630 isMultiRsp = true;
621 mid_entry->multiRsp = 631 if (mid_entry->resp_buf) {
622 true; 632 /* merge response - fix up 1st*/
623 break; 633 length = coalesce_t2(smb_buffer,
624 } else { 634 mid_entry->resp_buf);
625 /* all parts received */ 635 if (length > 0) {
626 mid_entry->multiEnd = 636 length = 0;
627 true; 637 mid_entry->multiRsp = true;
628 goto multi_t2_fnd; 638 break;
629 }
630 } else { 639 } else {
631 if (!isLargeBuf) { 640 /* all parts received or
632 cERROR(1, "1st trans2 resp needs bigbuf"); 641 * packet is malformed
633 /* BB maybe we can fix this up, switch 642 */
634 to already allocated large buffer? */ 643 mid_entry->multiEnd = true;
635 } else { 644 goto multi_t2_fnd;
636 /* Have first buffer */ 645 }
637 mid_entry->resp_buf = 646 } else {
638 smb_buffer; 647 if (!isLargeBuf) {
639 mid_entry->largeBuf = 648 /*
640 true; 649 * FIXME: switch to already
641 bigbuf = NULL; 650 * allocated largebuf?
642 } 651 */
652 cERROR(1, "1st trans2 resp "
653 "needs bigbuf");
654 } else {
655 /* Have first buffer */
656 mid_entry->resp_buf =
657 smb_buffer;
658 mid_entry->largeBuf = true;
659 bigbuf = NULL;
643 } 660 }
644 break;
645 } 661 }
646 mid_entry->resp_buf = smb_buffer; 662 break;
647 mid_entry->largeBuf = isLargeBuf; 663 }
664 mid_entry->resp_buf = smb_buffer;
665 mid_entry->largeBuf = isLargeBuf;
648multi_t2_fnd: 666multi_t2_fnd:
649 if (length == 0) 667 if (length == 0)
650 mid_entry->midState = 668 mid_entry->midState = MID_RESPONSE_RECEIVED;
651 MID_RESPONSE_RECEIVED; 669 else
652 else 670 mid_entry->midState = MID_RESPONSE_MALFORMED;
653 mid_entry->midState =
654 MID_RESPONSE_MALFORMED;
655#ifdef CONFIG_CIFS_STATS2 671#ifdef CONFIG_CIFS_STATS2
656 mid_entry->when_received = jiffies; 672 mid_entry->when_received = jiffies;
657#endif 673#endif
658 list_del_init(&mid_entry->qhead); 674 list_del_init(&mid_entry->qhead);
659 mid_entry->callback(mid_entry); 675 mid_entry->callback(mid_entry);
660 break; 676 break;
661 }
662 mid_entry = NULL;
663 } 677 }
664 spin_unlock(&GlobalMid_Lock); 678 spin_unlock(&GlobalMid_Lock);
665 679
@@ -721,7 +735,7 @@ multi_t2_fnd:
721 sock_release(csocket); 735 sock_release(csocket);
722 server->ssocket = NULL; 736 server->ssocket = NULL;
723 } 737 }
724 /* buffer usuallly freed in free_mid - need to free it here on exit */ 738 /* buffer usually freed in free_mid - need to free it here on exit */
725 cifs_buf_release(bigbuf); 739 cifs_buf_release(bigbuf);
726 if (smallbuf) /* no sense logging a debug message if NULL */ 740 if (smallbuf) /* no sense logging a debug message if NULL */
727 cifs_small_buf_release(smallbuf); 741 cifs_small_buf_release(smallbuf);
@@ -804,10 +818,11 @@ extract_hostname(const char *unc)
804} 818}
805 819
806static int 820static int
807cifs_parse_mount_options(char *options, const char *devname, 821cifs_parse_mount_options(const char *mountdata, const char *devname,
808 struct smb_vol *vol) 822 struct smb_vol *vol)
809{ 823{
810 char *value, *data, *end; 824 char *value, *data, *end;
825 char *mountdata_copy, *options;
811 unsigned int temp_len, i, j; 826 unsigned int temp_len, i, j;
812 char separator[2]; 827 char separator[2];
813 short int override_uid = -1; 828 short int override_uid = -1;
@@ -847,9 +862,14 @@ cifs_parse_mount_options(char *options, const char *devname,
847 862
848 vol->actimeo = CIFS_DEF_ACTIMEO; 863 vol->actimeo = CIFS_DEF_ACTIMEO;
849 864
850 if (!options) 865 if (!mountdata)
851 return 1; 866 goto cifs_parse_mount_err;
852 867
868 mountdata_copy = kstrndup(mountdata, PAGE_SIZE, GFP_KERNEL);
869 if (!mountdata_copy)
870 goto cifs_parse_mount_err;
871
872 options = mountdata_copy;
853 end = options + strlen(options); 873 end = options + strlen(options);
854 if (strncmp(options, "sep=", 4) == 0) { 874 if (strncmp(options, "sep=", 4) == 0) {
855 if (options[4] != 0) { 875 if (options[4] != 0) {
@@ -875,17 +895,22 @@ cifs_parse_mount_options(char *options, const char *devname,
875 if (!value) { 895 if (!value) {
876 printk(KERN_WARNING 896 printk(KERN_WARNING
877 "CIFS: invalid or missing username\n"); 897 "CIFS: invalid or missing username\n");
878 return 1; /* needs_arg; */ 898 goto cifs_parse_mount_err;
879 } else if (!*value) { 899 } else if (!*value) {
880 /* null user, ie anonymous, authentication */ 900 /* null user, ie anonymous, authentication */
881 vol->nullauth = 1; 901 vol->nullauth = 1;
882 } 902 }
883 if (strnlen(value, MAX_USERNAME_SIZE) < 903 if (strnlen(value, MAX_USERNAME_SIZE) <
884 MAX_USERNAME_SIZE) { 904 MAX_USERNAME_SIZE) {
885 vol->username = value; 905 vol->username = kstrdup(value, GFP_KERNEL);
906 if (!vol->username) {
907 printk(KERN_WARNING "CIFS: no memory "
908 "for username\n");
909 goto cifs_parse_mount_err;
910 }
886 } else { 911 } else {
887 printk(KERN_WARNING "CIFS: username too long\n"); 912 printk(KERN_WARNING "CIFS: username too long\n");
888 return 1; 913 goto cifs_parse_mount_err;
889 } 914 }
890 } else if (strnicmp(data, "pass", 4) == 0) { 915 } else if (strnicmp(data, "pass", 4) == 0) {
891 if (!value) { 916 if (!value) {
@@ -949,7 +974,7 @@ cifs_parse_mount_options(char *options, const char *devname,
949 if (vol->password == NULL) { 974 if (vol->password == NULL) {
950 printk(KERN_WARNING "CIFS: no memory " 975 printk(KERN_WARNING "CIFS: no memory "
951 "for password\n"); 976 "for password\n");
952 return 1; 977 goto cifs_parse_mount_err;
953 } 978 }
954 for (i = 0, j = 0; i < temp_len; i++, j++) { 979 for (i = 0, j = 0; i < temp_len; i++, j++) {
955 vol->password[j] = value[i]; 980 vol->password[j] = value[i];
@@ -965,7 +990,7 @@ cifs_parse_mount_options(char *options, const char *devname,
965 if (vol->password == NULL) { 990 if (vol->password == NULL) {
966 printk(KERN_WARNING "CIFS: no memory " 991 printk(KERN_WARNING "CIFS: no memory "
967 "for password\n"); 992 "for password\n");
968 return 1; 993 goto cifs_parse_mount_err;
969 } 994 }
970 strcpy(vol->password, value); 995 strcpy(vol->password, value);
971 } 996 }
@@ -975,11 +1000,16 @@ cifs_parse_mount_options(char *options, const char *devname,
975 vol->UNCip = NULL; 1000 vol->UNCip = NULL;
976 } else if (strnlen(value, INET6_ADDRSTRLEN) < 1001 } else if (strnlen(value, INET6_ADDRSTRLEN) <
977 INET6_ADDRSTRLEN) { 1002 INET6_ADDRSTRLEN) {
978 vol->UNCip = value; 1003 vol->UNCip = kstrdup(value, GFP_KERNEL);
1004 if (!vol->UNCip) {
1005 printk(KERN_WARNING "CIFS: no memory "
1006 "for UNC IP\n");
1007 goto cifs_parse_mount_err;
1008 }
979 } else { 1009 } else {
980 printk(KERN_WARNING "CIFS: ip address " 1010 printk(KERN_WARNING "CIFS: ip address "
981 "too long\n"); 1011 "too long\n");
982 return 1; 1012 goto cifs_parse_mount_err;
983 } 1013 }
984 } else if (strnicmp(data, "sec", 3) == 0) { 1014 } else if (strnicmp(data, "sec", 3) == 0) {
985 if (!value || !*value) { 1015 if (!value || !*value) {
@@ -992,7 +1022,7 @@ cifs_parse_mount_options(char *options, const char *devname,
992 /* vol->secFlg |= CIFSSEC_MUST_SEAL | 1022 /* vol->secFlg |= CIFSSEC_MUST_SEAL |
993 CIFSSEC_MAY_KRB5; */ 1023 CIFSSEC_MAY_KRB5; */
994 cERROR(1, "Krb5 cifs privacy not supported"); 1024 cERROR(1, "Krb5 cifs privacy not supported");
995 return 1; 1025 goto cifs_parse_mount_err;
996 } else if (strnicmp(value, "krb5", 4) == 0) { 1026 } else if (strnicmp(value, "krb5", 4) == 0) {
997 vol->secFlg |= CIFSSEC_MAY_KRB5; 1027 vol->secFlg |= CIFSSEC_MAY_KRB5;
998 } else if (strnicmp(value, "ntlmsspi", 8) == 0) { 1028 } else if (strnicmp(value, "ntlmsspi", 8) == 0) {
@@ -1022,7 +1052,23 @@ cifs_parse_mount_options(char *options, const char *devname,
1022 vol->nullauth = 1; 1052 vol->nullauth = 1;
1023 } else { 1053 } else {
1024 cERROR(1, "bad security option: %s", value); 1054 cERROR(1, "bad security option: %s", value);
1025 return 1; 1055 goto cifs_parse_mount_err;
1056 }
1057 } else if (strnicmp(data, "vers", 3) == 0) {
1058 if (!value || !*value) {
1059 cERROR(1, "no protocol version specified"
1060 " after vers= mount option");
1061 } else if ((strnicmp(value, "cifs", 4) == 0) ||
1062 (strnicmp(value, "1", 1) == 0)) {
1063 /* this is the default */
1064 continue;
1065 } else if ((strnicmp(value, "smb2", 4) == 0) ||
1066 (strnicmp(value, "2", 1) == 0)) {
1067#ifdef CONFIG_CIFS_SMB2
1068 vol->use_smb2 = true;
1069#else
1070 cERROR(1, "smb2 support not enabled");
1071#endif /* CONFIG_CIFS_SMB2 */
1026 } 1072 }
1027 } else if ((strnicmp(data, "unc", 3) == 0) 1073 } else if ((strnicmp(data, "unc", 3) == 0)
1028 || (strnicmp(data, "target", 6) == 0) 1074 || (strnicmp(data, "target", 6) == 0)
@@ -1030,12 +1076,12 @@ cifs_parse_mount_options(char *options, const char *devname,
1030 if (!value || !*value) { 1076 if (!value || !*value) {
1031 printk(KERN_WARNING "CIFS: invalid path to " 1077 printk(KERN_WARNING "CIFS: invalid path to "
1032 "network resource\n"); 1078 "network resource\n");
1033 return 1; /* needs_arg; */ 1079 goto cifs_parse_mount_err;
1034 } 1080 }
1035 if ((temp_len = strnlen(value, 300)) < 300) { 1081 if ((temp_len = strnlen(value, 300)) < 300) {
1036 vol->UNC = kmalloc(temp_len+1, GFP_KERNEL); 1082 vol->UNC = kmalloc(temp_len+1, GFP_KERNEL);
1037 if (vol->UNC == NULL) 1083 if (vol->UNC == NULL)
1038 return 1; 1084 goto cifs_parse_mount_err;
1039 strcpy(vol->UNC, value); 1085 strcpy(vol->UNC, value);
1040 if (strncmp(vol->UNC, "//", 2) == 0) { 1086 if (strncmp(vol->UNC, "//", 2) == 0) {
1041 vol->UNC[0] = '\\'; 1087 vol->UNC[0] = '\\';
@@ -1044,27 +1090,32 @@ cifs_parse_mount_options(char *options, const char *devname,
1044 printk(KERN_WARNING 1090 printk(KERN_WARNING
1045 "CIFS: UNC Path does not begin " 1091 "CIFS: UNC Path does not begin "
1046 "with // or \\\\ \n"); 1092 "with // or \\\\ \n");
1047 return 1; 1093 goto cifs_parse_mount_err;
1048 } 1094 }
1049 } else { 1095 } else {
1050 printk(KERN_WARNING "CIFS: UNC name too long\n"); 1096 printk(KERN_WARNING "CIFS: UNC name too long\n");
1051 return 1; 1097 goto cifs_parse_mount_err;
1052 } 1098 }
1053 } else if ((strnicmp(data, "domain", 3) == 0) 1099 } else if ((strnicmp(data, "domain", 3) == 0)
1054 || (strnicmp(data, "workgroup", 5) == 0)) { 1100 || (strnicmp(data, "workgroup", 5) == 0)) {
1055 if (!value || !*value) { 1101 if (!value || !*value) {
1056 printk(KERN_WARNING "CIFS: invalid domain name\n"); 1102 printk(KERN_WARNING "CIFS: invalid domain name\n");
1057 return 1; /* needs_arg; */ 1103 goto cifs_parse_mount_err;
1058 } 1104 }
1059 /* BB are there cases in which a comma can be valid in 1105 /* BB are there cases in which a comma can be valid in
1060 a domain name and need special handling? */ 1106 a domain name and need special handling? */
1061 if (strnlen(value, 256) < 256) { 1107 if (strnlen(value, 256) < 256) {
1062 vol->domainname = value; 1108 vol->domainname = kstrdup(value, GFP_KERNEL);
1109 if (!vol->domainname) {
1110 printk(KERN_WARNING "CIFS: no memory "
1111 "for domainname\n");
1112 goto cifs_parse_mount_err;
1113 }
1063 cFYI(1, "Domain name set"); 1114 cFYI(1, "Domain name set");
1064 } else { 1115 } else {
1065 printk(KERN_WARNING "CIFS: domain name too " 1116 printk(KERN_WARNING "CIFS: domain name too "
1066 "long\n"); 1117 "long\n");
1067 return 1; 1118 goto cifs_parse_mount_err;
1068 } 1119 }
1069 } else if (strnicmp(data, "srcaddr", 7) == 0) { 1120 } else if (strnicmp(data, "srcaddr", 7) == 0) {
1070 vol->srcaddr.ss_family = AF_UNSPEC; 1121 vol->srcaddr.ss_family = AF_UNSPEC;
@@ -1072,7 +1123,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1072 if (!value || !*value) { 1123 if (!value || !*value) {
1073 printk(KERN_WARNING "CIFS: srcaddr value" 1124 printk(KERN_WARNING "CIFS: srcaddr value"
1074 " not specified.\n"); 1125 " not specified.\n");
1075 return 1; /* needs_arg; */ 1126 goto cifs_parse_mount_err;
1076 } 1127 }
1077 i = cifs_convert_address((struct sockaddr *)&vol->srcaddr, 1128 i = cifs_convert_address((struct sockaddr *)&vol->srcaddr,
1078 value, strlen(value)); 1129 value, strlen(value));
@@ -1080,20 +1131,20 @@ cifs_parse_mount_options(char *options, const char *devname,
1080 printk(KERN_WARNING "CIFS: Could not parse" 1131 printk(KERN_WARNING "CIFS: Could not parse"
1081 " srcaddr: %s\n", 1132 " srcaddr: %s\n",
1082 value); 1133 value);
1083 return 1; 1134 goto cifs_parse_mount_err;
1084 } 1135 }
1085 } else if (strnicmp(data, "prefixpath", 10) == 0) { 1136 } else if (strnicmp(data, "prefixpath", 10) == 0) {
1086 if (!value || !*value) { 1137 if (!value || !*value) {
1087 printk(KERN_WARNING 1138 printk(KERN_WARNING
1088 "CIFS: invalid path prefix\n"); 1139 "CIFS: invalid path prefix\n");
1089 return 1; /* needs_argument */ 1140 goto cifs_parse_mount_err;
1090 } 1141 }
1091 if ((temp_len = strnlen(value, 1024)) < 1024) { 1142 if ((temp_len = strnlen(value, 1024)) < 1024) {
1092 if (value[0] != '/') 1143 if (value[0] != '/')
1093 temp_len++; /* missing leading slash */ 1144 temp_len++; /* missing leading slash */
1094 vol->prepath = kmalloc(temp_len+1, GFP_KERNEL); 1145 vol->prepath = kmalloc(temp_len+1, GFP_KERNEL);
1095 if (vol->prepath == NULL) 1146 if (vol->prepath == NULL)
1096 return 1; 1147 goto cifs_parse_mount_err;
1097 if (value[0] != '/') { 1148 if (value[0] != '/') {
1098 vol->prepath[0] = '/'; 1149 vol->prepath[0] = '/';
1099 strcpy(vol->prepath+1, value); 1150 strcpy(vol->prepath+1, value);
@@ -1102,24 +1153,33 @@ cifs_parse_mount_options(char *options, const char *devname,
1102 cFYI(1, "prefix path %s", vol->prepath); 1153 cFYI(1, "prefix path %s", vol->prepath);
1103 } else { 1154 } else {
1104 printk(KERN_WARNING "CIFS: prefix too long\n"); 1155 printk(KERN_WARNING "CIFS: prefix too long\n");
1105 return 1; 1156 goto cifs_parse_mount_err;
1106 } 1157 }
1107 } else if (strnicmp(data, "iocharset", 9) == 0) { 1158 } else if (strnicmp(data, "iocharset", 9) == 0) {
1108 if (!value || !*value) { 1159 if (!value || !*value) {
1109 printk(KERN_WARNING "CIFS: invalid iocharset " 1160 printk(KERN_WARNING "CIFS: invalid iocharset "
1110 "specified\n"); 1161 "specified\n");
1111 return 1; /* needs_arg; */ 1162 goto cifs_parse_mount_err;
1112 } 1163 }
1113 if (strnlen(value, 65) < 65) { 1164 if (strnlen(value, 65) < 65) {
1114 if (strnicmp(value, "default", 7)) 1165 if (strnicmp(value, "default", 7)) {
1115 vol->iocharset = value; 1166 vol->iocharset = kstrdup(value,
1167 GFP_KERNEL);
1168
1169 if (!vol->iocharset) {
1170 printk(KERN_WARNING "CIFS: no "
1171 "memory for"
1172 "charset\n");
1173 goto cifs_parse_mount_err;
1174 }
1175 }
1116 /* if iocharset not set then load_nls_default 1176 /* if iocharset not set then load_nls_default
1117 is used by caller */ 1177 is used by caller */
1118 cFYI(1, "iocharset set to %s", value); 1178 cFYI(1, "iocharset set to %s", value);
1119 } else { 1179 } else {
1120 printk(KERN_WARNING "CIFS: iocharset name " 1180 printk(KERN_WARNING "CIFS: iocharset name "
1121 "too long.\n"); 1181 "too long.\n");
1122 return 1; 1182 goto cifs_parse_mount_err;
1123 } 1183 }
1124 } else if (!strnicmp(data, "uid", 3) && value && *value) { 1184 } else if (!strnicmp(data, "uid", 3) && value && *value) {
1125 vol->linux_uid = simple_strtoul(value, &value, 0); 1185 vol->linux_uid = simple_strtoul(value, &value, 0);
@@ -1232,7 +1292,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1232 if (vol->actimeo > CIFS_MAX_ACTIMEO) { 1292 if (vol->actimeo > CIFS_MAX_ACTIMEO) {
1233 cERROR(1, "CIFS: attribute cache" 1293 cERROR(1, "CIFS: attribute cache"
1234 "timeout too large"); 1294 "timeout too large");
1235 return 1; 1295 goto cifs_parse_mount_err;
1236 } 1296 }
1237 } 1297 }
1238 } else if (strnicmp(data, "credentials", 4) == 0) { 1298 } else if (strnicmp(data, "credentials", 4) == 0) {
@@ -1376,7 +1436,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1376#ifndef CONFIG_CIFS_FSCACHE 1436#ifndef CONFIG_CIFS_FSCACHE
1377 cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE" 1437 cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE"
1378 "kernel config option set"); 1438 "kernel config option set");
1379 return 1; 1439 goto cifs_parse_mount_err;
1380#endif 1440#endif
1381 vol->fsc = true; 1441 vol->fsc = true;
1382 } else if (strnicmp(data, "mfsymlinks", 10) == 0) { 1442 } else if (strnicmp(data, "mfsymlinks", 10) == 0) {
@@ -1391,12 +1451,12 @@ cifs_parse_mount_options(char *options, const char *devname,
1391 if (devname == NULL) { 1451 if (devname == NULL) {
1392 printk(KERN_WARNING "CIFS: Missing UNC name for mount " 1452 printk(KERN_WARNING "CIFS: Missing UNC name for mount "
1393 "target\n"); 1453 "target\n");
1394 return 1; 1454 goto cifs_parse_mount_err;
1395 } 1455 }
1396 if ((temp_len = strnlen(devname, 300)) < 300) { 1456 if ((temp_len = strnlen(devname, 300)) < 300) {
1397 vol->UNC = kmalloc(temp_len+1, GFP_KERNEL); 1457 vol->UNC = kmalloc(temp_len+1, GFP_KERNEL);
1398 if (vol->UNC == NULL) 1458 if (vol->UNC == NULL)
1399 return 1; 1459 goto cifs_parse_mount_err;
1400 strcpy(vol->UNC, devname); 1460 strcpy(vol->UNC, devname);
1401 if (strncmp(vol->UNC, "//", 2) == 0) { 1461 if (strncmp(vol->UNC, "//", 2) == 0) {
1402 vol->UNC[0] = '\\'; 1462 vol->UNC[0] = '\\';
@@ -1404,21 +1464,21 @@ cifs_parse_mount_options(char *options, const char *devname,
1404 } else if (strncmp(vol->UNC, "\\\\", 2) != 0) { 1464 } else if (strncmp(vol->UNC, "\\\\", 2) != 0) {
1405 printk(KERN_WARNING "CIFS: UNC Path does not " 1465 printk(KERN_WARNING "CIFS: UNC Path does not "
1406 "begin with // or \\\\ \n"); 1466 "begin with // or \\\\ \n");
1407 return 1; 1467 goto cifs_parse_mount_err;
1408 } 1468 }
1409 value = strpbrk(vol->UNC+2, "/\\"); 1469 value = strpbrk(vol->UNC+2, "/\\");
1410 if (value) 1470 if (value)
1411 *value = '\\'; 1471 *value = '\\';
1412 } else { 1472 } else {
1413 printk(KERN_WARNING "CIFS: UNC name too long\n"); 1473 printk(KERN_WARNING "CIFS: UNC name too long\n");
1414 return 1; 1474 goto cifs_parse_mount_err;
1415 } 1475 }
1416 } 1476 }
1417 1477
1418 if (vol->multiuser && !(vol->secFlg & CIFSSEC_MAY_KRB5)) { 1478 if (vol->multiuser && !(vol->secFlg & CIFSSEC_MAY_KRB5)) {
1419 cERROR(1, "Multiuser mounts currently require krb5 " 1479 cERROR(1, "Multiuser mounts currently require krb5 "
1420 "authentication!"); 1480 "authentication!");
1421 return 1; 1481 goto cifs_parse_mount_err;
1422 } 1482 }
1423 1483
1424 if (vol->UNCip == NULL) 1484 if (vol->UNCip == NULL)
@@ -1436,7 +1496,12 @@ cifs_parse_mount_options(char *options, const char *devname,
1436 printk(KERN_NOTICE "CIFS: ignoring forcegid mount option " 1496 printk(KERN_NOTICE "CIFS: ignoring forcegid mount option "
1437 "specified with no gid= option.\n"); 1497 "specified with no gid= option.\n");
1438 1498
1499 kfree(mountdata_copy);
1439 return 0; 1500 return 0;
1501
1502cifs_parse_mount_err:
1503 kfree(mountdata_copy);
1504 return 1;
1440} 1505}
1441 1506
1442/** Returns true if srcaddr isn't specified and rhs isn't 1507/** Returns true if srcaddr isn't specified and rhs isn't
@@ -2266,7 +2331,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
2266 smb_buf = (struct smb_hdr *)ses_init_buf; 2331 smb_buf = (struct smb_hdr *)ses_init_buf;
2267 2332
2268 /* sizeof RFC1002_SESSION_REQUEST with no scope */ 2333 /* sizeof RFC1002_SESSION_REQUEST with no scope */
2269 smb_buf->smb_buf_length = 0x81000044; 2334 smb_buf->smb_buf_length = cpu_to_be32(0x81000044);
2270 rc = smb_send(server, smb_buf, 0x44); 2335 rc = smb_send(server, smb_buf, 0x44);
2271 kfree(ses_init_buf); 2336 kfree(ses_init_buf);
2272 /* 2337 /*
@@ -2659,6 +2724,11 @@ is_path_accessible(int xid, struct cifsTconInfo *tcon,
2659 0 /* not legacy */, cifs_sb->local_nls, 2724 0 /* not legacy */, cifs_sb->local_nls,
2660 cifs_sb->mnt_cifs_flags & 2725 cifs_sb->mnt_cifs_flags &
2661 CIFS_MOUNT_MAP_SPECIAL_CHR); 2726 CIFS_MOUNT_MAP_SPECIAL_CHR);
2727
2728 if (rc == -EOPNOTSUPP || rc == -EINVAL)
2729 rc = SMBQueryInformation(xid, tcon, full_path, pfile_info,
2730 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
2731 CIFS_MOUNT_MAP_SPECIAL_CHR);
2662 kfree(pfile_info); 2732 kfree(pfile_info);
2663 return rc; 2733 return rc;
2664} 2734}
@@ -2672,8 +2742,12 @@ cleanup_volume_info(struct smb_vol **pvolume_info)
2672 return; 2742 return;
2673 2743
2674 volume_info = *pvolume_info; 2744 volume_info = *pvolume_info;
2745 kfree(volume_info->username);
2675 kzfree(volume_info->password); 2746 kzfree(volume_info->password);
2676 kfree(volume_info->UNC); 2747 kfree(volume_info->UNC);
2748 kfree(volume_info->UNCip);
2749 kfree(volume_info->domainname);
2750 kfree(volume_info->iocharset);
2677 kfree(volume_info->prepath); 2751 kfree(volume_info->prepath);
2678 kfree(volume_info); 2752 kfree(volume_info);
2679 *pvolume_info = NULL; 2753 *pvolume_info = NULL;
@@ -2710,11 +2784,65 @@ build_unc_path_to_root(const struct smb_vol *volume_info,
2710 full_path[unc_len + cifs_sb->prepathlen] = 0; /* add trailing null */ 2784 full_path[unc_len + cifs_sb->prepathlen] = 0; /* add trailing null */
2711 return full_path; 2785 return full_path;
2712} 2786}
2787
2788/*
2789 * Perform a dfs referral query for a share and (optionally) prefix:
2790 * check_prefix selects the full UNC+prefix path, else volume_info->UNC.
2791 * If a referral is found, the old cifs_sb->mountdata is freed and replaced
2792 * by a newly composed option string for the submount (or by NULL, with the
2793 * PTR_ERR code returned, if composing fails). Otherwise it is left untouched.
2794 *
2795 * Returns PTR_ERR from build_unc_path_to_root if that fails, else the rc
2796 * from get_dfs_path (0 is what the visible callers treat as "retry mount").
2797 */
2798static int
2799expand_dfs_referral(int xid, struct cifsSesInfo *pSesInfo,
2800 struct smb_vol *volume_info, struct cifs_sb_info *cifs_sb,
2801 int check_prefix)
2802{
2803 int rc;
2804 unsigned int num_referrals = 0;
2805 struct dfs_info3_param *referrals = NULL;
2806 char *full_path = NULL, *ref_path = NULL, *mdata = NULL;
2807
2808 full_path = build_unc_path_to_root(volume_info, cifs_sb);
2809 if (IS_ERR(full_path))
2810 return PTR_ERR(full_path); /* nothing else allocated yet, so no cleanup */
2811
2812 /* For DFS paths, skip the first '\' of the UNC */
2813 ref_path = check_prefix ? full_path + 1 : volume_info->UNC + 1;
2814
2815 rc = get_dfs_path(xid, pSesInfo , ref_path, cifs_sb->local_nls,
2816 &num_referrals, &referrals,
2817 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
2818
2819 if (!rc && num_referrals > 0) { /* NOTE(review): rc == 0 with zero referrals skips this and still returns 0 */
2820 char *fake_devname = NULL;
2821
2822 mdata = cifs_compose_mount_options(cifs_sb->mountdata,
2823 full_path + 1, referrals, /* always built from full_path, even when check_prefix is 0 */
2824 &fake_devname);
2825
2826 free_dfs_info_array(referrals, num_referrals);
2827 kfree(fake_devname); /* only needed as an out-parameter; not used here */
2828
2829 if (cifs_sb->mountdata != NULL) /* NOTE(review): redundant, kfree(NULL) is a no-op */
2830 kfree(cifs_sb->mountdata);
2831
2832 if (IS_ERR(mdata)) { /* old options are already freed: report the error and store NULL */
2833 rc = PTR_ERR(mdata);
2834 mdata = NULL;
2835 }
2836 cifs_sb->mountdata = mdata;
2837 }
2838 kfree(full_path);
2839 return rc;
2840}
2713#endif 2841#endif
2714 2842
2715int 2843int
2716cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, 2844cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2717 char *mount_data_global, const char *devname) 2845 const char *devname)
2718{ 2846{
2719 int rc; 2847 int rc;
2720 int xid; 2848 int xid;
@@ -2723,13 +2851,20 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2723 struct cifsTconInfo *tcon; 2851 struct cifsTconInfo *tcon;
2724 struct TCP_Server_Info *srvTcp; 2852 struct TCP_Server_Info *srvTcp;
2725 char *full_path; 2853 char *full_path;
2726 char *mount_data = mount_data_global;
2727 struct tcon_link *tlink; 2854 struct tcon_link *tlink;
2728#ifdef CONFIG_CIFS_DFS_UPCALL 2855#ifdef CONFIG_CIFS_DFS_UPCALL
2729 struct dfs_info3_param *referrals = NULL;
2730 unsigned int num_referrals = 0;
2731 int referral_walks_count = 0; 2856 int referral_walks_count = 0;
2732try_mount_again: 2857try_mount_again:
2858 /* cleanup activities if we're chasing a referral */
2859 if (referral_walks_count) {
2860 if (tcon)
2861 cifs_put_tcon(tcon);
2862 else if (pSesInfo)
2863 cifs_put_smb_ses(pSesInfo);
2864
2865 cleanup_volume_info(&volume_info);
2866 FreeXid(xid);
2867 }
2733#endif 2868#endif
2734 rc = 0; 2869 rc = 0;
2735 tcon = NULL; 2870 tcon = NULL;
@@ -2746,7 +2881,8 @@ try_mount_again:
2746 goto out; 2881 goto out;
2747 } 2882 }
2748 2883
2749 if (cifs_parse_mount_options(mount_data, devname, volume_info)) { 2884 if (cifs_parse_mount_options(cifs_sb->mountdata, devname,
2885 volume_info)) {
2750 rc = -EINVAL; 2886 rc = -EINVAL;
2751 goto out; 2887 goto out;
2752 } 2888 }
@@ -2842,6 +2978,24 @@ try_mount_again:
2842 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE)); 2978 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE));
2843 2979
2844remote_path_check: 2980remote_path_check:
2981#ifdef CONFIG_CIFS_DFS_UPCALL
2982 /*
2983 * Perform an unconditional check for whether there are DFS
2984 * referrals for this path without prefix, to provide support
2985 * for DFS referrals from w2k8 servers which don't seem to respond
2986 * with PATH_NOT_COVERED to requests that include the prefix.
2987 * Chase the referral if found, otherwise continue normally.
2988 */
2989 if (referral_walks_count == 0) {
2990 int refrc = expand_dfs_referral(xid, pSesInfo, volume_info,
2991 cifs_sb, false);
2992 if (!refrc) {
2993 referral_walks_count++;
2994 goto try_mount_again;
2995 }
2996 }
2997#endif
2998
2845 /* check if a whole path (including prepath) is not remote */ 2999 /* check if a whole path (including prepath) is not remote */
2846 if (!rc && tcon) { 3000 if (!rc && tcon) {
2847 /* build_path_to_root works only when we have a valid tcon */ 3001 /* build_path_to_root works only when we have a valid tcon */
@@ -2875,46 +3029,15 @@ remote_path_check:
2875 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0) 3029 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0)
2876 convert_delimiter(cifs_sb->prepath, 3030 convert_delimiter(cifs_sb->prepath,
2877 CIFS_DIR_SEP(cifs_sb)); 3031 CIFS_DIR_SEP(cifs_sb));
2878 full_path = build_unc_path_to_root(volume_info, cifs_sb);
2879 if (IS_ERR(full_path)) {
2880 rc = PTR_ERR(full_path);
2881 goto mount_fail_check;
2882 }
2883
2884 cFYI(1, "Getting referral for: %s", full_path);
2885 rc = get_dfs_path(xid, pSesInfo , full_path + 1,
2886 cifs_sb->local_nls, &num_referrals, &referrals,
2887 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
2888 if (!rc && num_referrals > 0) {
2889 char *fake_devname = NULL;
2890
2891 if (mount_data != mount_data_global)
2892 kfree(mount_data);
2893 3032
2894 mount_data = cifs_compose_mount_options( 3033 rc = expand_dfs_referral(xid, pSesInfo, volume_info, cifs_sb,
2895 cifs_sb->mountdata, full_path + 1, 3034 true);
2896 referrals, &fake_devname);
2897 3035
2898 free_dfs_info_array(referrals, num_referrals); 3036 if (!rc) {
2899 kfree(fake_devname);
2900 kfree(full_path);
2901
2902 if (IS_ERR(mount_data)) {
2903 rc = PTR_ERR(mount_data);
2904 mount_data = NULL;
2905 goto mount_fail_check;
2906 }
2907
2908 if (tcon)
2909 cifs_put_tcon(tcon);
2910 else if (pSesInfo)
2911 cifs_put_smb_ses(pSesInfo);
2912
2913 cleanup_volume_info(&volume_info);
2914 referral_walks_count++; 3037 referral_walks_count++;
2915 FreeXid(xid);
2916 goto try_mount_again; 3038 goto try_mount_again;
2917 } 3039 }
3040 goto mount_fail_check;
2918#else /* No DFS support, return error on mount */ 3041#else /* No DFS support, return error on mount */
2919 rc = -EOPNOTSUPP; 3042 rc = -EOPNOTSUPP;
2920#endif 3043#endif
@@ -2947,8 +3070,6 @@ remote_path_check:
2947mount_fail_check: 3070mount_fail_check:
2948 /* on error free sesinfo and tcon struct if needed */ 3071 /* on error free sesinfo and tcon struct if needed */
2949 if (rc) { 3072 if (rc) {
2950 if (mount_data != mount_data_global)
2951 kfree(mount_data);
2952 /* If find_unc succeeded then rc == 0 so we can not end */ 3073 /* If find_unc succeeded then rc == 0 so we can not end */
2953 /* up accidentally freeing someone elses tcon struct */ 3074 /* up accidentally freeing someone elses tcon struct */
2954 if (tcon) 3075 if (tcon)
@@ -3064,7 +3185,8 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3064 bcc_ptr += strlen("?????"); 3185 bcc_ptr += strlen("?????");
3065 bcc_ptr += 1; 3186 bcc_ptr += 1;
3066 count = bcc_ptr - &pSMB->Password[0]; 3187 count = bcc_ptr - &pSMB->Password[0];
3067 pSMB->hdr.smb_buf_length += count; 3188 pSMB->hdr.smb_buf_length = cpu_to_be32(be32_to_cpu(
3189 pSMB->hdr.smb_buf_length) + count);
3068 pSMB->ByteCount = cpu_to_le16(count); 3190 pSMB->ByteCount = cpu_to_le16(count);
3069 3191
3070 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length, 3192 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length,
@@ -3239,7 +3361,9 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid)
3239 struct cifsSesInfo *ses; 3361 struct cifsSesInfo *ses;
3240 struct cifsTconInfo *tcon = NULL; 3362 struct cifsTconInfo *tcon = NULL;
3241 struct smb_vol *vol_info; 3363 struct smb_vol *vol_info;
3242 char username[MAX_USERNAME_SIZE + 1]; 3364 char username[28]; /* big enough for "krb50x" + hex of ULONG_MAX 6+16 */
3365 /* We used to have this as MAX_USERNAME which is */
3366 /* way too big now (256 instead of 32) */
3243 3367
3244 vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL); 3368 vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL);
3245 if (vol_info == NULL) { 3369 if (vol_info == NULL) {
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 993f82045bf6..55d87ac52000 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -45,7 +45,7 @@
45#include "cifs_debug.h" 45#include "cifs_debug.h"
46#include "cifsfs.h" 46#include "cifsfs.h"
47 47
48#ifdef CONFIG_CIFS_EXPERIMENTAL 48#ifdef CIFS_NFSD_EXPORT
49static struct dentry *cifs_get_parent(struct dentry *dentry) 49static struct dentry *cifs_get_parent(struct dentry *dentry)
50{ 50{
51 /* BB need to add code here eventually to enable export via NFSD */ 51 /* BB need to add code here eventually to enable export via NFSD */
@@ -63,5 +63,5 @@ const struct export_operations cifs_export_ops = {
63 .encode_fs = */ 63 .encode_fs = */
64}; 64};
65 65
66#endif /* EXPERIMENTAL */ 66#endif /* CIFS_NFSD_EXPORT */
67 67
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index faf59529e847..c672afef0c09 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -857,95 +857,6 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
857 cifsi->server_eof = end_of_write; 857 cifsi->server_eof = end_of_write;
858} 858}
859 859
860ssize_t cifs_user_write(struct file *file, const char __user *write_data,
861 size_t write_size, loff_t *poffset)
862{
863 struct inode *inode = file->f_path.dentry->d_inode;
864 int rc = 0;
865 unsigned int bytes_written = 0;
866 unsigned int total_written;
867 struct cifs_sb_info *cifs_sb;
868 struct cifsTconInfo *pTcon;
869 int xid;
870 struct cifsFileInfo *open_file;
871 struct cifsInodeInfo *cifsi = CIFS_I(inode);
872
873 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
874
875 /* cFYI(1, " write %d bytes to offset %lld of %s", write_size,
876 *poffset, file->f_path.dentry->d_name.name); */
877
878 if (file->private_data == NULL)
879 return -EBADF;
880
881 open_file = file->private_data;
882 pTcon = tlink_tcon(open_file->tlink);
883
884 rc = generic_write_checks(file, poffset, &write_size, 0);
885 if (rc)
886 return rc;
887
888 xid = GetXid();
889
890 for (total_written = 0; write_size > total_written;
891 total_written += bytes_written) {
892 rc = -EAGAIN;
893 while (rc == -EAGAIN) {
894 if (file->private_data == NULL) {
895 /* file has been closed on us */
896 FreeXid(xid);
897 /* if we have gotten here we have written some data
898 and blocked, and the file has been freed on us while
899 we blocked so return what we managed to write */
900 return total_written;
901 }
902 if (open_file->invalidHandle) {
903 /* we could deadlock if we called
904 filemap_fdatawait from here so tell
905 reopen_file not to flush data to server
906 now */
907 rc = cifs_reopen_file(open_file, false);
908 if (rc != 0)
909 break;
910 }
911
912 rc = CIFSSMBWrite(xid, pTcon,
913 open_file->netfid,
914 min_t(const int, cifs_sb->wsize,
915 write_size - total_written),
916 *poffset, &bytes_written,
917 NULL, write_data + total_written, 0);
918 }
919 if (rc || (bytes_written == 0)) {
920 if (total_written)
921 break;
922 else {
923 FreeXid(xid);
924 return rc;
925 }
926 } else {
927 cifs_update_eof(cifsi, *poffset, bytes_written);
928 *poffset += bytes_written;
929 }
930 }
931
932 cifs_stats_bytes_written(pTcon, total_written);
933
934/* Do not update local mtime - server will set its actual value on write
935 * inode->i_ctime = inode->i_mtime =
936 * current_fs_time(inode->i_sb);*/
937 if (total_written > 0) {
938 spin_lock(&inode->i_lock);
939 if (*poffset > inode->i_size)
940 i_size_write(inode, *poffset);
941 spin_unlock(&inode->i_lock);
942 }
943 mark_inode_dirty_sync(inode);
944
945 FreeXid(xid);
946 return total_written;
947}
948
949static ssize_t cifs_write(struct cifsFileInfo *open_file, 860static ssize_t cifs_write(struct cifsFileInfo *open_file,
950 const char *write_data, size_t write_size, 861 const char *write_data, size_t write_size,
951 loff_t *poffset) 862 loff_t *poffset)
@@ -1420,9 +1331,10 @@ retry_write:
1420 return rc; 1331 return rc;
1421} 1332}
1422 1333
1423static int cifs_writepage(struct page *page, struct writeback_control *wbc) 1334static int
1335cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
1424{ 1336{
1425 int rc = -EFAULT; 1337 int rc;
1426 int xid; 1338 int xid;
1427 1339
1428 xid = GetXid(); 1340 xid = GetXid();
@@ -1442,15 +1354,29 @@ static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1442 * to fail to update with the state of the page correctly. 1354 * to fail to update with the state of the page correctly.
1443 */ 1355 */
1444 set_page_writeback(page); 1356 set_page_writeback(page);
1357retry_write:
1445 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE); 1358 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1446 SetPageUptodate(page); /* BB add check for error and Clearuptodate? */ 1359 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
1447 unlock_page(page); 1360 goto retry_write;
1361 else if (rc == -EAGAIN)
1362 redirty_page_for_writepage(wbc, page);
1363 else if (rc != 0)
1364 SetPageError(page);
1365 else
1366 SetPageUptodate(page);
1448 end_page_writeback(page); 1367 end_page_writeback(page);
1449 page_cache_release(page); 1368 page_cache_release(page);
1450 FreeXid(xid); 1369 FreeXid(xid);
1451 return rc; 1370 return rc;
1452} 1371}
1453 1372
1373static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1374{
1375 int rc = cifs_writepage_locked(page, wbc);
1376 unlock_page(page);
1377 return rc;
1378}
1379
1454static int cifs_write_end(struct file *file, struct address_space *mapping, 1380static int cifs_write_end(struct file *file, struct address_space *mapping,
1455 loff_t pos, unsigned len, unsigned copied, 1381 loff_t pos, unsigned len, unsigned copied,
1456 struct page *page, void *fsdata) 1382 struct page *page, void *fsdata)
@@ -1519,8 +1445,13 @@ int cifs_strict_fsync(struct file *file, int datasync)
1519 cFYI(1, "Sync file - name: %s datasync: 0x%x", 1445 cFYI(1, "Sync file - name: %s datasync: 0x%x",
1520 file->f_path.dentry->d_name.name, datasync); 1446 file->f_path.dentry->d_name.name, datasync);
1521 1447
1522 if (!CIFS_I(inode)->clientCanCacheRead) 1448 if (!CIFS_I(inode)->clientCanCacheRead) {
1523 cifs_invalidate_mapping(inode); 1449 rc = cifs_invalidate_mapping(inode);
1450 if (rc) {
1451 cFYI(1, "rc: %d during invalidate phase", rc);
1452 rc = 0; /* don't care about it in fsync */
1453 }
1454 }
1524 1455
1525 tcon = tlink_tcon(smbfile->tlink); 1456 tcon = tlink_tcon(smbfile->tlink);
1526 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) 1457 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
@@ -1726,7 +1657,7 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
1726 return total_written; 1657 return total_written;
1727} 1658}
1728 1659
1729static ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, 1660ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
1730 unsigned long nr_segs, loff_t pos) 1661 unsigned long nr_segs, loff_t pos)
1731{ 1662{
1732 ssize_t written; 1663 ssize_t written;
@@ -1849,17 +1780,7 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
1849 return total_read; 1780 return total_read;
1850} 1781}
1851 1782
1852ssize_t cifs_user_read(struct file *file, char __user *read_data, 1783ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
1853 size_t read_size, loff_t *poffset)
1854{
1855 struct iovec iov;
1856 iov.iov_base = read_data;
1857 iov.iov_len = read_size;
1858
1859 return cifs_iovec_read(file, &iov, 1, poffset);
1860}
1861
1862static ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
1863 unsigned long nr_segs, loff_t pos) 1784 unsigned long nr_segs, loff_t pos)
1864{ 1785{
1865 ssize_t read; 1786 ssize_t read;
@@ -1987,8 +1908,11 @@ int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
1987 1908
1988 xid = GetXid(); 1909 xid = GetXid();
1989 1910
1990 if (!CIFS_I(inode)->clientCanCacheRead) 1911 if (!CIFS_I(inode)->clientCanCacheRead) {
1991 cifs_invalidate_mapping(inode); 1912 rc = cifs_invalidate_mapping(inode);
1913 if (rc)
1914 return rc;
1915 }
1992 1916
1993 rc = generic_file_mmap(file, vma); 1917 rc = generic_file_mmap(file, vma);
1994 if (rc == 0) 1918 if (rc == 0)
@@ -2415,6 +2339,27 @@ static void cifs_invalidate_page(struct page *page, unsigned long offset)
2415 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode); 2339 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
2416} 2340}
2417 2341
2342static int cifs_launder_page(struct page *page)
2343{
2344 int rc = 0;
2345 loff_t range_start = page_offset(page);
2346 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
2347 struct writeback_control wbc = {
2348 .sync_mode = WB_SYNC_ALL,
2349 .nr_to_write = 0,
2350 .range_start = range_start,
2351 .range_end = range_end,
2352 };
2353
2354 cFYI(1, "Launder page: %p", page);
2355
2356 if (clear_page_dirty_for_io(page))
2357 rc = cifs_writepage_locked(page, &wbc);
2358
2359 cifs_fscache_invalidate_page(page, page->mapping->host);
2360 return rc;
2361}
2362
2418void cifs_oplock_break(struct work_struct *work) 2363void cifs_oplock_break(struct work_struct *work)
2419{ 2364{
2420 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, 2365 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
@@ -2486,7 +2431,7 @@ const struct address_space_operations cifs_addr_ops = {
2486 .set_page_dirty = __set_page_dirty_nobuffers, 2431 .set_page_dirty = __set_page_dirty_nobuffers,
2487 .releasepage = cifs_release_page, 2432 .releasepage = cifs_release_page,
2488 .invalidatepage = cifs_invalidate_page, 2433 .invalidatepage = cifs_invalidate_page,
2489 /* .direct_IO = */ 2434 .launder_page = cifs_launder_page,
2490}; 2435};
2491 2436
2492/* 2437/*
@@ -2503,5 +2448,5 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
2503 .set_page_dirty = __set_page_dirty_nobuffers, 2448 .set_page_dirty = __set_page_dirty_nobuffers,
2504 .releasepage = cifs_release_page, 2449 .releasepage = cifs_release_page,
2505 .invalidatepage = cifs_invalidate_page, 2450 .invalidatepage = cifs_invalidate_page,
2506 /* .direct_IO = */ 2451 .launder_page = cifs_launder_page,
2507}; 2452};
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 8852470b4fbb..de02ed5e25c2 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -878,7 +878,7 @@ retry_iget5_locked:
878} 878}
879 879
880/* gets root inode */ 880/* gets root inode */
881struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) 881struct inode *cifs_root_iget(struct super_block *sb)
882{ 882{
883 int xid; 883 int xid;
884 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 884 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
@@ -1683,71 +1683,70 @@ cifs_inode_needs_reval(struct inode *inode)
1683/* 1683/*
1684 * Zap the cache. Called when invalid_mapping flag is set. 1684 * Zap the cache. Called when invalid_mapping flag is set.
1685 */ 1685 */
1686void 1686int
1687cifs_invalidate_mapping(struct inode *inode) 1687cifs_invalidate_mapping(struct inode *inode)
1688{ 1688{
1689 int rc; 1689 int rc = 0;
1690 struct cifsInodeInfo *cifs_i = CIFS_I(inode); 1690 struct cifsInodeInfo *cifs_i = CIFS_I(inode);
1691 1691
1692 cifs_i->invalid_mapping = false; 1692 cifs_i->invalid_mapping = false;
1693 1693
1694 /* write back any cached data */
1695 if (inode->i_mapping && inode->i_mapping->nrpages != 0) { 1694 if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
1696 rc = filemap_write_and_wait(inode->i_mapping); 1695 rc = invalidate_inode_pages2(inode->i_mapping);
1697 mapping_set_error(inode->i_mapping, rc); 1696 if (rc) {
1697 cERROR(1, "%s: could not invalidate inode %p", __func__,
1698 inode);
1699 cifs_i->invalid_mapping = true;
1700 }
1698 } 1701 }
1699 invalidate_remote_inode(inode); 1702
1700 cifs_fscache_reset_inode_cookie(inode); 1703 cifs_fscache_reset_inode_cookie(inode);
1704 return rc;
1701} 1705}
1702 1706
1703int cifs_revalidate_file(struct file *filp) 1707int cifs_revalidate_file_attr(struct file *filp)
1704{ 1708{
1705 int rc = 0; 1709 int rc = 0;
1706 struct inode *inode = filp->f_path.dentry->d_inode; 1710 struct inode *inode = filp->f_path.dentry->d_inode;
1707 struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data; 1711 struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data;
1708 1712
1709 if (!cifs_inode_needs_reval(inode)) 1713 if (!cifs_inode_needs_reval(inode))
1710 goto check_inval; 1714 return rc;
1711 1715
1712 if (tlink_tcon(cfile->tlink)->unix_ext) 1716 if (tlink_tcon(cfile->tlink)->unix_ext)
1713 rc = cifs_get_file_info_unix(filp); 1717 rc = cifs_get_file_info_unix(filp);
1714 else 1718 else
1715 rc = cifs_get_file_info(filp); 1719 rc = cifs_get_file_info(filp);
1716 1720
1717check_inval:
1718 if (CIFS_I(inode)->invalid_mapping)
1719 cifs_invalidate_mapping(inode);
1720
1721 return rc; 1721 return rc;
1722} 1722}
1723 1723
1724/* revalidate a dentry's inode attributes */ 1724int cifs_revalidate_dentry_attr(struct dentry *dentry)
1725int cifs_revalidate_dentry(struct dentry *dentry)
1726{ 1725{
1727 int xid; 1726 int xid;
1728 int rc = 0; 1727 int rc = 0;
1729 char *full_path = NULL;
1730 struct inode *inode = dentry->d_inode; 1728 struct inode *inode = dentry->d_inode;
1731 struct super_block *sb = dentry->d_sb; 1729 struct super_block *sb = dentry->d_sb;
1730 char *full_path = NULL;
1732 1731
1733 if (inode == NULL) 1732 if (inode == NULL)
1734 return -ENOENT; 1733 return -ENOENT;
1735 1734
1736 xid = GetXid();
1737
1738 if (!cifs_inode_needs_reval(inode)) 1735 if (!cifs_inode_needs_reval(inode))
1739 goto check_inval; 1736 return rc;
1737
1738 xid = GetXid();
1740 1739
1741 /* can not safely grab the rename sem here if rename calls revalidate 1740 /* can not safely grab the rename sem here if rename calls revalidate
1742 since that would deadlock */ 1741 since that would deadlock */
1743 full_path = build_path_from_dentry(dentry); 1742 full_path = build_path_from_dentry(dentry);
1744 if (full_path == NULL) { 1743 if (full_path == NULL) {
1745 rc = -ENOMEM; 1744 rc = -ENOMEM;
1746 goto check_inval; 1745 goto out;
1747 } 1746 }
1748 1747
1749 cFYI(1, "Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld " 1748 cFYI(1, "Update attributes: %s inode 0x%p count %d dentry: 0x%p d_time "
1750 "jiffies %ld", full_path, inode, inode->i_count.counter, 1749 "%ld jiffies %ld", full_path, inode, inode->i_count.counter,
1751 dentry, dentry->d_time, jiffies); 1750 dentry, dentry->d_time, jiffies);
1752 1751
1753 if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext) 1752 if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext)
@@ -1756,41 +1755,83 @@ int cifs_revalidate_dentry(struct dentry *dentry)
1756 rc = cifs_get_inode_info(&inode, full_path, NULL, sb, 1755 rc = cifs_get_inode_info(&inode, full_path, NULL, sb,
1757 xid, NULL); 1756 xid, NULL);
1758 1757
1759check_inval: 1758out:
1760 if (CIFS_I(inode)->invalid_mapping)
1761 cifs_invalidate_mapping(inode);
1762
1763 kfree(full_path); 1759 kfree(full_path);
1764 FreeXid(xid); 1760 FreeXid(xid);
1765 return rc; 1761 return rc;
1766} 1762}
1767 1763
1764int cifs_revalidate_file(struct file *filp)
1765{
1766 int rc;
1767 struct inode *inode = filp->f_path.dentry->d_inode;
1768
1769 rc = cifs_revalidate_file_attr(filp);
1770 if (rc)
1771 return rc;
1772
1773 if (CIFS_I(inode)->invalid_mapping)
1774 rc = cifs_invalidate_mapping(inode);
1775 return rc;
1776}
1777
1778/* revalidate a dentry's inode attributes */
1779int cifs_revalidate_dentry(struct dentry *dentry)
1780{
1781 int rc;
1782 struct inode *inode = dentry->d_inode;
1783
1784 rc = cifs_revalidate_dentry_attr(dentry);
1785 if (rc)
1786 return rc;
1787
1788 if (CIFS_I(inode)->invalid_mapping)
1789 rc = cifs_invalidate_mapping(inode);
1790 return rc;
1791}
1792
1768int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry, 1793int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1769 struct kstat *stat) 1794 struct kstat *stat)
1770{ 1795{
1771 struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb); 1796 struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb);
1772 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); 1797 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
1773 int err = cifs_revalidate_dentry(dentry); 1798 struct inode *inode = dentry->d_inode;
1774 1799 int rc;
1775 if (!err) {
1776 generic_fillattr(dentry->d_inode, stat);
1777 stat->blksize = CIFS_MAX_MSGSIZE;
1778 stat->ino = CIFS_I(dentry->d_inode)->uniqueid;
1779 1800
1780 /* 1801 /*
1781 * If on a multiuser mount without unix extensions, and the 1802 * We need to be sure that all dirty pages are written and the server
1782 * admin hasn't overridden them, set the ownership to the 1803 * has actual ctime, mtime and file length.
1783 * fsuid/fsgid of the current process. 1804 */
1784 */ 1805 if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping &&
1785 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) && 1806 inode->i_mapping->nrpages != 0) {
1786 !tcon->unix_ext) { 1807 rc = filemap_fdatawait(inode->i_mapping);
1787 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)) 1808 if (rc) {
1788 stat->uid = current_fsuid(); 1809 mapping_set_error(inode->i_mapping, rc);
1789 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)) 1810 return rc;
1790 stat->gid = current_fsgid();
1791 } 1811 }
1792 } 1812 }
1793 return err; 1813
1814 rc = cifs_revalidate_dentry_attr(dentry);
1815 if (rc)
1816 return rc;
1817
1818 generic_fillattr(inode, stat);
1819 stat->blksize = CIFS_MAX_MSGSIZE;
1820 stat->ino = CIFS_I(inode)->uniqueid;
1821
1822 /*
1823 * If on a multiuser mount without unix extensions, and the admin hasn't
1824 * overridden them, set the ownership to the fsuid/fsgid of the current
1825 * process.
1826 */
1827 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) &&
1828 !tcon->unix_ext) {
1829 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID))
1830 stat->uid = current_fsuid();
1831 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID))
1832 stat->gid = current_fsgid();
1833 }
1834 return rc;
1794} 1835}
1795 1836
1796static int cifs_truncate_page(struct address_space *mapping, loff_t from) 1837static int cifs_truncate_page(struct address_space *mapping, loff_t from)
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 0c684ae4c071..907531ac5888 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -304,12 +304,10 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
304 304
305 memset(temp, 0, 256); /* bigger than MAX_CIFS_HDR_SIZE */ 305 memset(temp, 0, 256); /* bigger than MAX_CIFS_HDR_SIZE */
306 306
307 buffer->smb_buf_length = 307 buffer->smb_buf_length = cpu_to_be32(
308 (2 * word_count) + sizeof(struct smb_hdr) - 308 (2 * word_count) + sizeof(struct smb_hdr) -
309 4 /* RFC 1001 length field does not count */ + 309 4 /* RFC 1001 length field does not count */ +
310 2 /* for bcc field itself */ ; 310 2 /* for bcc field itself */) ;
311 /* Note that this is the only network field that has to be converted
312 to big endian and it is done just before we send it */
313 311
314 buffer->Protocol[0] = 0xFF; 312 buffer->Protocol[0] = 0xFF;
315 buffer->Protocol[1] = 'S'; 313 buffer->Protocol[1] = 'S';
@@ -424,7 +422,7 @@ check_smb_hdr(struct smb_hdr *smb, __u16 mid)
424int 422int
425checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length) 423checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
426{ 424{
427 __u32 len = smb->smb_buf_length; 425 __u32 len = be32_to_cpu(smb->smb_buf_length);
428 __u32 clc_len; /* calculated length */ 426 __u32 clc_len; /* calculated length */
429 cFYI(0, "checkSMB Length: 0x%x, smb_buf_length: 0x%x", length, len); 427 cFYI(0, "checkSMB Length: 0x%x, smb_buf_length: 0x%x", length, len);
430 428
@@ -464,7 +462,7 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
464 462
465 if (check_smb_hdr(smb, mid)) 463 if (check_smb_hdr(smb, mid))
466 return 1; 464 return 1;
467 clc_len = smbCalcSize_LE(smb); 465 clc_len = smbCalcSize(smb);
468 466
469 if (4 + len != length) { 467 if (4 + len != length) {
470 cERROR(1, "Length read does not match RFC1001 length %d", 468 cERROR(1, "Length read does not match RFC1001 length %d",
@@ -521,7 +519,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
521 (struct smb_com_transaction_change_notify_rsp *)buf; 519 (struct smb_com_transaction_change_notify_rsp *)buf;
522 struct file_notify_information *pnotify; 520 struct file_notify_information *pnotify;
523 __u32 data_offset = 0; 521 __u32 data_offset = 0;
524 if (get_bcc_le(buf) > sizeof(struct file_notify_information)) { 522 if (get_bcc(buf) > sizeof(struct file_notify_information)) {
525 data_offset = le32_to_cpu(pSMBr->DataOffset); 523 data_offset = le32_to_cpu(pSMBr->DataOffset);
526 524
527 pnotify = (struct file_notify_information *) 525 pnotify = (struct file_notify_information *)
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 79f641eeda30..79b71c2c7c9d 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -919,13 +919,6 @@ smbCalcSize(struct smb_hdr *ptr)
919 2 /* size of the bcc field */ + get_bcc(ptr)); 919 2 /* size of the bcc field */ + get_bcc(ptr));
920} 920}
921 921
922unsigned int
923smbCalcSize_LE(struct smb_hdr *ptr)
924{
925 return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) +
926 2 /* size of the bcc field */ + get_bcc_le(ptr));
927}
928
929/* The following are taken from fs/ntfs/util.c */ 922/* The following are taken from fs/ntfs/util.c */
930 923
931#define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000) 924#define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000)
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index f6728eb6f4b9..7dd462100378 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -276,7 +276,7 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
276} 276}
277 277
278static void 278static void
279decode_unicode_ssetup(char **pbcc_area, __u16 bleft, struct cifsSesInfo *ses, 279decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses,
280 const struct nls_table *nls_cp) 280 const struct nls_table *nls_cp)
281{ 281{
282 int len; 282 int len;
@@ -284,19 +284,6 @@ decode_unicode_ssetup(char **pbcc_area, __u16 bleft, struct cifsSesInfo *ses,
284 284
285 cFYI(1, "bleft %d", bleft); 285 cFYI(1, "bleft %d", bleft);
286 286
287 /*
288 * Windows servers do not always double null terminate their final
289 * Unicode string. Check to see if there are an uneven number of bytes
290 * left. If so, then add an extra NULL pad byte to the end of the
291 * response.
292 *
293 * See section 2.7.2 in "Implementing CIFS" for details
294 */
295 if (bleft % 2) {
296 data[bleft] = 0;
297 ++bleft;
298 }
299
300 kfree(ses->serverOS); 287 kfree(ses->serverOS);
301 ses->serverOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp); 288 ses->serverOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp);
302 cFYI(1, "serverOS=%s", ses->serverOS); 289 cFYI(1, "serverOS=%s", ses->serverOS);
@@ -634,7 +621,7 @@ ssetup_ntlmssp_authenticate:
634 and rest of bcc area. This allows us to avoid 621 and rest of bcc area. This allows us to avoid
635 a large buffer 17K allocation */ 622 a large buffer 17K allocation */
636 iov[0].iov_base = (char *)pSMB; 623 iov[0].iov_base = (char *)pSMB;
637 iov[0].iov_len = smb_buf->smb_buf_length + 4; 624 iov[0].iov_len = be32_to_cpu(smb_buf->smb_buf_length) + 4;
638 625
639 /* setting this here allows the code at the end of the function 626 /* setting this here allows the code at the end of the function
640 to free the request buffer if there's an error */ 627 to free the request buffer if there's an error */
@@ -669,7 +656,7 @@ ssetup_ntlmssp_authenticate:
669 * to use challenge/response method (i.e. Password bit is 1). 656 * to use challenge/response method (i.e. Password bit is 1).
670 */ 657 */
671 658
672 calc_lanman_hash(ses->password, ses->server->cryptkey, 659 rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
673 ses->server->secMode & SECMODE_PW_ENCRYPT ? 660 ses->server->secMode & SECMODE_PW_ENCRYPT ?
674 true : false, lnm_session_key); 661 true : false, lnm_session_key);
675 662
@@ -872,9 +859,10 @@ ssetup_ntlmssp_authenticate:
872 iov[2].iov_len = (long) bcc_ptr - (long) str_area; 859 iov[2].iov_len = (long) bcc_ptr - (long) str_area;
873 860
874 count = iov[1].iov_len + iov[2].iov_len; 861 count = iov[1].iov_len + iov[2].iov_len;
875 smb_buf->smb_buf_length += count; 862 smb_buf->smb_buf_length =
863 cpu_to_be32(be32_to_cpu(smb_buf->smb_buf_length) + count);
876 864
877 put_bcc_le(count, smb_buf); 865 put_bcc(count, smb_buf);
878 866
879 rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type, 867 rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type,
880 CIFS_LOG_ERROR); 868 CIFS_LOG_ERROR);
@@ -929,7 +917,9 @@ ssetup_ntlmssp_authenticate:
929 } 917 }
930 918
931 /* BB check if Unicode and decode strings */ 919 /* BB check if Unicode and decode strings */
932 if (smb_buf->Flags2 & SMBFLG2_UNICODE) { 920 if (bytes_remaining == 0) {
921 /* no string area to decode, do nothing */
922 } else if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
933 /* unicode string area must be word-aligned */ 923 /* unicode string area must be word-aligned */
934 if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) { 924 if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
935 ++bcc_ptr; 925 ++bcc_ptr;
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
deleted file mode 100644
index 04721485925d..000000000000
--- a/fs/cifs/smbdes.c
+++ /dev/null
@@ -1,418 +0,0 @@
1/*
2 Unix SMB/Netbios implementation.
3 Version 1.9.
4
5 a partial implementation of DES designed for use in the
6 SMB authentication protocol
7
8 Copyright (C) Andrew Tridgell 1998
9 Modified by Steve French (sfrench@us.ibm.com) 2002,2004
10
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24*/
25
26/* NOTES:
27
28 This code makes no attempt to be fast! In fact, it is a very
29 slow implementation
30
31 This code is NOT a complete DES implementation. It implements only
32 the minimum necessary for SMB authentication, as used by all SMB
33 products (including every copy of Microsoft Windows95 ever sold)
34
35 In particular, it can only do a unchained forward DES pass. This
36 means it is not possible to use this code for encryption/decryption
37 of data, instead it is only useful as a "hash" algorithm.
38
39 There is no entry point into this code that allows normal DES operation.
40
41 I believe this means that this code does not come under ITAR
42 regulations but this is NOT a legal opinion. If you are concerned
43 about the applicability of ITAR regulations to this code then you
44 should confirm it for yourself (and maybe let me know if you come
45 up with a different answer to the one above)
46*/
47#include <linux/slab.h>
48#define uchar unsigned char
49
50static uchar perm1[56] = { 57, 49, 41, 33, 25, 17, 9,
51 1, 58, 50, 42, 34, 26, 18,
52 10, 2, 59, 51, 43, 35, 27,
53 19, 11, 3, 60, 52, 44, 36,
54 63, 55, 47, 39, 31, 23, 15,
55 7, 62, 54, 46, 38, 30, 22,
56 14, 6, 61, 53, 45, 37, 29,
57 21, 13, 5, 28, 20, 12, 4
58};
59
60static uchar perm2[48] = { 14, 17, 11, 24, 1, 5,
61 3, 28, 15, 6, 21, 10,
62 23, 19, 12, 4, 26, 8,
63 16, 7, 27, 20, 13, 2,
64 41, 52, 31, 37, 47, 55,
65 30, 40, 51, 45, 33, 48,
66 44, 49, 39, 56, 34, 53,
67 46, 42, 50, 36, 29, 32
68};
69
70static uchar perm3[64] = { 58, 50, 42, 34, 26, 18, 10, 2,
71 60, 52, 44, 36, 28, 20, 12, 4,
72 62, 54, 46, 38, 30, 22, 14, 6,
73 64, 56, 48, 40, 32, 24, 16, 8,
74 57, 49, 41, 33, 25, 17, 9, 1,
75 59, 51, 43, 35, 27, 19, 11, 3,
76 61, 53, 45, 37, 29, 21, 13, 5,
77 63, 55, 47, 39, 31, 23, 15, 7
78};
79
80static uchar perm4[48] = { 32, 1, 2, 3, 4, 5,
81 4, 5, 6, 7, 8, 9,
82 8, 9, 10, 11, 12, 13,
83 12, 13, 14, 15, 16, 17,
84 16, 17, 18, 19, 20, 21,
85 20, 21, 22, 23, 24, 25,
86 24, 25, 26, 27, 28, 29,
87 28, 29, 30, 31, 32, 1
88};
89
90static uchar perm5[32] = { 16, 7, 20, 21,
91 29, 12, 28, 17,
92 1, 15, 23, 26,
93 5, 18, 31, 10,
94 2, 8, 24, 14,
95 32, 27, 3, 9,
96 19, 13, 30, 6,
97 22, 11, 4, 25
98};
99
100static uchar perm6[64] = { 40, 8, 48, 16, 56, 24, 64, 32,
101 39, 7, 47, 15, 55, 23, 63, 31,
102 38, 6, 46, 14, 54, 22, 62, 30,
103 37, 5, 45, 13, 53, 21, 61, 29,
104 36, 4, 44, 12, 52, 20, 60, 28,
105 35, 3, 43, 11, 51, 19, 59, 27,
106 34, 2, 42, 10, 50, 18, 58, 26,
107 33, 1, 41, 9, 49, 17, 57, 25
108};
109
110static uchar sc[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
111
112static uchar sbox[8][4][16] = {
113 {{14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7},
114 {0, 15, 7, 4, 14, 2, 13, 1, 10, 6, 12, 11, 9, 5, 3, 8},
115 {4, 1, 14, 8, 13, 6, 2, 11, 15, 12, 9, 7, 3, 10, 5, 0},
116 {15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13} },
117
118 {{15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10},
119 {3, 13, 4, 7, 15, 2, 8, 14, 12, 0, 1, 10, 6, 9, 11, 5},
120 {0, 14, 7, 11, 10, 4, 13, 1, 5, 8, 12, 6, 9, 3, 2, 15},
121 {13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9} },
122
123 {{10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8},
124 {13, 7, 0, 9, 3, 4, 6, 10, 2, 8, 5, 14, 12, 11, 15, 1},
125 {13, 6, 4, 9, 8, 15, 3, 0, 11, 1, 2, 12, 5, 10, 14, 7},
126 {1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12} },
127
128 {{7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15},
129 {13, 8, 11, 5, 6, 15, 0, 3, 4, 7, 2, 12, 1, 10, 14, 9},
130 {10, 6, 9, 0, 12, 11, 7, 13, 15, 1, 3, 14, 5, 2, 8, 4},
131 {3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14} },
132
133 {{2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9},
134 {14, 11, 2, 12, 4, 7, 13, 1, 5, 0, 15, 10, 3, 9, 8, 6},
135 {4, 2, 1, 11, 10, 13, 7, 8, 15, 9, 12, 5, 6, 3, 0, 14},
136 {11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3} },
137
138 {{12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11},
139 {10, 15, 4, 2, 7, 12, 9, 5, 6, 1, 13, 14, 0, 11, 3, 8},
140 {9, 14, 15, 5, 2, 8, 12, 3, 7, 0, 4, 10, 1, 13, 11, 6},
141 {4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13} },
142
143 {{4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1},
144 {13, 0, 11, 7, 4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6},
145 {1, 4, 11, 13, 12, 3, 7, 14, 10, 15, 6, 8, 0, 5, 9, 2},
146 {6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12} },
147
148 {{13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7},
149 {1, 15, 13, 8, 10, 3, 7, 4, 12, 5, 6, 11, 0, 14, 9, 2},
150 {7, 11, 4, 1, 9, 12, 14, 2, 0, 6, 10, 13, 15, 3, 5, 8},
151 {2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11} }
152};
153
154static void
155permute(char *out, char *in, uchar *p, int n)
156{
157 int i;
158 for (i = 0; i < n; i++)
159 out[i] = in[p[i] - 1];
160}
161
162static void
163lshift(char *d, int count, int n)
164{
165 char out[64];
166 int i;
167 for (i = 0; i < n; i++)
168 out[i] = d[(i + count) % n];
169 for (i = 0; i < n; i++)
170 d[i] = out[i];
171}
172
173static void
174concat(char *out, char *in1, char *in2, int l1, int l2)
175{
176 while (l1--)
177 *out++ = *in1++;
178 while (l2--)
179 *out++ = *in2++;
180}
181
182static void
183xor(char *out, char *in1, char *in2, int n)
184{
185 int i;
186 for (i = 0; i < n; i++)
187 out[i] = in1[i] ^ in2[i];
188}
189
190static void
191dohash(char *out, char *in, char *key, int forw)
192{
193 int i, j, k;
194 char *pk1;
195 char c[28];
196 char d[28];
197 char *cd;
198 char (*ki)[48];
199 char *pd1;
200 char l[32], r[32];
201 char *rl;
202
203 /* Have to reduce stack usage */
204 pk1 = kmalloc(56+56+64+64, GFP_KERNEL);
205 if (pk1 == NULL)
206 return;
207
208 ki = kmalloc(16*48, GFP_KERNEL);
209 if (ki == NULL) {
210 kfree(pk1);
211 return;
212 }
213
214 cd = pk1 + 56;
215 pd1 = cd + 56;
216 rl = pd1 + 64;
217
218 permute(pk1, key, perm1, 56);
219
220 for (i = 0; i < 28; i++)
221 c[i] = pk1[i];
222 for (i = 0; i < 28; i++)
223 d[i] = pk1[i + 28];
224
225 for (i = 0; i < 16; i++) {
226 lshift(c, sc[i], 28);
227 lshift(d, sc[i], 28);
228
229 concat(cd, c, d, 28, 28);
230 permute(ki[i], cd, perm2, 48);
231 }
232
233 permute(pd1, in, perm3, 64);
234
235 for (j = 0; j < 32; j++) {
236 l[j] = pd1[j];
237 r[j] = pd1[j + 32];
238 }
239
240 for (i = 0; i < 16; i++) {
241 char *er; /* er[48] */
242 char *erk; /* erk[48] */
243 char b[8][6];
244 char *cb; /* cb[32] */
245 char *pcb; /* pcb[32] */
246 char *r2; /* r2[32] */
247
248 er = kmalloc(48+48+32+32+32, GFP_KERNEL);
249 if (er == NULL) {
250 kfree(pk1);
251 kfree(ki);
252 return;
253 }
254 erk = er+48;
255 cb = erk+48;
256 pcb = cb+32;
257 r2 = pcb+32;
258
259 permute(er, r, perm4, 48);
260
261 xor(erk, er, ki[forw ? i : 15 - i], 48);
262
263 for (j = 0; j < 8; j++)
264 for (k = 0; k < 6; k++)
265 b[j][k] = erk[j * 6 + k];
266
267 for (j = 0; j < 8; j++) {
268 int m, n;
269 m = (b[j][0] << 1) | b[j][5];
270
271 n = (b[j][1] << 3) | (b[j][2] << 2) | (b[j][3] <<
272 1) | b[j][4];
273
274 for (k = 0; k < 4; k++)
275 b[j][k] =
276 (sbox[j][m][n] & (1 << (3 - k))) ? 1 : 0;
277 }
278
279 for (j = 0; j < 8; j++)
280 for (k = 0; k < 4; k++)
281 cb[j * 4 + k] = b[j][k];
282 permute(pcb, cb, perm5, 32);
283
284 xor(r2, l, pcb, 32);
285
286 for (j = 0; j < 32; j++)
287 l[j] = r[j];
288
289 for (j = 0; j < 32; j++)
290 r[j] = r2[j];
291
292 kfree(er);
293 }
294
295 concat(rl, r, l, 32, 32);
296
297 permute(out, rl, perm6, 64);
298 kfree(pk1);
299 kfree(ki);
300}
301
302static void
303str_to_key(unsigned char *str, unsigned char *key)
304{
305 int i;
306
307 key[0] = str[0] >> 1;
308 key[1] = ((str[0] & 0x01) << 6) | (str[1] >> 2);
309 key[2] = ((str[1] & 0x03) << 5) | (str[2] >> 3);
310 key[3] = ((str[2] & 0x07) << 4) | (str[3] >> 4);
311 key[4] = ((str[3] & 0x0F) << 3) | (str[4] >> 5);
312 key[5] = ((str[4] & 0x1F) << 2) | (str[5] >> 6);
313 key[6] = ((str[5] & 0x3F) << 1) | (str[6] >> 7);
314 key[7] = str[6] & 0x7F;
315 for (i = 0; i < 8; i++)
316 key[i] = (key[i] << 1);
317}
318
319static void
320smbhash(unsigned char *out, const unsigned char *in, unsigned char *key,
321 int forw)
322{
323 int i;
324 char *outb; /* outb[64] */
325 char *inb; /* inb[64] */
326 char *keyb; /* keyb[64] */
327 unsigned char key2[8];
328
329 outb = kmalloc(64 * 3, GFP_KERNEL);
330 if (outb == NULL)
331 return;
332
333 inb = outb + 64;
334 keyb = inb + 64;
335
336 str_to_key(key, key2);
337
338 for (i = 0; i < 64; i++) {
339 inb[i] = (in[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
340 keyb[i] = (key2[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
341 outb[i] = 0;
342 }
343
344 dohash(outb, inb, keyb, forw);
345
346 for (i = 0; i < 8; i++)
347 out[i] = 0;
348
349 for (i = 0; i < 64; i++) {
350 if (outb[i])
351 out[i / 8] |= (1 << (7 - (i % 8)));
352 }
353 kfree(outb);
354}
355
356void
357E_P16(unsigned char *p14, unsigned char *p16)
358{
359 unsigned char sp8[8] =
360 { 0x4b, 0x47, 0x53, 0x21, 0x40, 0x23, 0x24, 0x25 };
361 smbhash(p16, sp8, p14, 1);
362 smbhash(p16 + 8, sp8, p14 + 7, 1);
363}
364
365void
366E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
367{
368 smbhash(p24, c8, p21, 1);
369 smbhash(p24 + 8, c8, p21 + 7, 1);
370 smbhash(p24 + 16, c8, p21 + 14, 1);
371}
372
373#if 0 /* currently unused */
374static void
375D_P16(unsigned char *p14, unsigned char *in, unsigned char *out)
376{
377 smbhash(out, in, p14, 0);
378 smbhash(out + 8, in + 8, p14 + 7, 0);
379}
380
381static void
382E_old_pw_hash(unsigned char *p14, unsigned char *in, unsigned char *out)
383{
384 smbhash(out, in, p14, 1);
385 smbhash(out + 8, in + 8, p14 + 7, 1);
386}
387/* these routines are currently unneeded, but may be
388 needed later */
389void
390cred_hash1(unsigned char *out, unsigned char *in, unsigned char *key)
391{
392 unsigned char buf[8];
393
394 smbhash(buf, in, key, 1);
395 smbhash(out, buf, key + 9, 1);
396}
397
398void
399cred_hash2(unsigned char *out, unsigned char *in, unsigned char *key)
400{
401 unsigned char buf[8];
402 static unsigned char key2[8];
403
404 smbhash(buf, in, key, 1);
405 key2[0] = key[7];
406 smbhash(out, buf, key2, 1);
407}
408
409void
410cred_hash3(unsigned char *out, unsigned char *in, unsigned char *key, int forw)
411{
412 static unsigned char key2[8];
413
414 smbhash(out, in, key, forw);
415 key2[0] = key[7];
416 smbhash(out + 8, in + 8, key2, forw);
417}
418#endif /* unneeded routines */
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index b5041c849981..1525d5e662b6 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -47,6 +47,88 @@
47#define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8) 47#define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8)
48#define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val))) 48#define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val)))
49 49
50static void
51str_to_key(unsigned char *str, unsigned char *key)
52{
53 int i;
54
55 key[0] = str[0] >> 1;
56 key[1] = ((str[0] & 0x01) << 6) | (str[1] >> 2);
57 key[2] = ((str[1] & 0x03) << 5) | (str[2] >> 3);
58 key[3] = ((str[2] & 0x07) << 4) | (str[3] >> 4);
59 key[4] = ((str[3] & 0x0F) << 3) | (str[4] >> 5);
60 key[5] = ((str[4] & 0x1F) << 2) | (str[5] >> 6);
61 key[6] = ((str[5] & 0x3F) << 1) | (str[6] >> 7);
62 key[7] = str[6] & 0x7F;
63 for (i = 0; i < 8; i++)
64 key[i] = (key[i] << 1);
65}
66
67static int
68smbhash(unsigned char *out, const unsigned char *in, unsigned char *key)
69{
70 int rc;
71 unsigned char key2[8];
72 struct crypto_blkcipher *tfm_des;
73 struct scatterlist sgin, sgout;
74 struct blkcipher_desc desc;
75
76 str_to_key(key, key2);
77
78 tfm_des = crypto_alloc_blkcipher("ecb(des)", 0, CRYPTO_ALG_ASYNC);
79 if (IS_ERR(tfm_des)) {
80 rc = PTR_ERR(tfm_des);
81 cERROR(1, "could not allocate des crypto API\n");
82 goto smbhash_err;
83 }
84
85 desc.tfm = tfm_des;
86
87 crypto_blkcipher_setkey(tfm_des, key2, 8);
88
89 sg_init_one(&sgin, in, 8);
90 sg_init_one(&sgout, out, 8);
91
92 rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, 8);
93 if (rc) {
94 cERROR(1, "could not encrypt crypt key rc: %d\n", rc);
95 crypto_free_blkcipher(tfm_des);
96 goto smbhash_err;
97 }
98
99smbhash_err:
100 return rc;
101}
102
103static int
104E_P16(unsigned char *p14, unsigned char *p16)
105{
106 int rc;
107 unsigned char sp8[8] =
108 { 0x4b, 0x47, 0x53, 0x21, 0x40, 0x23, 0x24, 0x25 };
109
110 rc = smbhash(p16, sp8, p14);
111 if (rc)
112 return rc;
113 rc = smbhash(p16 + 8, sp8, p14 + 7);
114 return rc;
115}
116
117static int
118E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
119{
120 int rc;
121
122 rc = smbhash(p24, c8, p21);
123 if (rc)
124 return rc;
125 rc = smbhash(p24 + 8, c8, p21 + 7);
126 if (rc)
127 return rc;
128 rc = smbhash(p24 + 16, c8, p21 + 14);
129 return rc;
130}
131
50/* produce a md4 message digest from data of length n bytes */ 132/* produce a md4 message digest from data of length n bytes */
51int 133int
52mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len) 134mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len)
@@ -87,40 +169,30 @@ mdfour_err:
87 return rc; 169 return rc;
88} 170}
89 171
90/* Does the des encryption from the NT or LM MD4 hash. */
91static void
92SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8,
93 unsigned char p24[24])
94{
95 unsigned char p21[21];
96
97 memset(p21, '\0', 21);
98
99 memcpy(p21, passwd, 16);
100 E_P24(p21, c8, p24);
101}
102
103/* 172/*
104 This implements the X/Open SMB password encryption 173 This implements the X/Open SMB password encryption
105 It takes a password, a 8 byte "crypt key" and puts 24 bytes of 174 It takes a password, a 8 byte "crypt key" and puts 24 bytes of
106 encrypted password into p24 */ 175 encrypted password into p24 */
107/* Note that password must be uppercased and null terminated */ 176/* Note that password must be uppercased and null terminated */
108void 177int
109SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24) 178SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24)
110{ 179{
111 unsigned char p14[15], p21[21]; 180 int rc;
181 unsigned char p14[14], p16[16], p21[21];
112 182
113 memset(p21, '\0', 21);
114 memset(p14, '\0', 14); 183 memset(p14, '\0', 14);
115 strncpy((char *) p14, (char *) passwd, 14); 184 memset(p16, '\0', 16);
185 memset(p21, '\0', 21);
116 186
117/* strupper((char *)p14); *//* BB at least uppercase the easy range */ 187 memcpy(p14, passwd, 14);
118 E_P16(p14, p21); 188 rc = E_P16(p14, p16);
189 if (rc)
190 return rc;
119 191
120 SMBOWFencrypt(p21, c8, p24); 192 memcpy(p21, p16, 16);
193 rc = E_P24(p21, c8, p24);
121 194
122 memset(p14, 0, 15); 195 return rc;
123 memset(p21, 0, 21);
124} 196}
125 197
126/* Routines for Windows NT MD4 Hash functions. */ 198/* Routines for Windows NT MD4 Hash functions. */
@@ -279,16 +351,18 @@ int
279SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24) 351SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24)
280{ 352{
281 int rc; 353 int rc;
282 unsigned char p21[21]; 354 unsigned char p16[16], p21[21];
283 355
356 memset(p16, '\0', 16);
284 memset(p21, '\0', 21); 357 memset(p21, '\0', 21);
285 358
286 rc = E_md4hash(passwd, p21); 359 rc = E_md4hash(passwd, p16);
287 if (rc) { 360 if (rc) {
288 cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc); 361 cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
289 return rc; 362 return rc;
290 } 363 }
291 SMBOWFencrypt(p21, c8, p24); 364 memcpy(p21, p16, 16);
365 rc = E_P24(p21, c8, p24);
292 return rc; 366 return rc;
293} 367}
294 368
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 46d8756f2b24..f2513fb8c391 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -129,7 +129,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
129 unsigned int len = iov[0].iov_len; 129 unsigned int len = iov[0].iov_len;
130 unsigned int total_len; 130 unsigned int total_len;
131 int first_vec = 0; 131 int first_vec = 0;
132 unsigned int smb_buf_length = smb_buffer->smb_buf_length; 132 unsigned int smb_buf_length = be32_to_cpu(smb_buffer->smb_buf_length);
133 struct socket *ssocket = server->ssocket; 133 struct socket *ssocket = server->ssocket;
134 134
135 if (ssocket == NULL) 135 if (ssocket == NULL)
@@ -144,17 +144,10 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
144 else 144 else
145 smb_msg.msg_flags = MSG_NOSIGNAL; 145 smb_msg.msg_flags = MSG_NOSIGNAL;
146 146
147 /* smb header is converted in header_assemble. bcc and rest of SMB word
148 area, and byte area if necessary, is converted to littleendian in
149 cifssmb.c and RFC1001 len is converted to bigendian in smb_send
150 Flags2 is converted in SendReceive */
151
152
153 total_len = 0; 147 total_len = 0;
154 for (i = 0; i < n_vec; i++) 148 for (i = 0; i < n_vec; i++)
155 total_len += iov[i].iov_len; 149 total_len += iov[i].iov_len;
156 150
157 smb_buffer->smb_buf_length = cpu_to_be32(smb_buffer->smb_buf_length);
158 cFYI(1, "Sending smb: total_len %d", total_len); 151 cFYI(1, "Sending smb: total_len %d", total_len);
159 dump_smb(smb_buffer, len); 152 dump_smb(smb_buffer, len);
160 153
@@ -243,7 +236,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
243 236
244 /* Don't want to modify the buffer as a 237 /* Don't want to modify the buffer as a
245 side effect of this call. */ 238 side effect of this call. */
246 smb_buffer->smb_buf_length = smb_buf_length; 239 smb_buffer->smb_buf_length = cpu_to_be32(smb_buf_length);
247 240
248 return rc; 241 return rc;
249} 242}
@@ -387,7 +380,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
387#ifdef CONFIG_CIFS_STATS2 380#ifdef CONFIG_CIFS_STATS2
388 atomic_inc(&server->inSend); 381 atomic_inc(&server->inSend);
389#endif 382#endif
390 rc = smb_send(server, in_buf, in_buf->smb_buf_length); 383 rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
391#ifdef CONFIG_CIFS_STATS2 384#ifdef CONFIG_CIFS_STATS2
392 atomic_dec(&server->inSend); 385 atomic_dec(&server->inSend);
393 mid->when_sent = jiffies; 386 mid->when_sent = jiffies;
@@ -422,7 +415,7 @@ SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses,
422 int resp_buf_type; 415 int resp_buf_type;
423 416
424 iov[0].iov_base = (char *)in_buf; 417 iov[0].iov_base = (char *)in_buf;
425 iov[0].iov_len = in_buf->smb_buf_length + 4; 418 iov[0].iov_len = be32_to_cpu(in_buf->smb_buf_length) + 4;
426 flags |= CIFS_NO_RESP; 419 flags |= CIFS_NO_RESP;
427 rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags); 420 rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags);
428 cFYI(DBG2, "SendRcvNoRsp flags %d rc %d", flags, rc); 421 cFYI(DBG2, "SendRcvNoRsp flags %d rc %d", flags, rc);
@@ -488,10 +481,10 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
488 int rc = 0; 481 int rc = 0;
489 482
490 /* -4 for RFC1001 length and +2 for BCC field */ 483 /* -4 for RFC1001 length and +2 for BCC field */
491 in_buf->smb_buf_length = sizeof(struct smb_hdr) - 4 + 2; 484 in_buf->smb_buf_length = cpu_to_be32(sizeof(struct smb_hdr) - 4 + 2);
492 in_buf->Command = SMB_COM_NT_CANCEL; 485 in_buf->Command = SMB_COM_NT_CANCEL;
493 in_buf->WordCount = 0; 486 in_buf->WordCount = 0;
494 put_bcc_le(0, in_buf); 487 put_bcc(0, in_buf);
495 488
496 mutex_lock(&server->srv_mutex); 489 mutex_lock(&server->srv_mutex);
497 rc = cifs_sign_smb(in_buf, server, &mid->sequence_number); 490 rc = cifs_sign_smb(in_buf, server, &mid->sequence_number);
@@ -499,7 +492,7 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
499 mutex_unlock(&server->srv_mutex); 492 mutex_unlock(&server->srv_mutex);
500 return rc; 493 return rc;
501 } 494 }
502 rc = smb_send(server, in_buf, in_buf->smb_buf_length); 495 rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
503 mutex_unlock(&server->srv_mutex); 496 mutex_unlock(&server->srv_mutex);
504 497
505 cFYI(1, "issued NT_CANCEL for mid %u, rc = %d", 498 cFYI(1, "issued NT_CANCEL for mid %u, rc = %d",
@@ -612,7 +605,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
612 return rc; 605 return rc;
613 } 606 }
614 607
615 receive_len = midQ->resp_buf->smb_buf_length; 608 receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
616 609
617 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { 610 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
618 cERROR(1, "Frame too large received. Length: %d Xid: %d", 611 cERROR(1, "Frame too large received. Length: %d Xid: %d",
@@ -651,11 +644,6 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
651 rc = map_smb_to_linux_error(midQ->resp_buf, 644 rc = map_smb_to_linux_error(midQ->resp_buf,
652 flags & CIFS_LOG_ERROR); 645 flags & CIFS_LOG_ERROR);
653 646
654 /* convert ByteCount if necessary */
655 if (receive_len >= sizeof(struct smb_hdr) - 4
656 /* do not count RFC1001 header */ +
657 (2 * midQ->resp_buf->WordCount) + 2 /* bcc */ )
658 put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf);
659 if ((flags & CIFS_NO_RESP) == 0) 647 if ((flags & CIFS_NO_RESP) == 0)
660 midQ->resp_buf = NULL; /* mark it so buf will 648 midQ->resp_buf = NULL; /* mark it so buf will
661 not be freed by 649 not be freed by
@@ -698,9 +686,10 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
698 to the same server. We may make this configurable later or 686 to the same server. We may make this configurable later or
699 use ses->maxReq */ 687 use ses->maxReq */
700 688
701 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { 689 if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize +
690 MAX_CIFS_HDR_SIZE - 4) {
702 cERROR(1, "Illegal length, greater than maximum frame, %d", 691 cERROR(1, "Illegal length, greater than maximum frame, %d",
703 in_buf->smb_buf_length); 692 be32_to_cpu(in_buf->smb_buf_length));
704 return -EIO; 693 return -EIO;
705 } 694 }
706 695
@@ -733,7 +722,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
733#ifdef CONFIG_CIFS_STATS2 722#ifdef CONFIG_CIFS_STATS2
734 atomic_inc(&ses->server->inSend); 723 atomic_inc(&ses->server->inSend);
735#endif 724#endif
736 rc = smb_send(ses->server, in_buf, in_buf->smb_buf_length); 725 rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
737#ifdef CONFIG_CIFS_STATS2 726#ifdef CONFIG_CIFS_STATS2
738 atomic_dec(&ses->server->inSend); 727 atomic_dec(&ses->server->inSend);
739 midQ->when_sent = jiffies; 728 midQ->when_sent = jiffies;
@@ -768,7 +757,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
768 return rc; 757 return rc;
769 } 758 }
770 759
771 receive_len = midQ->resp_buf->smb_buf_length; 760 receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
772 761
773 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { 762 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
774 cERROR(1, "Frame too large received. Length: %d Xid: %d", 763 cERROR(1, "Frame too large received. Length: %d Xid: %d",
@@ -781,7 +770,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
781 770
782 if (midQ->resp_buf && out_buf 771 if (midQ->resp_buf && out_buf
783 && (midQ->midState == MID_RESPONSE_RECEIVED)) { 772 && (midQ->midState == MID_RESPONSE_RECEIVED)) {
784 out_buf->smb_buf_length = receive_len; 773 out_buf->smb_buf_length = cpu_to_be32(receive_len);
785 memcpy((char *)out_buf + 4, 774 memcpy((char *)out_buf + 4,
786 (char *)midQ->resp_buf + 4, 775 (char *)midQ->resp_buf + 4,
787 receive_len); 776 receive_len);
@@ -800,16 +789,10 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
800 } 789 }
801 } 790 }
802 791
803 *pbytes_returned = out_buf->smb_buf_length; 792 *pbytes_returned = be32_to_cpu(out_buf->smb_buf_length);
804 793
805 /* BB special case reconnect tid and uid here? */ 794 /* BB special case reconnect tid and uid here? */
806 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ ); 795 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
807
808 /* convert ByteCount if necessary */
809 if (receive_len >= sizeof(struct smb_hdr) - 4
810 /* do not count RFC1001 header */ +
811 (2 * out_buf->WordCount) + 2 /* bcc */ )
812 put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf);
813 } else { 796 } else {
814 rc = -EIO; 797 rc = -EIO;
815 cERROR(1, "Bad MID state?"); 798 cERROR(1, "Bad MID state?");
@@ -877,9 +860,10 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
877 to the same server. We may make this configurable later or 860 to the same server. We may make this configurable later or
878 use ses->maxReq */ 861 use ses->maxReq */
879 862
880 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { 863 if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize +
864 MAX_CIFS_HDR_SIZE - 4) {
881 cERROR(1, "Illegal length, greater than maximum frame, %d", 865 cERROR(1, "Illegal length, greater than maximum frame, %d",
882 in_buf->smb_buf_length); 866 be32_to_cpu(in_buf->smb_buf_length));
883 return -EIO; 867 return -EIO;
884 } 868 }
885 869
@@ -910,7 +894,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
910#ifdef CONFIG_CIFS_STATS2 894#ifdef CONFIG_CIFS_STATS2
911 atomic_inc(&ses->server->inSend); 895 atomic_inc(&ses->server->inSend);
912#endif 896#endif
913 rc = smb_send(ses->server, in_buf, in_buf->smb_buf_length); 897 rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
914#ifdef CONFIG_CIFS_STATS2 898#ifdef CONFIG_CIFS_STATS2
915 atomic_dec(&ses->server->inSend); 899 atomic_dec(&ses->server->inSend);
916 midQ->when_sent = jiffies; 900 midQ->when_sent = jiffies;
@@ -977,7 +961,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
977 if (rc != 0) 961 if (rc != 0)
978 return rc; 962 return rc;
979 963
980 receive_len = midQ->resp_buf->smb_buf_length; 964 receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
981 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { 965 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
982 cERROR(1, "Frame too large received. Length: %d Xid: %d", 966 cERROR(1, "Frame too large received. Length: %d Xid: %d",
983 receive_len, xid); 967 receive_len, xid);
@@ -993,7 +977,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
993 goto out; 977 goto out;
994 } 978 }
995 979
996 out_buf->smb_buf_length = receive_len; 980 out_buf->smb_buf_length = cpu_to_be32(receive_len);
997 memcpy((char *)out_buf + 4, 981 memcpy((char *)out_buf + 4,
998 (char *)midQ->resp_buf + 4, 982 (char *)midQ->resp_buf + 4,
999 receive_len); 983 receive_len);
@@ -1012,17 +996,11 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
1012 } 996 }
1013 } 997 }
1014 998
1015 *pbytes_returned = out_buf->smb_buf_length; 999 *pbytes_returned = be32_to_cpu(out_buf->smb_buf_length);
1016 1000
1017 /* BB special case reconnect tid and uid here? */ 1001 /* BB special case reconnect tid and uid here? */
1018 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ ); 1002 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
1019 1003
1020 /* convert ByteCount if necessary */
1021 if (receive_len >= sizeof(struct smb_hdr) - 4
1022 /* do not count RFC1001 header */ +
1023 (2 * out_buf->WordCount) + 2 /* bcc */ )
1024 put_bcc(get_bcc_le(out_buf), out_buf);
1025
1026out: 1004out:
1027 delete_mid(midQ); 1005 delete_mid(midQ);
1028 if (rstart && rc == -EACCES) 1006 if (rstart && rc == -EACCES)
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index eae2a1491608..912995e013ec 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -112,6 +112,7 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
112 struct cifsTconInfo *pTcon; 112 struct cifsTconInfo *pTcon;
113 struct super_block *sb; 113 struct super_block *sb;
114 char *full_path; 114 char *full_path;
115 struct cifs_ntsd *pacl;
115 116
116 if (direntry == NULL) 117 if (direntry == NULL)
117 return -EIO; 118 return -EIO;
@@ -166,6 +167,25 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
166 rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value, 167 rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value,
167 (__u16)value_size, cifs_sb->local_nls, 168 (__u16)value_size, cifs_sb->local_nls,
168 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 169 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
170 } else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL,
171 strlen(CIFS_XATTR_CIFS_ACL)) == 0) {
172 pacl = kmalloc(value_size, GFP_KERNEL);
173 if (!pacl) {
174 cFYI(1, "%s: Can't allocate memory for ACL",
175 __func__);
176 rc = -ENOMEM;
177 } else {
178#ifdef CONFIG_CIFS_ACL
179 memcpy(pacl, ea_value, value_size);
180 rc = set_cifs_acl(pacl, value_size,
181 direntry->d_inode, full_path);
182 if (rc == 0) /* force revalidate of the inode */
183 CIFS_I(direntry->d_inode)->time = 0;
184 kfree(pacl);
185#else
186 cFYI(1, "Set CIFS ACL not supported yet");
187#endif /* CONFIG_CIFS_ACL */
188 }
169 } else { 189 } else {
170 int temp; 190 int temp;
171 temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS, 191 temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
diff --git a/fs/compat.c b/fs/compat.c
index 72fe6cda9108..0ea00832de23 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1306,241 +1306,6 @@ compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, int
1306 return do_sys_open(dfd, filename, flags, mode); 1306 return do_sys_open(dfd, filename, flags, mode);
1307} 1307}
1308 1308
1309/*
1310 * compat_count() counts the number of arguments/envelopes. It is basically
1311 * a copy of count() from fs/exec.c, except that it works with 32 bit argv
1312 * and envp pointers.
1313 */
1314static int compat_count(compat_uptr_t __user *argv, int max)
1315{
1316 int i = 0;
1317
1318 if (argv != NULL) {
1319 for (;;) {
1320 compat_uptr_t p;
1321
1322 if (get_user(p, argv))
1323 return -EFAULT;
1324 if (!p)
1325 break;
1326 argv++;
1327 if (i++ >= max)
1328 return -E2BIG;
1329
1330 if (fatal_signal_pending(current))
1331 return -ERESTARTNOHAND;
1332 cond_resched();
1333 }
1334 }
1335 return i;
1336}
1337
1338/*
1339 * compat_copy_strings() is basically a copy of copy_strings() from fs/exec.c
1340 * except that it works with 32 bit argv and envp pointers.
1341 */
/*
 * Copy argc user strings into the argument pages of bprm.
 *
 * argc: number of entries of argv to copy
 * argv: user array of 32 bit string pointers; each one is converted with
 *       compat_ptr() before it is dereferenced
 * bprm: binprm being filled in; bprm->p is lowered by the length of every
 *       string that is copied
 *
 * The array is consumed from its last entry to its first, and each string
 * is copied from its tail towards its head.
 *
 * Returns 0 on success, -EFAULT when a pointer or string cannot be read,
 * -E2BIG when a string is longer than MAX_ARG_STRLEN or get_arg_page()
 * yields no page, and -ERESTARTNOHAND when a fatal signal is pending.
 */
1342static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
1343 struct linux_binprm *bprm)
1344{
 /* Page currently kmap()ed, its kernel address, and the page-aligned
  * position it corresponds to; kept across strings so a page is only
  * remapped when the copy moves to a different one. */
1345 struct page *kmapped_page = NULL;
1346 char *kaddr = NULL;
1347 unsigned long kpos = 0;
1348 int ret;
1349
1350 while (argc-- > 0) {
1351 compat_uptr_t str;
1352 int len;
1353 unsigned long pos;
1354
 /* A failed pointer fetch or a zero length from strnlen_user() are
  * both reported as -EFAULT. */
1355 if (get_user(str, argv+argc) ||
1356 !(len = strnlen_user(compat_ptr(str), MAX_ARG_STRLEN))) {
1357 ret = -EFAULT;
1358 goto out;
1359 }
1360
1361 if (len > MAX_ARG_STRLEN) {
1362 ret = -E2BIG;
1363 goto out;
1364 }
1365
1366 /* We're going to work our way backwards. */
 /* pos starts at the old bprm->p and str is advanced to the end of the
  * user string; both are then walked downwards by the inner loop. */
1367 pos = bprm->p;
1368 str += len;
1369 bprm->p -= len;
1370
1371 while (len > 0) {
1372 int offset, bytes_to_copy;
1373
1374 if (fatal_signal_pending(current)) {
1375 ret = -ERESTARTNOHAND;
1376 goto out;
1377 }
1378 cond_resched();
1379
 /* offset is the number of bytes between the start of the page and
  * pos; a pos exactly on a page boundary means a whole page is
  * available below it. */
1380 offset = pos % PAGE_SIZE;
1381 if (offset == 0)
1382 offset = PAGE_SIZE;
1383
 /* Never copy across a page boundary, and never more than is left. */
1384 bytes_to_copy = offset;
1385 if (bytes_to_copy > len)
1386 bytes_to_copy = len;
1387
 /* Step all cursors back by the chunk size; after this, offset is
  * the in-page destination of the chunk and str its user source. */
1388 offset -= bytes_to_copy;
1389 pos -= bytes_to_copy;
1390 str -= bytes_to_copy;
1391 len -= bytes_to_copy;
1392
 /* Switch mappings only when the chunk lands in a different page
  * than the one currently mapped (or nothing is mapped yet). */
1393 if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
1394 struct page *page;
1395
1396 page = get_arg_page(bprm, pos, 1);
1397 if (!page) {
1398 ret = -E2BIG;
1399 goto out;
1400 }
1401
 /* Release the previously mapped page before taking the new one. */
1402 if (kmapped_page) {
1403 flush_kernel_dcache_page(kmapped_page);
1404 kunmap(kmapped_page);
1405 put_page(kmapped_page);
1406 }
1407 kmapped_page = page;
1408 kaddr = kmap(kmapped_page);
1409 kpos = pos & PAGE_MASK;
1410 flush_cache_page(bprm->vma, kpos,
1411 page_to_pfn(kmapped_page));
1412 }
1413 if (copy_from_user(kaddr+offset, compat_ptr(str),
1414 bytes_to_copy)) {
1415 ret = -EFAULT;
1416 goto out;
1417 }
1418 }
1419 }
1420 ret = 0;
1421out:
 /* Common exit: drop whatever page is still mapped, on success and on
  * every error path alike. */
1422 if (kmapped_page) {
1423 flush_kernel_dcache_page(kmapped_page);
1424 kunmap(kmapped_page);
1425 put_page(kmapped_page);
1426 }
1427 return ret;
1428}
1429
1430/*
1431 * compat_do_execve() is mostly a copy of do_execve(), with the exception
1432 * that it processes 32 bit argv and envp pointers.
1433 */
/*
 * Execute a new program on behalf of a task that passes 32 bit argv/envp
 * pointer arrays.
 *
 * filename: path handed to open_exec(); also stored as bprm->filename and
 *           bprm->interp
 * argv:     user array of 32 bit argument string pointers
 * envp:     user array of 32 bit environment string pointers
 * regs:     register state passed through to search_binary_handler()
 *
 * Returns the (non-negative) result of search_binary_handler() on success,
 * or the negative error of the first step that failed.  On failure the
 * labels at the bottom undo the completed steps in reverse order.
 */
1434int compat_do_execve(char * filename,
1435 compat_uptr_t __user *argv,
1436 compat_uptr_t __user *envp,
1437 struct pt_regs * regs)
1438{
1439 struct linux_binprm *bprm;
1440 struct file *file;
1441 struct files_struct *displaced;
1442 bool clear_in_exec;
1443 int retval;
1444
 /* displaced is filled in by unshare_files(); it is put on success and
  * handed to reset_files_struct() on failure (see the end of the
  * function). */
1445 retval = unshare_files(&displaced);
1446 if (retval)
1447 goto out_ret;
1448
1449 retval = -ENOMEM;
1450 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
1451 if (!bprm)
1452 goto out_files;
1453
1454 retval = prepare_bprm_creds(bprm);
1455 if (retval)
1456 goto out_free;
1457
 /* A non-negative result of check_unsafe_exec() is kept as a flag that
  * decides whether current->fs->in_exec is cleared on the error path. */
1458 retval = check_unsafe_exec(bprm);
1459 if (retval < 0)
1460 goto out_free;
1461 clear_in_exec = retval;
1462 current->in_execve = 1;
1463
1464 file = open_exec(filename);
1465 retval = PTR_ERR(file);
1466 if (IS_ERR(file))
1467 goto out_unmark;
1468
1469 sched_exec();
1470
1471 bprm->file = file;
1472 bprm->filename = filename;
1473 bprm->interp = filename;
1474
1475 retval = bprm_mm_init(bprm);
1476 if (retval)
1477 goto out_file;
1478
 /* Count both arrays first; a negative count is an error code. */
1479 bprm->argc = compat_count(argv, MAX_ARG_STRINGS);
1480 if ((retval = bprm->argc) < 0)
1481 goto out;
1482
1483 bprm->envc = compat_count(envp, MAX_ARG_STRINGS);
1484 if ((retval = bprm->envc) < 0)
1485 goto out;
1486
1487 retval = prepare_binprm(bprm);
1488 if (retval < 0)
1489 goto out;
1490
 /* Strings are copied in the order filename, environment, arguments;
  * bprm->exec records bprm->p as it stands after the filename. */
1491 retval = copy_strings_kernel(1, &bprm->filename, bprm);
1492 if (retval < 0)
1493 goto out;
1494
1495 bprm->exec = bprm->p;
1496 retval = compat_copy_strings(bprm->envc, envp, bprm);
1497 if (retval < 0)
1498 goto out;
1499
1500 retval = compat_copy_strings(bprm->argc, argv, bprm);
1501 if (retval < 0)
1502 goto out;
1503
1504 retval = search_binary_handler(bprm, regs);
1505 if (retval < 0)
1506 goto out;
1507
1508 /* execve succeeded */
1509 current->fs->in_exec = 0;
1510 current->in_execve = 0;
1511 acct_update_integrals(current);
1512 free_bprm(bprm);
1513 if (displaced)
1514 put_files_struct(displaced);
1515 return retval;
1516
 /* Error unwind: each label releases what was set up after the label
  * below it, so control falls through from the latest step to the
  * earliest. */
1517out:
1518 if (bprm->mm) {
1519 acct_arg_size(bprm, 0);
1520 mmput(bprm->mm);
1521 }
1522
1523out_file:
1524 if (bprm->file) {
1525 allow_write_access(bprm->file);
1526 fput(bprm->file);
1527 }
1528
1529out_unmark:
1530 if (clear_in_exec)
1531 current->fs->in_exec = 0;
1532 current->in_execve = 0;
1533
1534out_free:
1535 free_bprm(bprm);
1536
1537out_files:
1538 if (displaced)
1539 reset_files_struct(displaced);
1540out_ret:
1541 return retval;
1542}
1543
1544#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) 1309#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t))
1545 1310
1546static int poll_select_copy_remaining(struct timespec *end_time, void __user *p, 1311static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 9908c20bb1a5..9d17d350abc5 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -53,11 +53,14 @@ DEFINE_SPINLOCK(configfs_dirent_lock);
53static void configfs_d_iput(struct dentry * dentry, 53static void configfs_d_iput(struct dentry * dentry,
54 struct inode * inode) 54 struct inode * inode)
55{ 55{
56 struct configfs_dirent * sd = dentry->d_fsdata; 56 struct configfs_dirent *sd = dentry->d_fsdata;
57 57
58 if (sd) { 58 if (sd) {
59 BUG_ON(sd->s_dentry != dentry); 59 BUG_ON(sd->s_dentry != dentry);
60 /* Coordinate with configfs_readdir */
61 spin_lock(&configfs_dirent_lock);
60 sd->s_dentry = NULL; 62 sd->s_dentry = NULL;
63 spin_unlock(&configfs_dirent_lock);
61 configfs_put(sd); 64 configfs_put(sd);
62 } 65 }
63 iput(inode); 66 iput(inode);
@@ -689,7 +692,8 @@ static int create_default_group(struct config_group *parent_group,
689 sd = child->d_fsdata; 692 sd = child->d_fsdata;
690 sd->s_type |= CONFIGFS_USET_DEFAULT; 693 sd->s_type |= CONFIGFS_USET_DEFAULT;
691 } else { 694 } else {
692 d_delete(child); 695 BUG_ON(child->d_inode);
696 d_drop(child);
693 dput(child); 697 dput(child);
694 } 698 }
695 } 699 }
@@ -1547,7 +1551,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
1547 struct configfs_dirent * parent_sd = dentry->d_fsdata; 1551 struct configfs_dirent * parent_sd = dentry->d_fsdata;
1548 struct configfs_dirent *cursor = filp->private_data; 1552 struct configfs_dirent *cursor = filp->private_data;
1549 struct list_head *p, *q = &cursor->s_sibling; 1553 struct list_head *p, *q = &cursor->s_sibling;
1550 ino_t ino; 1554 ino_t ino = 0;
1551 int i = filp->f_pos; 1555 int i = filp->f_pos;
1552 1556
1553 switch (i) { 1557 switch (i) {
@@ -1575,6 +1579,7 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
1575 struct configfs_dirent *next; 1579 struct configfs_dirent *next;
1576 const char * name; 1580 const char * name;
1577 int len; 1581 int len;
1582 struct inode *inode = NULL;
1578 1583
1579 next = list_entry(p, struct configfs_dirent, 1584 next = list_entry(p, struct configfs_dirent,
1580 s_sibling); 1585 s_sibling);
@@ -1583,9 +1588,28 @@ static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir
1583 1588
1584 name = configfs_get_name(next); 1589 name = configfs_get_name(next);
1585 len = strlen(name); 1590 len = strlen(name);
1586 if (next->s_dentry) 1591
1587 ino = next->s_dentry->d_inode->i_ino; 1592 /*
1588 else 1593 * We'll have a dentry and an inode for
1594 * PINNED items and for open attribute
1595 * files. We lock here to prevent a race
1596 * with configfs_d_iput() clearing
1597 * s_dentry before calling iput().
1598 *
1599 * Why do we go to the trouble? If
1600 * someone has an attribute file open,
1601 * the inode number should match until
1602 * they close it. Beyond that, we don't
1603 * care.
1604 */
1605 spin_lock(&configfs_dirent_lock);
1606 dentry = next->s_dentry;
1607 if (dentry)
1608 inode = dentry->d_inode;
1609 if (inode)
1610 ino = inode->i_ino;
1611 spin_unlock(&configfs_dirent_lock);
1612 if (!inode)
1589 ino = iunique(configfs_sb, 2); 1613 ino = iunique(configfs_sb, 2);
1590 1614
1591 if (filldir(dirent, name, len, filp->f_pos, ino, 1615 if (filldir(dirent, name, len, filp->f_pos, ino,
@@ -1685,7 +1709,8 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
1685 err = configfs_attach_group(sd->s_element, &group->cg_item, 1709 err = configfs_attach_group(sd->s_element, &group->cg_item,
1686 dentry); 1710 dentry);
1687 if (err) { 1711 if (err) {
1688 d_delete(dentry); 1712 BUG_ON(dentry->d_inode);
1713 d_drop(dentry);
1689 dput(dentry); 1714 dput(dentry);
1690 } else { 1715 } else {
1691 spin_lock(&configfs_dirent_lock); 1716 spin_lock(&configfs_dirent_lock);
diff --git a/fs/dcache.c b/fs/dcache.c
index 22a0ef41bad1..37f72ee5bf7c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -35,6 +35,7 @@
35#include <linux/hardirq.h> 35#include <linux/hardirq.h>
36#include <linux/bit_spinlock.h> 36#include <linux/bit_spinlock.h>
37#include <linux/rculist_bl.h> 37#include <linux/rculist_bl.h>
38#include <linux/prefetch.h>
38#include "internal.h" 39#include "internal.h"
39 40
40/* 41/*
@@ -1219,7 +1220,7 @@ void shrink_dcache_parent(struct dentry * parent)
1219EXPORT_SYMBOL(shrink_dcache_parent); 1220EXPORT_SYMBOL(shrink_dcache_parent);
1220 1221
1221/* 1222/*
1222 * Scan `nr' dentries and return the number which remain. 1223 * Scan `sc->nr_to_scan' dentries and return the number which remain.
1223 * 1224 *
1224 * We need to avoid reentering the filesystem if the caller is performing a 1225 * We need to avoid reentering the filesystem if the caller is performing a
1225 * GFP_NOFS allocation attempt. One example deadlock is: 1226 * GFP_NOFS allocation attempt. One example deadlock is:
@@ -1230,8 +1231,12 @@ EXPORT_SYMBOL(shrink_dcache_parent);
1230 * 1231 *
1231 * In this case we return -1 to tell the caller that we bailed. 1232 * In this case we return -1 to tell the caller that we bailed.
1232 */ 1233 */
1233static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 1234static int shrink_dcache_memory(struct shrinker *shrink,
1235 struct shrink_control *sc)
1234{ 1236{
1237 int nr = sc->nr_to_scan;
1238 gfp_t gfp_mask = sc->gfp_mask;
1239
1235 if (nr) { 1240 if (nr) {
1236 if (!(gfp_mask & __GFP_FS)) 1241 if (!(gfp_mask & __GFP_FS))
1237 return -1; 1242 return -1;
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 89d394d8fe24..90f76575c056 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -428,26 +428,17 @@ static ssize_t write_file_bool(struct file *file, const char __user *user_buf,
428 size_t count, loff_t *ppos) 428 size_t count, loff_t *ppos)
429{ 429{
430 char buf[32]; 430 char buf[32];
431 int buf_size; 431 size_t buf_size;
432 bool bv;
432 u32 *val = file->private_data; 433 u32 *val = file->private_data;
433 434
434 buf_size = min(count, (sizeof(buf)-1)); 435 buf_size = min(count, (sizeof(buf)-1));
435 if (copy_from_user(buf, user_buf, buf_size)) 436 if (copy_from_user(buf, user_buf, buf_size))
436 return -EFAULT; 437 return -EFAULT;
437 438
438 switch (buf[0]) { 439 if (strtobool(buf, &bv) == 0)
439 case 'y': 440 *val = bv;
440 case 'Y': 441
441 case '1':
442 *val = 1;
443 break;
444 case 'n':
445 case 'N':
446 case '0':
447 *val = 0;
448 break;
449 }
450
451 return count; 442 return count;
452} 443}
453 444
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 0d329ff8ed4c..9b026ea8baa9 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -100,6 +100,7 @@ struct dlm_cluster {
100 unsigned int cl_log_debug; 100 unsigned int cl_log_debug;
101 unsigned int cl_protocol; 101 unsigned int cl_protocol;
102 unsigned int cl_timewarn_cs; 102 unsigned int cl_timewarn_cs;
103 unsigned int cl_waitwarn_us;
103}; 104};
104 105
105enum { 106enum {
@@ -114,6 +115,7 @@ enum {
114 CLUSTER_ATTR_LOG_DEBUG, 115 CLUSTER_ATTR_LOG_DEBUG,
115 CLUSTER_ATTR_PROTOCOL, 116 CLUSTER_ATTR_PROTOCOL,
116 CLUSTER_ATTR_TIMEWARN_CS, 117 CLUSTER_ATTR_TIMEWARN_CS,
118 CLUSTER_ATTR_WAITWARN_US,
117}; 119};
118 120
119struct cluster_attribute { 121struct cluster_attribute {
@@ -166,6 +168,7 @@ CLUSTER_ATTR(scan_secs, 1);
166CLUSTER_ATTR(log_debug, 0); 168CLUSTER_ATTR(log_debug, 0);
167CLUSTER_ATTR(protocol, 0); 169CLUSTER_ATTR(protocol, 0);
168CLUSTER_ATTR(timewarn_cs, 1); 170CLUSTER_ATTR(timewarn_cs, 1);
171CLUSTER_ATTR(waitwarn_us, 0);
169 172
170static struct configfs_attribute *cluster_attrs[] = { 173static struct configfs_attribute *cluster_attrs[] = {
171 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, 174 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -179,6 +182,7 @@ static struct configfs_attribute *cluster_attrs[] = {
179 [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr, 182 [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
180 [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, 183 [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
181 [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, 184 [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
185 [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr,
182 NULL, 186 NULL,
183}; 187};
184 188
@@ -439,6 +443,7 @@ static struct config_group *make_cluster(struct config_group *g,
439 cl->cl_log_debug = dlm_config.ci_log_debug; 443 cl->cl_log_debug = dlm_config.ci_log_debug;
440 cl->cl_protocol = dlm_config.ci_protocol; 444 cl->cl_protocol = dlm_config.ci_protocol;
441 cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; 445 cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
446 cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
442 447
443 space_list = &sps->ss_group; 448 space_list = &sps->ss_group;
444 comm_list = &cms->cs_group; 449 comm_list = &cms->cs_group;
@@ -986,6 +991,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
986#define DEFAULT_LOG_DEBUG 0 991#define DEFAULT_LOG_DEBUG 0
987#define DEFAULT_PROTOCOL 0 992#define DEFAULT_PROTOCOL 0
988#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ 993#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
994#define DEFAULT_WAITWARN_US 0
989 995
990struct dlm_config_info dlm_config = { 996struct dlm_config_info dlm_config = {
991 .ci_tcp_port = DEFAULT_TCP_PORT, 997 .ci_tcp_port = DEFAULT_TCP_PORT,
@@ -998,6 +1004,7 @@ struct dlm_config_info dlm_config = {
998 .ci_scan_secs = DEFAULT_SCAN_SECS, 1004 .ci_scan_secs = DEFAULT_SCAN_SECS,
999 .ci_log_debug = DEFAULT_LOG_DEBUG, 1005 .ci_log_debug = DEFAULT_LOG_DEBUG,
1000 .ci_protocol = DEFAULT_PROTOCOL, 1006 .ci_protocol = DEFAULT_PROTOCOL,
1001 .ci_timewarn_cs = DEFAULT_TIMEWARN_CS 1007 .ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
1008 .ci_waitwarn_us = DEFAULT_WAITWARN_US
1002}; 1009};
1003 1010
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 4f1d6fce58c5..dd0ce24d5a80 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -28,6 +28,7 @@ struct dlm_config_info {
28 int ci_log_debug; 28 int ci_log_debug;
29 int ci_protocol; 29 int ci_protocol;
30 int ci_timewarn_cs; 30 int ci_timewarn_cs;
31 int ci_waitwarn_us;
31}; 32};
32 33
33extern struct dlm_config_info dlm_config; 34extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index b94204913011..0262451eb9c6 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -209,6 +209,7 @@ struct dlm_args {
209#define DLM_IFL_WATCH_TIMEWARN 0x00400000 209#define DLM_IFL_WATCH_TIMEWARN 0x00400000
210#define DLM_IFL_TIMEOUT_CANCEL 0x00800000 210#define DLM_IFL_TIMEOUT_CANCEL 0x00800000
211#define DLM_IFL_DEADLOCK_CANCEL 0x01000000 211#define DLM_IFL_DEADLOCK_CANCEL 0x01000000
212#define DLM_IFL_STUB_MS 0x02000000 /* magic number for m_flags */
212#define DLM_IFL_USER 0x00000001 213#define DLM_IFL_USER 0x00000001
213#define DLM_IFL_ORPHAN 0x00000002 214#define DLM_IFL_ORPHAN 0x00000002
214 215
@@ -245,6 +246,7 @@ struct dlm_lkb {
245 246
246 int8_t lkb_wait_type; /* type of reply waiting for */ 247 int8_t lkb_wait_type; /* type of reply waiting for */
247 int8_t lkb_wait_count; 248 int8_t lkb_wait_count;
249 int lkb_wait_nodeid; /* for debugging */
248 250
249 struct list_head lkb_idtbl_list; /* lockspace lkbtbl */ 251 struct list_head lkb_idtbl_list; /* lockspace lkbtbl */
250 struct list_head lkb_statequeue; /* rsb g/c/w list */ 252 struct list_head lkb_statequeue; /* rsb g/c/w list */
@@ -254,6 +256,7 @@ struct dlm_lkb {
254 struct list_head lkb_ownqueue; /* list of locks for a process */ 256 struct list_head lkb_ownqueue; /* list of locks for a process */
255 struct list_head lkb_time_list; 257 struct list_head lkb_time_list;
256 ktime_t lkb_timestamp; 258 ktime_t lkb_timestamp;
259 ktime_t lkb_wait_time;
257 unsigned long lkb_timeout_cs; 260 unsigned long lkb_timeout_cs;
258 261
259 struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE]; 262 struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE];
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 56d6bfcc1e48..f71d0b5abd95 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -799,10 +799,84 @@ static int msg_reply_type(int mstype)
799 return -1; 799 return -1;
800} 800}
801 801
/*
 * Record nodeid in the warned[] table, or report that it is already there.
 *
 * warned[] holds num_nodes slots; a slot value of 0 means "unused", and
 * used slots are packed at the front.  Returns 1 when nodeid was found in
 * the table, 0 when it was not (in which case it has been stored in the
 * first unused slot, if one was left).
 */
static int nodeid_warned(int nodeid, int num_nodes, int *warned)
{
	int idx = 0;

	while (idx < num_nodes) {
		int seen = warned[idx];

		if (seen == 0) {
			/* first free slot: remember this node */
			warned[idx] = nodeid;
			return 0;
		}
		if (seen == nodeid)
			return 1;
		idx++;
	}

	/* table full and nodeid not present */
	return 0;
}
816
817void dlm_scan_waiters(struct dlm_ls *ls)
818{
819 struct dlm_lkb *lkb;
820 ktime_t zero = ktime_set(0, 0);
821 s64 us;
822 s64 debug_maxus = 0;
823 u32 debug_scanned = 0;
824 u32 debug_expired = 0;
825 int num_nodes = 0;
826 int *warned = NULL;
827
828 if (!dlm_config.ci_waitwarn_us)
829 return;
830
831 mutex_lock(&ls->ls_waiters_mutex);
832
833 list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
834 if (ktime_equal(lkb->lkb_wait_time, zero))
835 continue;
836
837 debug_scanned++;
838
839 us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_wait_time));
840
841 if (us < dlm_config.ci_waitwarn_us)
842 continue;
843
844 lkb->lkb_wait_time = zero;
845
846 debug_expired++;
847 if (us > debug_maxus)
848 debug_maxus = us;
849
850 if (!num_nodes) {
851 num_nodes = ls->ls_num_nodes;
852 warned = kmalloc(GFP_KERNEL, num_nodes * sizeof(int));
853 if (warned)
854 memset(warned, 0, num_nodes * sizeof(int));
855 }
856 if (!warned)
857 continue;
858 if (nodeid_warned(lkb->lkb_wait_nodeid, num_nodes, warned))
859 continue;
860
861 log_error(ls, "waitwarn %x %lld %d us check connection to "
862 "node %d", lkb->lkb_id, (long long)us,
863 dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);
864 }
865 mutex_unlock(&ls->ls_waiters_mutex);
866
867 if (warned)
868 kfree(warned);
869
870 if (debug_expired)
871 log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us",
872 debug_scanned, debug_expired,
873 dlm_config.ci_waitwarn_us, (long long)debug_maxus);
874}
875
802/* add/remove lkb from global waiters list of lkb's waiting for 876/* add/remove lkb from global waiters list of lkb's waiting for
803 a reply from a remote node */ 877 a reply from a remote node */
804 878
805static int add_to_waiters(struct dlm_lkb *lkb, int mstype) 879static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
806{ 880{
807 struct dlm_ls *ls = lkb->lkb_resource->res_ls; 881 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
808 int error = 0; 882 int error = 0;
@@ -842,6 +916,8 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype)
842 916
843 lkb->lkb_wait_count++; 917 lkb->lkb_wait_count++;
844 lkb->lkb_wait_type = mstype; 918 lkb->lkb_wait_type = mstype;
919 lkb->lkb_wait_time = ktime_get();
920 lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */
845 hold_lkb(lkb); 921 hold_lkb(lkb);
846 list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); 922 list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
847 out: 923 out:
@@ -961,10 +1037,10 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms)
961 struct dlm_ls *ls = lkb->lkb_resource->res_ls; 1037 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
962 int error; 1038 int error;
963 1039
964 if (ms != &ls->ls_stub_ms) 1040 if (ms->m_flags != DLM_IFL_STUB_MS)
965 mutex_lock(&ls->ls_waiters_mutex); 1041 mutex_lock(&ls->ls_waiters_mutex);
966 error = _remove_from_waiters(lkb, ms->m_type, ms); 1042 error = _remove_from_waiters(lkb, ms->m_type, ms);
967 if (ms != &ls->ls_stub_ms) 1043 if (ms->m_flags != DLM_IFL_STUB_MS)
968 mutex_unlock(&ls->ls_waiters_mutex); 1044 mutex_unlock(&ls->ls_waiters_mutex);
969 return error; 1045 return error;
970} 1046}
@@ -1157,6 +1233,16 @@ void dlm_adjust_timeouts(struct dlm_ls *ls)
1157 list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) 1233 list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
1158 lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us); 1234 lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us);
1159 mutex_unlock(&ls->ls_timeout_mutex); 1235 mutex_unlock(&ls->ls_timeout_mutex);
1236
1237 if (!dlm_config.ci_waitwarn_us)
1238 return;
1239
1240 mutex_lock(&ls->ls_waiters_mutex);
1241 list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
1242 if (ktime_to_us(lkb->lkb_wait_time))
1243 lkb->lkb_wait_time = ktime_get();
1244 }
1245 mutex_unlock(&ls->ls_waiters_mutex);
1160} 1246}
1161 1247
1162/* lkb is master or local copy */ 1248/* lkb is master or local copy */
@@ -1376,14 +1462,8 @@ static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb)
1376 ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become 1462 ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become
1377 compatible with other granted locks */ 1463 compatible with other granted locks */
1378 1464
1379static void munge_demoted(struct dlm_lkb *lkb, struct dlm_message *ms) 1465static void munge_demoted(struct dlm_lkb *lkb)
1380{ 1466{
1381 if (ms->m_type != DLM_MSG_CONVERT_REPLY) {
1382 log_print("munge_demoted %x invalid reply type %d",
1383 lkb->lkb_id, ms->m_type);
1384 return;
1385 }
1386
1387 if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) { 1467 if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) {
1388 log_print("munge_demoted %x invalid modes gr %d rq %d", 1468 log_print("munge_demoted %x invalid modes gr %d rq %d",
1389 lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode); 1469 lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode);
@@ -2844,12 +2924,12 @@ static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
2844 struct dlm_mhandle *mh; 2924 struct dlm_mhandle *mh;
2845 int to_nodeid, error; 2925 int to_nodeid, error;
2846 2926
2847 error = add_to_waiters(lkb, mstype); 2927 to_nodeid = r->res_nodeid;
2928
2929 error = add_to_waiters(lkb, mstype, to_nodeid);
2848 if (error) 2930 if (error)
2849 return error; 2931 return error;
2850 2932
2851 to_nodeid = r->res_nodeid;
2852
2853 error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh); 2933 error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
2854 if (error) 2934 if (error)
2855 goto fail; 2935 goto fail;
@@ -2880,9 +2960,9 @@ static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
2880 /* down conversions go without a reply from the master */ 2960 /* down conversions go without a reply from the master */
2881 if (!error && down_conversion(lkb)) { 2961 if (!error && down_conversion(lkb)) {
2882 remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY); 2962 remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY);
2963 r->res_ls->ls_stub_ms.m_flags = DLM_IFL_STUB_MS;
2883 r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; 2964 r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
2884 r->res_ls->ls_stub_ms.m_result = 0; 2965 r->res_ls->ls_stub_ms.m_result = 0;
2885 r->res_ls->ls_stub_ms.m_flags = lkb->lkb_flags;
2886 __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms); 2966 __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms);
2887 } 2967 }
2888 2968
@@ -2951,12 +3031,12 @@ static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb)
2951 struct dlm_mhandle *mh; 3031 struct dlm_mhandle *mh;
2952 int to_nodeid, error; 3032 int to_nodeid, error;
2953 3033
2954 error = add_to_waiters(lkb, DLM_MSG_LOOKUP); 3034 to_nodeid = dlm_dir_nodeid(r);
3035
3036 error = add_to_waiters(lkb, DLM_MSG_LOOKUP, to_nodeid);
2955 if (error) 3037 if (error)
2956 return error; 3038 return error;
2957 3039
2958 to_nodeid = dlm_dir_nodeid(r);
2959
2960 error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh); 3040 error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh);
2961 if (error) 3041 if (error)
2962 goto fail; 3042 goto fail;
@@ -3070,6 +3150,9 @@ static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms)
3070 3150
3071static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms) 3151static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
3072{ 3152{
3153 if (ms->m_flags == DLM_IFL_STUB_MS)
3154 return;
3155
3073 lkb->lkb_sbflags = ms->m_sbflags; 3156 lkb->lkb_sbflags = ms->m_sbflags;
3074 lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) | 3157 lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
3075 (ms->m_flags & 0x0000FFFF); 3158 (ms->m_flags & 0x0000FFFF);
@@ -3612,7 +3695,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
3612 /* convert was queued on remote master */ 3695 /* convert was queued on remote master */
3613 receive_flags_reply(lkb, ms); 3696 receive_flags_reply(lkb, ms);
3614 if (is_demoted(lkb)) 3697 if (is_demoted(lkb))
3615 munge_demoted(lkb, ms); 3698 munge_demoted(lkb);
3616 del_lkb(r, lkb); 3699 del_lkb(r, lkb);
3617 add_lkb(r, lkb, DLM_LKSTS_CONVERT); 3700 add_lkb(r, lkb, DLM_LKSTS_CONVERT);
3618 add_timeout(lkb); 3701 add_timeout(lkb);
@@ -3622,7 +3705,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
3622 /* convert was granted on remote master */ 3705 /* convert was granted on remote master */
3623 receive_flags_reply(lkb, ms); 3706 receive_flags_reply(lkb, ms);
3624 if (is_demoted(lkb)) 3707 if (is_demoted(lkb))
3625 munge_demoted(lkb, ms); 3708 munge_demoted(lkb);
3626 grant_lock_pc(r, lkb, ms); 3709 grant_lock_pc(r, lkb, ms);
3627 queue_cast(r, lkb, 0); 3710 queue_cast(r, lkb, 0);
3628 break; 3711 break;
@@ -3996,15 +4079,17 @@ void dlm_receive_buffer(union dlm_packet *p, int nodeid)
3996 dlm_put_lockspace(ls); 4079 dlm_put_lockspace(ls);
3997} 4080}
3998 4081
3999static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb) 4082static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb,
4083 struct dlm_message *ms_stub)
4000{ 4084{
4001 if (middle_conversion(lkb)) { 4085 if (middle_conversion(lkb)) {
4002 hold_lkb(lkb); 4086 hold_lkb(lkb);
4003 ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; 4087 memset(ms_stub, 0, sizeof(struct dlm_message));
4004 ls->ls_stub_ms.m_result = -EINPROGRESS; 4088 ms_stub->m_flags = DLM_IFL_STUB_MS;
4005 ls->ls_stub_ms.m_flags = lkb->lkb_flags; 4089 ms_stub->m_type = DLM_MSG_CONVERT_REPLY;
4006 ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; 4090 ms_stub->m_result = -EINPROGRESS;
4007 _receive_convert_reply(lkb, &ls->ls_stub_ms); 4091 ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
4092 _receive_convert_reply(lkb, ms_stub);
4008 4093
4009 /* Same special case as in receive_rcom_lock_args() */ 4094 /* Same special case as in receive_rcom_lock_args() */
4010 lkb->lkb_grmode = DLM_LOCK_IV; 4095 lkb->lkb_grmode = DLM_LOCK_IV;
@@ -4045,13 +4130,27 @@ static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb)
4045void dlm_recover_waiters_pre(struct dlm_ls *ls) 4130void dlm_recover_waiters_pre(struct dlm_ls *ls)
4046{ 4131{
4047 struct dlm_lkb *lkb, *safe; 4132 struct dlm_lkb *lkb, *safe;
4133 struct dlm_message *ms_stub;
4048 int wait_type, stub_unlock_result, stub_cancel_result; 4134 int wait_type, stub_unlock_result, stub_cancel_result;
4049 4135
4136 ms_stub = kmalloc(GFP_KERNEL, sizeof(struct dlm_message));
4137 if (!ms_stub) {
4138 log_error(ls, "dlm_recover_waiters_pre no mem");
4139 return;
4140 }
4141
4050 mutex_lock(&ls->ls_waiters_mutex); 4142 mutex_lock(&ls->ls_waiters_mutex);
4051 4143
4052 list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) { 4144 list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) {
4053 log_debug(ls, "pre recover waiter lkid %x type %d flags %x", 4145
4054 lkb->lkb_id, lkb->lkb_wait_type, lkb->lkb_flags); 4146 /* exclude debug messages about unlocks because there can be so
4147 many and they aren't very interesting */
4148
4149 if (lkb->lkb_wait_type != DLM_MSG_UNLOCK) {
4150 log_debug(ls, "recover_waiter %x nodeid %d "
4151 "msg %d to %d", lkb->lkb_id, lkb->lkb_nodeid,
4152 lkb->lkb_wait_type, lkb->lkb_wait_nodeid);
4153 }
4055 4154
4056 /* all outstanding lookups, regardless of destination will be 4155 /* all outstanding lookups, regardless of destination will be
4057 resent after recovery is done */ 4156 resent after recovery is done */
@@ -4097,26 +4196,28 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
4097 break; 4196 break;
4098 4197
4099 case DLM_MSG_CONVERT: 4198 case DLM_MSG_CONVERT:
4100 recover_convert_waiter(ls, lkb); 4199 recover_convert_waiter(ls, lkb, ms_stub);
4101 break; 4200 break;
4102 4201
4103 case DLM_MSG_UNLOCK: 4202 case DLM_MSG_UNLOCK:
4104 hold_lkb(lkb); 4203 hold_lkb(lkb);
4105 ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY; 4204 memset(ms_stub, 0, sizeof(struct dlm_message));
4106 ls->ls_stub_ms.m_result = stub_unlock_result; 4205 ms_stub->m_flags = DLM_IFL_STUB_MS;
4107 ls->ls_stub_ms.m_flags = lkb->lkb_flags; 4206 ms_stub->m_type = DLM_MSG_UNLOCK_REPLY;
4108 ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; 4207 ms_stub->m_result = stub_unlock_result;
4109 _receive_unlock_reply(lkb, &ls->ls_stub_ms); 4208 ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
4209 _receive_unlock_reply(lkb, ms_stub);
4110 dlm_put_lkb(lkb); 4210 dlm_put_lkb(lkb);
4111 break; 4211 break;
4112 4212
4113 case DLM_MSG_CANCEL: 4213 case DLM_MSG_CANCEL:
4114 hold_lkb(lkb); 4214 hold_lkb(lkb);
4115 ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY; 4215 memset(ms_stub, 0, sizeof(struct dlm_message));
4116 ls->ls_stub_ms.m_result = stub_cancel_result; 4216 ms_stub->m_flags = DLM_IFL_STUB_MS;
4117 ls->ls_stub_ms.m_flags = lkb->lkb_flags; 4217 ms_stub->m_type = DLM_MSG_CANCEL_REPLY;
4118 ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; 4218 ms_stub->m_result = stub_cancel_result;
4119 _receive_cancel_reply(lkb, &ls->ls_stub_ms); 4219 ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
4220 _receive_cancel_reply(lkb, ms_stub);
4120 dlm_put_lkb(lkb); 4221 dlm_put_lkb(lkb);
4121 break; 4222 break;
4122 4223
@@ -4127,6 +4228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
4127 schedule(); 4228 schedule();
4128 } 4229 }
4129 mutex_unlock(&ls->ls_waiters_mutex); 4230 mutex_unlock(&ls->ls_waiters_mutex);
4231 kfree(ms_stub);
4130} 4232}
4131 4233
4132static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls) 4234static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls)
@@ -4191,8 +4293,8 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
4191 ou = is_overlap_unlock(lkb); 4293 ou = is_overlap_unlock(lkb);
4192 err = 0; 4294 err = 0;
4193 4295
4194 log_debug(ls, "recover_waiters_post %x type %d flags %x %s", 4296 log_debug(ls, "recover_waiter %x nodeid %d msg %d r_nodeid %d",
4195 lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name); 4297 lkb->lkb_id, lkb->lkb_nodeid, mstype, r->res_nodeid);
4196 4298
4197 /* At this point we assume that we won't get a reply to any 4299 /* At this point we assume that we won't get a reply to any
4198 previous op or overlap op on this lock. First, do a big 4300 previous op or overlap op on this lock. First, do a big
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 88e93c80cc22..265017a7c3e7 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -24,6 +24,7 @@ int dlm_put_lkb(struct dlm_lkb *lkb);
24void dlm_scan_rsbs(struct dlm_ls *ls); 24void dlm_scan_rsbs(struct dlm_ls *ls);
25int dlm_lock_recovery_try(struct dlm_ls *ls); 25int dlm_lock_recovery_try(struct dlm_ls *ls);
26void dlm_unlock_recovery(struct dlm_ls *ls); 26void dlm_unlock_recovery(struct dlm_ls *ls);
27void dlm_scan_waiters(struct dlm_ls *ls);
27void dlm_scan_timeout(struct dlm_ls *ls); 28void dlm_scan_timeout(struct dlm_ls *ls);
28void dlm_adjust_timeouts(struct dlm_ls *ls); 29void dlm_adjust_timeouts(struct dlm_ls *ls);
29 30
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index f994a7dfda85..14cbf4099753 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -243,7 +243,6 @@ static struct dlm_ls *find_ls_to_scan(void)
243static int dlm_scand(void *data) 243static int dlm_scand(void *data)
244{ 244{
245 struct dlm_ls *ls; 245 struct dlm_ls *ls;
246 int timeout_jiffies = dlm_config.ci_scan_secs * HZ;
247 246
248 while (!kthread_should_stop()) { 247 while (!kthread_should_stop()) {
249 ls = find_ls_to_scan(); 248 ls = find_ls_to_scan();
@@ -252,13 +251,14 @@ static int dlm_scand(void *data)
252 ls->ls_scan_time = jiffies; 251 ls->ls_scan_time = jiffies;
253 dlm_scan_rsbs(ls); 252 dlm_scan_rsbs(ls);
254 dlm_scan_timeout(ls); 253 dlm_scan_timeout(ls);
254 dlm_scan_waiters(ls);
255 dlm_unlock_recovery(ls); 255 dlm_unlock_recovery(ls);
256 } else { 256 } else {
257 ls->ls_scan_time += HZ; 257 ls->ls_scan_time += HZ;
258 } 258 }
259 } else { 259 continue;
260 schedule_timeout_interruptible(timeout_jiffies);
261 } 260 }
261 schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
262 } 262 }
263 return 0; 263 return 0;
264} 264}
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 30d8b85febbf..e2b878004364 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -71,6 +71,36 @@ static void send_op(struct plock_op *op)
71 wake_up(&send_wq); 71 wake_up(&send_wq);
72} 72}
73 73
74/* If a process was killed while waiting for the only plock on a file,
75 locks_remove_posix will not see any lock on the file so it won't
76 send an unlock-close to us to pass on to userspace to clean up the
77 abandoned waiter. So, we have to insert the unlock-close when the
78 lock call is interrupted. */
79
80static void do_unlock_close(struct dlm_ls *ls, u64 number,
81 struct file *file, struct file_lock *fl)
82{
83 struct plock_op *op;
84
85 op = kzalloc(sizeof(*op), GFP_NOFS);
86 if (!op)
87 return;
88
89 op->info.optype = DLM_PLOCK_OP_UNLOCK;
90 op->info.pid = fl->fl_pid;
91 op->info.fsid = ls->ls_global_id;
92 op->info.number = number;
93 op->info.start = 0;
94 op->info.end = OFFSET_MAX;
95 if (fl->fl_lmops && fl->fl_lmops->fl_grant)
96 op->info.owner = (__u64) fl->fl_pid;
97 else
98 op->info.owner = (__u64)(long) fl->fl_owner;
99
100 op->info.flags |= DLM_PLOCK_FL_CLOSE;
101 send_op(op);
102}
103
74int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, 104int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
75 int cmd, struct file_lock *fl) 105 int cmd, struct file_lock *fl)
76{ 106{
@@ -114,9 +144,19 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
114 144
115 send_op(op); 145 send_op(op);
116 146
117 if (xop->callback == NULL) 147 if (xop->callback == NULL) {
118 wait_event(recv_wq, (op->done != 0)); 148 rv = wait_event_killable(recv_wq, (op->done != 0));
119 else { 149 if (rv == -ERESTARTSYS) {
150 log_debug(ls, "dlm_posix_lock: wait killed %llx",
151 (unsigned long long)number);
152 spin_lock(&ops_lock);
153 list_del(&op->list);
154 spin_unlock(&ops_lock);
155 kfree(xop);
156 do_unlock_close(ls, number, file, fl);
157 goto out;
158 }
159 } else {
120 rv = FILE_LOCK_DEFERRED; 160 rv = FILE_LOCK_DEFERRED;
121 goto out; 161 goto out;
122 } 162 }
@@ -233,6 +273,13 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
233 else 273 else
234 op->info.owner = (__u64)(long) fl->fl_owner; 274 op->info.owner = (__u64)(long) fl->fl_owner;
235 275
276 if (fl->fl_flags & FL_CLOSE) {
277 op->info.flags |= DLM_PLOCK_FL_CLOSE;
278 send_op(op);
279 rv = 0;
280 goto out;
281 }
282
236 send_op(op); 283 send_op(op);
237 wait_event(recv_wq, (op->done != 0)); 284 wait_event(recv_wq, (op->done != 0));
238 285
@@ -334,7 +381,10 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
334 spin_lock(&ops_lock); 381 spin_lock(&ops_lock);
335 if (!list_empty(&send_list)) { 382 if (!list_empty(&send_list)) {
336 op = list_entry(send_list.next, struct plock_op, list); 383 op = list_entry(send_list.next, struct plock_op, list);
337 list_move(&op->list, &recv_list); 384 if (op->info.flags & DLM_PLOCK_FL_CLOSE)
385 list_del(&op->list);
386 else
387 list_move(&op->list, &recv_list);
338 memcpy(&info, &op->info, sizeof(info)); 388 memcpy(&info, &op->info, sizeof(info));
339 } 389 }
340 spin_unlock(&ops_lock); 390 spin_unlock(&ops_lock);
@@ -342,6 +392,13 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
342 if (!op) 392 if (!op)
343 return -EAGAIN; 393 return -EAGAIN;
344 394
395 /* there is no need to get a reply from userspace for unlocks
396 that were generated by the vfs cleaning up for a close
397 (the process did not make an unlock call). */
398
399 if (op->info.flags & DLM_PLOCK_FL_CLOSE)
400 kfree(op);
401
345 if (copy_to_user(u, &info, sizeof(info))) 402 if (copy_to_user(u, &info, sizeof(info)))
346 return -EFAULT; 403 return -EFAULT;
347 return sizeof(info); 404 return sizeof(info);
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index d5ab3fe7c198..e96bf3e9be88 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -611,7 +611,6 @@ static ssize_t device_write(struct file *file, const char __user *buf,
611 611
612 out_sig: 612 out_sig:
613 sigprocmask(SIG_SETMASK, &tmpsig, NULL); 613 sigprocmask(SIG_SETMASK, &tmpsig, NULL);
614 recalc_sigpending();
615 out_free: 614 out_free:
616 kfree(kbuf); 615 kfree(kbuf);
617 return error; 616 return error;
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 98b77c89494c..c00e055b6282 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -40,9 +40,12 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
40static void drop_slab(void) 40static void drop_slab(void)
41{ 41{
42 int nr_objects; 42 int nr_objects;
43 struct shrink_control shrink = {
44 .gfp_mask = GFP_KERNEL,
45 };
43 46
44 do { 47 do {
45 nr_objects = shrink_slab(1000, GFP_KERNEL, 1000); 48 nr_objects = shrink_slab(&shrink, 1000, 1000);
46 } while (nr_objects > 10); 49 } while (nr_objects > 10);
47} 50}
48 51
diff --git a/fs/exec.c b/fs/exec.c
index 5e62d26a4fec..936f5776655c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,6 +55,7 @@
55#include <linux/fs_struct.h> 55#include <linux/fs_struct.h>
56#include <linux/pipe_fs_i.h> 56#include <linux/pipe_fs_i.h>
57#include <linux/oom.h> 57#include <linux/oom.h>
58#include <linux/compat.h>
58 59
59#include <asm/uaccess.h> 60#include <asm/uaccess.h>
60#include <asm/mmu_context.h> 61#include <asm/mmu_context.h>
@@ -166,8 +167,13 @@ out:
166} 167}
167 168
168#ifdef CONFIG_MMU 169#ifdef CONFIG_MMU
169 170/*
170void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) 171 * The nascent bprm->mm is not visible until exec_mmap() but it can
172 * use a lot of memory, account these pages in current->mm temporary
173 * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we
174 * change the counter back via acct_arg_size(0).
175 */
176static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
171{ 177{
172 struct mm_struct *mm = current->mm; 178 struct mm_struct *mm = current->mm;
173 long diff = (long)(pages - bprm->vma_pages); 179 long diff = (long)(pages - bprm->vma_pages);
@@ -186,7 +192,7 @@ void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
186#endif 192#endif
187} 193}
188 194
189struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, 195static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
190 int write) 196 int write)
191{ 197{
192 struct page *page; 198 struct page *page;
@@ -194,7 +200,7 @@ struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
194 200
195#ifdef CONFIG_STACK_GROWSUP 201#ifdef CONFIG_STACK_GROWSUP
196 if (write) { 202 if (write) {
197 ret = expand_stack_downwards(bprm->vma, pos); 203 ret = expand_downwards(bprm->vma, pos);
198 if (ret < 0) 204 if (ret < 0)
199 return NULL; 205 return NULL;
200 } 206 }
@@ -305,11 +311,11 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
305 311
306#else 312#else
307 313
308void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) 314static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
309{ 315{
310} 316}
311 317
312struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, 318static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
313 int write) 319 int write)
314{ 320{
315 struct page *page; 321 struct page *page;
@@ -398,22 +404,56 @@ err:
398 return err; 404 return err;
399} 405}
400 406
407struct user_arg_ptr {
408#ifdef CONFIG_COMPAT
409 bool is_compat;
410#endif
411 union {
412 const char __user *const __user *native;
413#ifdef CONFIG_COMPAT
414 compat_uptr_t __user *compat;
415#endif
416 } ptr;
417};
418
419static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
420{
421 const char __user *native;
422
423#ifdef CONFIG_COMPAT
424 if (unlikely(argv.is_compat)) {
425 compat_uptr_t compat;
426
427 if (get_user(compat, argv.ptr.compat + nr))
428 return ERR_PTR(-EFAULT);
429
430 return compat_ptr(compat);
431 }
432#endif
433
434 if (get_user(native, argv.ptr.native + nr))
435 return ERR_PTR(-EFAULT);
436
437 return native;
438}
439
401/* 440/*
402 * count() counts the number of strings in array ARGV. 441 * count() counts the number of strings in array ARGV.
403 */ 442 */
404static int count(const char __user * const __user * argv, int max) 443static int count(struct user_arg_ptr argv, int max)
405{ 444{
406 int i = 0; 445 int i = 0;
407 446
408 if (argv != NULL) { 447 if (argv.ptr.native != NULL) {
409 for (;;) { 448 for (;;) {
410 const char __user * p; 449 const char __user *p = get_user_arg_ptr(argv, i);
411 450
412 if (get_user(p, argv))
413 return -EFAULT;
414 if (!p) 451 if (!p)
415 break; 452 break;
416 argv++; 453
454 if (IS_ERR(p))
455 return -EFAULT;
456
417 if (i++ >= max) 457 if (i++ >= max)
418 return -E2BIG; 458 return -E2BIG;
419 459
@@ -430,7 +470,7 @@ static int count(const char __user * const __user * argv, int max)
430 * processes's memory to the new process's stack. The call to get_user_pages() 470 * processes's memory to the new process's stack. The call to get_user_pages()
431 * ensures the destination page is created and not swapped out. 471 * ensures the destination page is created and not swapped out.
432 */ 472 */
433static int copy_strings(int argc, const char __user *const __user *argv, 473static int copy_strings(int argc, struct user_arg_ptr argv,
434 struct linux_binprm *bprm) 474 struct linux_binprm *bprm)
435{ 475{
436 struct page *kmapped_page = NULL; 476 struct page *kmapped_page = NULL;
@@ -443,16 +483,18 @@ static int copy_strings(int argc, const char __user *const __user *argv,
443 int len; 483 int len;
444 unsigned long pos; 484 unsigned long pos;
445 485
446 if (get_user(str, argv+argc) || 486 ret = -EFAULT;
447 !(len = strnlen_user(str, MAX_ARG_STRLEN))) { 487 str = get_user_arg_ptr(argv, argc);
448 ret = -EFAULT; 488 if (IS_ERR(str))
449 goto out; 489 goto out;
450 }
451 490
452 if (!valid_arg_len(bprm, len)) { 491 len = strnlen_user(str, MAX_ARG_STRLEN);
453 ret = -E2BIG; 492 if (!len)
493 goto out;
494
495 ret = -E2BIG;
496 if (!valid_arg_len(bprm, len))
454 goto out; 497 goto out;
455 }
456 498
457 /* We're going to work our way backwords. */ 499 /* We're going to work our way backwords. */
458 pos = bprm->p; 500 pos = bprm->p;
@@ -519,14 +561,19 @@ out:
519/* 561/*
520 * Like copy_strings, but get argv and its values from kernel memory. 562 * Like copy_strings, but get argv and its values from kernel memory.
521 */ 563 */
522int copy_strings_kernel(int argc, const char *const *argv, 564int copy_strings_kernel(int argc, const char *const *__argv,
523 struct linux_binprm *bprm) 565 struct linux_binprm *bprm)
524{ 566{
525 int r; 567 int r;
526 mm_segment_t oldfs = get_fs(); 568 mm_segment_t oldfs = get_fs();
569 struct user_arg_ptr argv = {
570 .ptr.native = (const char __user *const __user *)__argv,
571 };
572
527 set_fs(KERNEL_DS); 573 set_fs(KERNEL_DS);
528 r = copy_strings(argc, (const char __user *const __user *)argv, bprm); 574 r = copy_strings(argc, argv, bprm);
529 set_fs(oldfs); 575 set_fs(oldfs);
576
530 return r; 577 return r;
531} 578}
532EXPORT_SYMBOL(copy_strings_kernel); 579EXPORT_SYMBOL(copy_strings_kernel);
@@ -553,7 +600,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
553 unsigned long length = old_end - old_start; 600 unsigned long length = old_end - old_start;
554 unsigned long new_start = old_start - shift; 601 unsigned long new_start = old_start - shift;
555 unsigned long new_end = old_end - shift; 602 unsigned long new_end = old_end - shift;
556 struct mmu_gather *tlb; 603 struct mmu_gather tlb;
557 604
558 BUG_ON(new_start > new_end); 605 BUG_ON(new_start > new_end);
559 606
@@ -579,12 +626,12 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
579 return -ENOMEM; 626 return -ENOMEM;
580 627
581 lru_add_drain(); 628 lru_add_drain();
582 tlb = tlb_gather_mmu(mm, 0); 629 tlb_gather_mmu(&tlb, mm, 0);
583 if (new_end > old_start) { 630 if (new_end > old_start) {
584 /* 631 /*
585 * when the old and new regions overlap clear from new_end. 632 * when the old and new regions overlap clear from new_end.
586 */ 633 */
587 free_pgd_range(tlb, new_end, old_end, new_end, 634 free_pgd_range(&tlb, new_end, old_end, new_end,
588 vma->vm_next ? vma->vm_next->vm_start : 0); 635 vma->vm_next ? vma->vm_next->vm_start : 0);
589 } else { 636 } else {
590 /* 637 /*
@@ -593,10 +640,10 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
593 * have constraints on va-space that make this illegal (IA64) - 640 * have constraints on va-space that make this illegal (IA64) -
594 * for the others its just a little faster. 641 * for the others its just a little faster.
595 */ 642 */
596 free_pgd_range(tlb, old_start, old_end, new_end, 643 free_pgd_range(&tlb, old_start, old_end, new_end,
597 vma->vm_next ? vma->vm_next->vm_start : 0); 644 vma->vm_next ? vma->vm_next->vm_start : 0);
598 } 645 }
599 tlb_finish_mmu(tlb, new_end, old_end); 646 tlb_finish_mmu(&tlb, new_end, old_end);
600 647
601 /* 648 /*
602 * Shrink the vma to just the new range. Always succeeds. 649 * Shrink the vma to just the new range. Always succeeds.
@@ -1004,6 +1051,7 @@ char *get_task_comm(char *buf, struct task_struct *tsk)
1004 task_unlock(tsk); 1051 task_unlock(tsk);
1005 return buf; 1052 return buf;
1006} 1053}
1054EXPORT_SYMBOL_GPL(get_task_comm);
1007 1055
1008void set_task_comm(struct task_struct *tsk, char *buf) 1056void set_task_comm(struct task_struct *tsk, char *buf)
1009{ 1057{
@@ -1379,10 +1427,10 @@ EXPORT_SYMBOL(search_binary_handler);
1379/* 1427/*
1380 * sys_execve() executes a new program. 1428 * sys_execve() executes a new program.
1381 */ 1429 */
1382int do_execve(const char * filename, 1430static int do_execve_common(const char *filename,
1383 const char __user *const __user *argv, 1431 struct user_arg_ptr argv,
1384 const char __user *const __user *envp, 1432 struct user_arg_ptr envp,
1385 struct pt_regs * regs) 1433 struct pt_regs *regs)
1386{ 1434{
1387 struct linux_binprm *bprm; 1435 struct linux_binprm *bprm;
1388 struct file *file; 1436 struct file *file;
@@ -1489,6 +1537,34 @@ out_ret:
1489 return retval; 1537 return retval;
1490} 1538}
1491 1539
1540int do_execve(const char *filename,
1541 const char __user *const __user *__argv,
1542 const char __user *const __user *__envp,
1543 struct pt_regs *regs)
1544{
1545 struct user_arg_ptr argv = { .ptr.native = __argv };
1546 struct user_arg_ptr envp = { .ptr.native = __envp };
1547 return do_execve_common(filename, argv, envp, regs);
1548}
1549
1550#ifdef CONFIG_COMPAT
1551int compat_do_execve(char *filename,
1552 compat_uptr_t __user *__argv,
1553 compat_uptr_t __user *__envp,
1554 struct pt_regs *regs)
1555{
1556 struct user_arg_ptr argv = {
1557 .is_compat = true,
1558 .ptr.compat = __argv,
1559 };
1560 struct user_arg_ptr envp = {
1561 .is_compat = true,
1562 .ptr.compat = __envp,
1563 };
1564 return do_execve_common(filename, argv, envp, regs);
1565}
1566#endif
1567
1492void set_binfmt(struct linux_binfmt *new) 1568void set_binfmt(struct linux_binfmt *new)
1493{ 1569{
1494 struct mm_struct *mm = current->mm; 1570 struct mm_struct *mm = current->mm;
@@ -1659,6 +1735,7 @@ static int zap_process(struct task_struct *start, int exit_code)
1659 1735
1660 t = start; 1736 t = start;
1661 do { 1737 do {
1738 task_clear_group_stop_pending(t);
1662 if (t != current && t->mm) { 1739 if (t != current && t->mm) {
1663 sigaddset(&t->pending.signal, SIGKILL); 1740 sigaddset(&t->pending.signal, SIGKILL);
1664 signal_wake_up(t, 1); 1741 signal_wake_up(t, 1);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 0a78dae7e2cb..1dd62ed35b85 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -898,7 +898,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
898 brelse(bh); 898 brelse(bh);
899 899
900 if (!sb_set_blocksize(sb, blocksize)) { 900 if (!sb_set_blocksize(sb, blocksize)) {
901 ext2_msg(sb, KERN_ERR, "error: blocksize is too small"); 901 ext2_msg(sb, KERN_ERR,
902 "error: bad blocksize %d", blocksize);
902 goto failed_sbi; 903 goto failed_sbi;
903 } 904 }
904 905
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 32f3b8695859..34b6d9bfc48a 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1416,10 +1416,19 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1416 frame->at = entries; 1416 frame->at = entries;
1417 frame->bh = bh; 1417 frame->bh = bh;
1418 bh = bh2; 1418 bh = bh2;
1419 /*
1420 * Mark buffers dirty here so that if do_split() fails we write a
1421 * consistent set of buffers to disk.
1422 */
1423 ext3_journal_dirty_metadata(handle, frame->bh);
1424 ext3_journal_dirty_metadata(handle, bh);
1419 de = do_split(handle,dir, &bh, frame, &hinfo, &retval); 1425 de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
1420 dx_release (frames); 1426 if (!de) {
1421 if (!(de)) 1427 ext3_mark_inode_dirty(handle, dir);
1428 dx_release(frames);
1422 return retval; 1429 return retval;
1430 }
1431 dx_release(frames);
1423 1432
1424 return add_dirent_to_buf(handle, dentry, inode, de, bh); 1433 return add_dirent_to_buf(handle, dentry, inode, de, bh);
1425} 1434}
@@ -2189,6 +2198,7 @@ static int ext3_symlink (struct inode * dir,
2189 handle_t *handle; 2198 handle_t *handle;
2190 struct inode * inode; 2199 struct inode * inode;
2191 int l, err, retries = 0; 2200 int l, err, retries = 0;
2201 int credits;
2192 2202
2193 l = strlen(symname)+1; 2203 l = strlen(symname)+1;
2194 if (l > dir->i_sb->s_blocksize) 2204 if (l > dir->i_sb->s_blocksize)
@@ -2196,10 +2206,26 @@ static int ext3_symlink (struct inode * dir,
2196 2206
2197 dquot_initialize(dir); 2207 dquot_initialize(dir);
2198 2208
2209 if (l > EXT3_N_BLOCKS * 4) {
2210 /*
2211 * For non-fast symlinks, we just allocate inode and put it on
2212 * orphan list in the first transaction => we need bitmap,
2213 * group descriptor, sb, inode block, quota blocks.
2214 */
2215 credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
2216 } else {
2217 /*
2218 * Fast symlink. We have to add entry to directory
2219 * (EXT3_DATA_TRANS_BLOCKS + EXT3_INDEX_EXTRA_TRANS_BLOCKS),
2220 * allocate new inode (bitmap, group descriptor, inode block,
2221 * quota blocks, sb is already counted in previous macros).
2222 */
2223 credits = EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2224 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2225 EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
2226 }
2199retry: 2227retry:
2200 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 2228 handle = ext3_journal_start(dir, credits);
2201 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
2202 EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2203 if (IS_ERR(handle)) 2229 if (IS_ERR(handle))
2204 return PTR_ERR(handle); 2230 return PTR_ERR(handle);
2205 2231
@@ -2211,21 +2237,45 @@ retry:
2211 if (IS_ERR(inode)) 2237 if (IS_ERR(inode))
2212 goto out_stop; 2238 goto out_stop;
2213 2239
2214 if (l > sizeof (EXT3_I(inode)->i_data)) { 2240 if (l > EXT3_N_BLOCKS * 4) {
2215 inode->i_op = &ext3_symlink_inode_operations; 2241 inode->i_op = &ext3_symlink_inode_operations;
2216 ext3_set_aops(inode); 2242 ext3_set_aops(inode);
2217 /* 2243 /*
2218 * page_symlink() calls into ext3_prepare/commit_write. 2244 * We cannot call page_symlink() with transaction started
2219 * We have a transaction open. All is sweetness. It also sets 2245 * because it calls into ext3_write_begin() which acquires page
2220 * i_size in generic_commit_write(). 2246 * lock which ranks below transaction start (and it can also
2247 * wait for journal commit if we are running out of space). So
2248 * we have to stop transaction now and restart it when symlink
2249 * contents is written.
2250 *
2251 * To keep fs consistent in case of crash, we have to put inode
2252 * to orphan list in the mean time.
2221 */ 2253 */
2254 drop_nlink(inode);
2255 err = ext3_orphan_add(handle, inode);
2256 ext3_journal_stop(handle);
2257 if (err)
2258 goto err_drop_inode;
2222 err = __page_symlink(inode, symname, l, 1); 2259 err = __page_symlink(inode, symname, l, 1);
2260 if (err)
2261 goto err_drop_inode;
2262 /*
2263 * Now inode is being linked into dir (EXT3_DATA_TRANS_BLOCKS
2264 * + EXT3_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified
2265 */
2266 handle = ext3_journal_start(dir,
2267 EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2268 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 1);
2269 if (IS_ERR(handle)) {
2270 err = PTR_ERR(handle);
2271 goto err_drop_inode;
2272 }
2273 inc_nlink(inode);
2274 err = ext3_orphan_del(handle, inode);
2223 if (err) { 2275 if (err) {
2276 ext3_journal_stop(handle);
2224 drop_nlink(inode); 2277 drop_nlink(inode);
2225 unlock_new_inode(inode); 2278 goto err_drop_inode;
2226 ext3_mark_inode_dirty(handle, inode);
2227 iput (inode);
2228 goto out_stop;
2229 } 2279 }
2230 } else { 2280 } else {
2231 inode->i_op = &ext3_fast_symlink_inode_operations; 2281 inode->i_op = &ext3_fast_symlink_inode_operations;
@@ -2239,6 +2289,10 @@ out_stop:
2239 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) 2289 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
2240 goto retry; 2290 goto retry;
2241 return err; 2291 return err;
2292err_drop_inode:
2293 unlock_new_inode(inode);
2294 iput(inode);
2295 return err;
2242} 2296}
2243 2297
2244static int ext3_link (struct dentry * old_dentry, 2298static int ext3_link (struct dentry * old_dentry,
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index ae8200f84e39..1cc7038e273d 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -151,6 +151,13 @@ static void fat_cache_add(struct inode *inode, struct fat_cache_id *new)
151 spin_unlock(&MSDOS_I(inode)->cache_lru_lock); 151 spin_unlock(&MSDOS_I(inode)->cache_lru_lock);
152 152
153 tmp = fat_cache_alloc(inode); 153 tmp = fat_cache_alloc(inode);
154 if (!tmp) {
155 spin_lock(&MSDOS_I(inode)->cache_lru_lock);
156 MSDOS_I(inode)->nr_caches--;
157 spin_unlock(&MSDOS_I(inode)->cache_lru_lock);
158 return;
159 }
160
154 spin_lock(&MSDOS_I(inode)->cache_lru_lock); 161 spin_lock(&MSDOS_I(inode)->cache_lru_lock);
155 cache = fat_cache_merge(inode, new); 162 cache = fat_cache_merge(inode, new);
156 if (cache != NULL) { 163 if (cache != NULL) {
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index ee42b9e0b16a..4ad64732cbce 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -98,7 +98,7 @@ next:
98 98
99 *bh = sb_bread(sb, phys); 99 *bh = sb_bread(sb, phys);
100 if (*bh == NULL) { 100 if (*bh == NULL) {
101 printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n", 101 fat_msg(sb, KERN_ERR, "Directory bread(block %llu) failed",
102 (llu)phys); 102 (llu)phys);
103 /* skip this block */ 103 /* skip this block */
104 *pos = (iblock + 1) << sb->s_blocksize_bits; 104 *pos = (iblock + 1) << sb->s_blocksize_bits;
@@ -136,9 +136,10 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos,
136 * but ignore that right now. 136 * but ignore that right now.
137 * Ahem... Stack smashing in ring 0 isn't fun. Fixed. 137 * Ahem... Stack smashing in ring 0 isn't fun. Fixed.
138 */ 138 */
139static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len, 139static int uni16_to_x8(struct super_block *sb, unsigned char *ascii,
140 int uni_xlate, struct nls_table *nls) 140 const wchar_t *uni, int len, struct nls_table *nls)
141{ 141{
142 int uni_xlate = MSDOS_SB(sb)->options.unicode_xlate;
142 const wchar_t *ip; 143 const wchar_t *ip;
143 wchar_t ec; 144 wchar_t ec;
144 unsigned char *op; 145 unsigned char *op;
@@ -166,23 +167,23 @@ static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len,
166 } 167 }
167 168
168 if (unlikely(*ip)) { 169 if (unlikely(*ip)) {
169 printk(KERN_WARNING "FAT: filename was truncated while " 170 fat_msg(sb, KERN_WARNING, "filename was truncated while "
170 "converting."); 171 "converting.");
171 } 172 }
172 173
173 *op = 0; 174 *op = 0;
174 return (op - ascii); 175 return (op - ascii);
175} 176}
176 177
177static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni, 178static inline int fat_uni_to_x8(struct super_block *sb, const wchar_t *uni,
178 unsigned char *buf, int size) 179 unsigned char *buf, int size)
179{ 180{
181 struct msdos_sb_info *sbi = MSDOS_SB(sb);
180 if (sbi->options.utf8) 182 if (sbi->options.utf8)
181 return utf16s_to_utf8s(uni, FAT_MAX_UNI_CHARS, 183 return utf16s_to_utf8s(uni, FAT_MAX_UNI_CHARS,
182 UTF16_HOST_ENDIAN, buf, size); 184 UTF16_HOST_ENDIAN, buf, size);
183 else 185 else
184 return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate, 186 return uni16_to_x8(sb, buf, uni, size, sbi->nls_io);
185 sbi->nls_io);
186} 187}
187 188
188static inline int 189static inline int
@@ -419,7 +420,7 @@ parse_record:
419 420
420 /* Compare shortname */ 421 /* Compare shortname */
421 bufuname[last_u] = 0x0000; 422 bufuname[last_u] = 0x0000;
422 len = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname)); 423 len = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname));
423 if (fat_name_match(sbi, name, name_len, bufname, len)) 424 if (fat_name_match(sbi, name, name_len, bufname, len))
424 goto found; 425 goto found;
425 426
@@ -428,7 +429,7 @@ parse_record:
428 int size = PATH_MAX - FAT_MAX_UNI_SIZE; 429 int size = PATH_MAX - FAT_MAX_UNI_SIZE;
429 430
430 /* Compare longname */ 431 /* Compare longname */
431 len = fat_uni_to_x8(sbi, unicode, longname, size); 432 len = fat_uni_to_x8(sb, unicode, longname, size);
432 if (fat_name_match(sbi, name, name_len, longname, len)) 433 if (fat_name_match(sbi, name, name_len, longname, len))
433 goto found; 434 goto found;
434 } 435 }
@@ -545,7 +546,7 @@ parse_record:
545 if (nr_slots) { 546 if (nr_slots) {
546 void *longname = unicode + FAT_MAX_UNI_CHARS; 547 void *longname = unicode + FAT_MAX_UNI_CHARS;
547 int size = PATH_MAX - FAT_MAX_UNI_SIZE; 548 int size = PATH_MAX - FAT_MAX_UNI_SIZE;
548 int len = fat_uni_to_x8(sbi, unicode, longname, size); 549 int len = fat_uni_to_x8(sb, unicode, longname, size);
549 550
550 fill_name = longname; 551 fill_name = longname;
551 fill_len = len; 552 fill_len = len;
@@ -621,7 +622,7 @@ parse_record:
621 622
622 if (isvfat) { 623 if (isvfat) {
623 bufuname[j] = 0x0000; 624 bufuname[j] = 0x0000;
624 i = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname)); 625 i = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname));
625 } 626 }
626 if (nr_slots) { 627 if (nr_slots) {
627 /* hack for fat_ioctl_filldir() */ 628 /* hack for fat_ioctl_filldir() */
@@ -979,6 +980,7 @@ static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots)
979 980
980int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo) 981int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
981{ 982{
983 struct super_block *sb = dir->i_sb;
982 struct msdos_dir_entry *de; 984 struct msdos_dir_entry *de;
983 struct buffer_head *bh; 985 struct buffer_head *bh;
984 int err = 0, nr_slots; 986 int err = 0, nr_slots;
@@ -1013,8 +1015,8 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
1013 */ 1015 */
1014 err = __fat_remove_entries(dir, sinfo->slot_off, nr_slots); 1016 err = __fat_remove_entries(dir, sinfo->slot_off, nr_slots);
1015 if (err) { 1017 if (err) {
1016 printk(KERN_WARNING 1018 fat_msg(sb, KERN_WARNING,
1017 "FAT: Couldn't remove the long name slots\n"); 1019 "Couldn't remove the long name slots");
1018 } 1020 }
1019 } 1021 }
1020 1022
@@ -1265,7 +1267,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
1265 if (sbi->fat_bits != 32) 1267 if (sbi->fat_bits != 32)
1266 goto error; 1268 goto error;
1267 } else if (MSDOS_I(dir)->i_start == 0) { 1269 } else if (MSDOS_I(dir)->i_start == 0) {
1268 printk(KERN_ERR "FAT: Corrupted directory (i_pos %lld)\n", 1270 fat_msg(sb, KERN_ERR, "Corrupted directory (i_pos %lld)",
1269 MSDOS_I(dir)->i_pos); 1271 MSDOS_I(dir)->i_pos);
1270 err = -EIO; 1272 err = -EIO;
1271 goto error; 1273 goto error;
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index f50408901f7e..8276cc282dec 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -319,19 +319,20 @@ extern struct inode *fat_build_inode(struct super_block *sb,
319 struct msdos_dir_entry *de, loff_t i_pos); 319 struct msdos_dir_entry *de, loff_t i_pos);
320extern int fat_sync_inode(struct inode *inode); 320extern int fat_sync_inode(struct inode *inode);
321extern int fat_fill_super(struct super_block *sb, void *data, int silent, 321extern int fat_fill_super(struct super_block *sb, void *data, int silent,
322 const struct inode_operations *fs_dir_inode_ops, 322 int isvfat, void (*setup)(struct super_block *));
323 int isvfat, void (*setup)(struct super_block *));
324 323
325extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, 324extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
326 struct inode *i2); 325 struct inode *i2);
327/* fat/misc.c */ 326/* fat/misc.c */
328extern void 327extern void
329__fat_fs_error(struct super_block *s, int report, const char *fmt, ...) 328__fat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
329 __attribute__ ((format (printf, 3, 4))) __cold;
330#define fat_fs_error(sb, fmt, args...) \
331 __fat_fs_error(sb, 1, fmt , ## args)
332#define fat_fs_error_ratelimit(sb, fmt, args...) \
333 __fat_fs_error(sb, __ratelimit(&MSDOS_SB(sb)->ratelimit), fmt , ## args)
334void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...)
330 __attribute__ ((format (printf, 3, 4))) __cold; 335 __attribute__ ((format (printf, 3, 4))) __cold;
331#define fat_fs_error(s, fmt, args...) \
332 __fat_fs_error(s, 1, fmt , ## args)
333#define fat_fs_error_ratelimit(s, fmt, args...) \
334 __fat_fs_error(s, __ratelimit(&MSDOS_SB(s)->ratelimit), fmt , ## args)
335extern int fat_clusters_flush(struct super_block *sb); 336extern int fat_clusters_flush(struct super_block *sb);
336extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); 337extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster);
337extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, 338extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index b47d2c9f4fa1..2e81ac0df7e2 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -95,7 +95,7 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent,
95err_brelse: 95err_brelse:
96 brelse(bhs[0]); 96 brelse(bhs[0]);
97err: 97err:
98 printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", (llu)blocknr); 98 fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr);
99 return -EIO; 99 return -EIO;
100} 100}
101 101
@@ -108,7 +108,7 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent,
108 fatent->fat_inode = MSDOS_SB(sb)->fat_inode; 108 fatent->fat_inode = MSDOS_SB(sb)->fat_inode;
109 fatent->bhs[0] = sb_bread(sb, blocknr); 109 fatent->bhs[0] = sb_bread(sb, blocknr);
110 if (!fatent->bhs[0]) { 110 if (!fatent->bhs[0]) {
111 printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", 111 fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)",
112 (llu)blocknr); 112 (llu)blocknr);
113 return -EIO; 113 return -EIO;
114 } 114 }
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 8d68690bdcf1..cb8d8391ac0b 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -581,7 +581,8 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
581 buf->f_bavail = sbi->free_clusters; 581 buf->f_bavail = sbi->free_clusters;
582 buf->f_fsid.val[0] = (u32)id; 582 buf->f_fsid.val[0] = (u32)id;
583 buf->f_fsid.val[1] = (u32)(id >> 32); 583 buf->f_fsid.val[1] = (u32)(id >> 32);
584 buf->f_namelen = sbi->options.isvfat ? FAT_LFN_LEN : 12; 584 buf->f_namelen =
585 (sbi->options.isvfat ? FAT_LFN_LEN : 12) * NLS_MAX_CHARSET_SIZE;
585 586
586 return 0; 587 return 0;
587} 588}
@@ -619,8 +620,8 @@ retry:
619 620
620 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); 621 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
621 if (!bh) { 622 if (!bh) {
622 printk(KERN_ERR "FAT: unable to read inode block " 623 fat_msg(sb, KERN_ERR, "unable to read inode block "
623 "for updating (i_pos %lld)\n", i_pos); 624 "for updating (i_pos %lld)", i_pos);
624 return -EIO; 625 return -EIO;
625 } 626 }
626 spin_lock(&sbi->inode_hash_lock); 627 spin_lock(&sbi->inode_hash_lock);
@@ -976,8 +977,8 @@ static const match_table_t vfat_tokens = {
976 {Opt_err, NULL} 977 {Opt_err, NULL}
977}; 978};
978 979
979static int parse_options(char *options, int is_vfat, int silent, int *debug, 980static int parse_options(struct super_block *sb, char *options, int is_vfat,
980 struct fat_mount_options *opts) 981 int silent, int *debug, struct fat_mount_options *opts)
981{ 982{
982 char *p; 983 char *p;
983 substring_t args[MAX_OPT_ARGS]; 984 substring_t args[MAX_OPT_ARGS];
@@ -1168,15 +1169,15 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1168 1169
1169 /* obsolete mount options */ 1170 /* obsolete mount options */
1170 case Opt_obsolate: 1171 case Opt_obsolate:
1171 printk(KERN_INFO "FAT: \"%s\" option is obsolete, " 1172 fat_msg(sb, KERN_INFO, "\"%s\" option is obsolete, "
1172 "not supported now\n", p); 1173 "not supported now", p);
1173 break; 1174 break;
1174 /* unknown option */ 1175 /* unknown option */
1175 default: 1176 default:
1176 if (!silent) { 1177 if (!silent) {
1177 printk(KERN_ERR 1178 fat_msg(sb, KERN_ERR,
1178 "FAT: Unrecognized mount option \"%s\" " 1179 "Unrecognized mount option \"%s\" "
1179 "or missing value\n", p); 1180 "or missing value", p);
1180 } 1181 }
1181 return -EINVAL; 1182 return -EINVAL;
1182 } 1183 }
@@ -1185,7 +1186,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1185out: 1186out:
1186 /* UTF-8 doesn't provide FAT semantics */ 1187 /* UTF-8 doesn't provide FAT semantics */
1187 if (!strcmp(opts->iocharset, "utf8")) { 1188 if (!strcmp(opts->iocharset, "utf8")) {
1188 printk(KERN_ERR "FAT: utf8 is not a recommended IO charset" 1189 fat_msg(sb, KERN_ERR, "utf8 is not a recommended IO charset"
1189 " for FAT filesystems, filesystem will be " 1190 " for FAT filesystems, filesystem will be "
1190 "case sensitive!\n"); 1191 "case sensitive!\n");
1191 } 1192 }
@@ -1238,8 +1239,7 @@ static int fat_read_root(struct inode *inode)
1238/* 1239/*
1239 * Read the super block of an MS-DOS FS. 1240 * Read the super block of an MS-DOS FS.
1240 */ 1241 */
1241int fat_fill_super(struct super_block *sb, void *data, int silent, 1242int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1242 const struct inode_operations *fs_dir_inode_ops, int isvfat,
1243 void (*setup)(struct super_block *)) 1243 void (*setup)(struct super_block *))
1244{ 1244{
1245 struct inode *root_inode = NULL, *fat_inode = NULL; 1245 struct inode *root_inode = NULL, *fat_inode = NULL;
@@ -1268,11 +1268,10 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1268 sb->s_magic = MSDOS_SUPER_MAGIC; 1268 sb->s_magic = MSDOS_SUPER_MAGIC;
1269 sb->s_op = &fat_sops; 1269 sb->s_op = &fat_sops;
1270 sb->s_export_op = &fat_export_ops; 1270 sb->s_export_op = &fat_export_ops;
1271 sbi->dir_ops = fs_dir_inode_ops;
1272 ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL, 1271 ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL,
1273 DEFAULT_RATELIMIT_BURST); 1272 DEFAULT_RATELIMIT_BURST);
1274 1273
1275 error = parse_options(data, isvfat, silent, &debug, &sbi->options); 1274 error = parse_options(sb, data, isvfat, silent, &debug, &sbi->options);
1276 if (error) 1275 if (error)
1277 goto out_fail; 1276 goto out_fail;
1278 1277
@@ -1282,20 +1281,20 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1282 sb_min_blocksize(sb, 512); 1281 sb_min_blocksize(sb, 512);
1283 bh = sb_bread(sb, 0); 1282 bh = sb_bread(sb, 0);
1284 if (bh == NULL) { 1283 if (bh == NULL) {
1285 printk(KERN_ERR "FAT: unable to read boot sector\n"); 1284 fat_msg(sb, KERN_ERR, "unable to read boot sector");
1286 goto out_fail; 1285 goto out_fail;
1287 } 1286 }
1288 1287
1289 b = (struct fat_boot_sector *) bh->b_data; 1288 b = (struct fat_boot_sector *) bh->b_data;
1290 if (!b->reserved) { 1289 if (!b->reserved) {
1291 if (!silent) 1290 if (!silent)
1292 printk(KERN_ERR "FAT: bogus number of reserved sectors\n"); 1291 fat_msg(sb, KERN_ERR, "bogus number of reserved sectors");
1293 brelse(bh); 1292 brelse(bh);
1294 goto out_invalid; 1293 goto out_invalid;
1295 } 1294 }
1296 if (!b->fats) { 1295 if (!b->fats) {
1297 if (!silent) 1296 if (!silent)
1298 printk(KERN_ERR "FAT: bogus number of FAT structure\n"); 1297 fat_msg(sb, KERN_ERR, "bogus number of FAT structure");
1299 brelse(bh); 1298 brelse(bh);
1300 goto out_invalid; 1299 goto out_invalid;
1301 } 1300 }
@@ -1308,7 +1307,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1308 media = b->media; 1307 media = b->media;
1309 if (!fat_valid_media(media)) { 1308 if (!fat_valid_media(media)) {
1310 if (!silent) 1309 if (!silent)
1311 printk(KERN_ERR "FAT: invalid media value (0x%02x)\n", 1310 fat_msg(sb, KERN_ERR, "invalid media value (0x%02x)",
1312 media); 1311 media);
1313 brelse(bh); 1312 brelse(bh);
1314 goto out_invalid; 1313 goto out_invalid;
@@ -1318,7 +1317,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1318 || (logical_sector_size < 512) 1317 || (logical_sector_size < 512)
1319 || (logical_sector_size > 4096)) { 1318 || (logical_sector_size > 4096)) {
1320 if (!silent) 1319 if (!silent)
1321 printk(KERN_ERR "FAT: bogus logical sector size %u\n", 1320 fat_msg(sb, KERN_ERR, "bogus logical sector size %u",
1322 logical_sector_size); 1321 logical_sector_size);
1323 brelse(bh); 1322 brelse(bh);
1324 goto out_invalid; 1323 goto out_invalid;
@@ -1326,15 +1325,15 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1326 sbi->sec_per_clus = b->sec_per_clus; 1325 sbi->sec_per_clus = b->sec_per_clus;
1327 if (!is_power_of_2(sbi->sec_per_clus)) { 1326 if (!is_power_of_2(sbi->sec_per_clus)) {
1328 if (!silent) 1327 if (!silent)
1329 printk(KERN_ERR "FAT: bogus sectors per cluster %u\n", 1328 fat_msg(sb, KERN_ERR, "bogus sectors per cluster %u",
1330 sbi->sec_per_clus); 1329 sbi->sec_per_clus);
1331 brelse(bh); 1330 brelse(bh);
1332 goto out_invalid; 1331 goto out_invalid;
1333 } 1332 }
1334 1333
1335 if (logical_sector_size < sb->s_blocksize) { 1334 if (logical_sector_size < sb->s_blocksize) {
1336 printk(KERN_ERR "FAT: logical sector size too small for device" 1335 fat_msg(sb, KERN_ERR, "logical sector size too small for device"
1337 " (logical sector size = %u)\n", logical_sector_size); 1336 " (logical sector size = %u)", logical_sector_size);
1338 brelse(bh); 1337 brelse(bh);
1339 goto out_fail; 1338 goto out_fail;
1340 } 1339 }
@@ -1342,14 +1341,14 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1342 brelse(bh); 1341 brelse(bh);
1343 1342
1344 if (!sb_set_blocksize(sb, logical_sector_size)) { 1343 if (!sb_set_blocksize(sb, logical_sector_size)) {
1345 printk(KERN_ERR "FAT: unable to set blocksize %u\n", 1344 fat_msg(sb, KERN_ERR, "unable to set blocksize %u",
1346 logical_sector_size); 1345 logical_sector_size);
1347 goto out_fail; 1346 goto out_fail;
1348 } 1347 }
1349 bh = sb_bread(sb, 0); 1348 bh = sb_bread(sb, 0);
1350 if (bh == NULL) { 1349 if (bh == NULL) {
1351 printk(KERN_ERR "FAT: unable to read boot sector" 1350 fat_msg(sb, KERN_ERR, "unable to read boot sector"
1352 " (logical sector size = %lu)\n", 1351 " (logical sector size = %lu)",
1353 sb->s_blocksize); 1352 sb->s_blocksize);
1354 goto out_fail; 1353 goto out_fail;
1355 } 1354 }
@@ -1385,16 +1384,16 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1385 1384
1386 fsinfo_bh = sb_bread(sb, sbi->fsinfo_sector); 1385 fsinfo_bh = sb_bread(sb, sbi->fsinfo_sector);
1387 if (fsinfo_bh == NULL) { 1386 if (fsinfo_bh == NULL) {
1388 printk(KERN_ERR "FAT: bread failed, FSINFO block" 1387 fat_msg(sb, KERN_ERR, "bread failed, FSINFO block"
1389 " (sector = %lu)\n", sbi->fsinfo_sector); 1388 " (sector = %lu)", sbi->fsinfo_sector);
1390 brelse(bh); 1389 brelse(bh);
1391 goto out_fail; 1390 goto out_fail;
1392 } 1391 }
1393 1392
1394 fsinfo = (struct fat_boot_fsinfo *)fsinfo_bh->b_data; 1393 fsinfo = (struct fat_boot_fsinfo *)fsinfo_bh->b_data;
1395 if (!IS_FSINFO(fsinfo)) { 1394 if (!IS_FSINFO(fsinfo)) {
1396 printk(KERN_WARNING "FAT: Invalid FSINFO signature: " 1395 fat_msg(sb, KERN_WARNING, "Invalid FSINFO signature: "
1397 "0x%08x, 0x%08x (sector = %lu)\n", 1396 "0x%08x, 0x%08x (sector = %lu)",
1398 le32_to_cpu(fsinfo->signature1), 1397 le32_to_cpu(fsinfo->signature1),
1399 le32_to_cpu(fsinfo->signature2), 1398 le32_to_cpu(fsinfo->signature2),
1400 sbi->fsinfo_sector); 1399 sbi->fsinfo_sector);
@@ -1415,8 +1414,8 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1415 sbi->dir_entries = get_unaligned_le16(&b->dir_entries); 1414 sbi->dir_entries = get_unaligned_le16(&b->dir_entries);
1416 if (sbi->dir_entries & (sbi->dir_per_block - 1)) { 1415 if (sbi->dir_entries & (sbi->dir_per_block - 1)) {
1417 if (!silent) 1416 if (!silent)
1418 printk(KERN_ERR "FAT: bogus directroy-entries per block" 1417 fat_msg(sb, KERN_ERR, "bogus directroy-entries per block"
1419 " (%u)\n", sbi->dir_entries); 1418 " (%u)", sbi->dir_entries);
1420 brelse(bh); 1419 brelse(bh);
1421 goto out_invalid; 1420 goto out_invalid;
1422 } 1421 }
@@ -1438,7 +1437,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1438 total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT); 1437 total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT);
1439 if (total_clusters > MAX_FAT(sb)) { 1438 if (total_clusters > MAX_FAT(sb)) {
1440 if (!silent) 1439 if (!silent)
1441 printk(KERN_ERR "FAT: count of clusters too big (%u)\n", 1440 fat_msg(sb, KERN_ERR, "count of clusters too big (%u)",
1442 total_clusters); 1441 total_clusters);
1443 brelse(bh); 1442 brelse(bh);
1444 goto out_invalid; 1443 goto out_invalid;
@@ -1471,7 +1470,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1471 sprintf(buf, "cp%d", sbi->options.codepage); 1470 sprintf(buf, "cp%d", sbi->options.codepage);
1472 sbi->nls_disk = load_nls(buf); 1471 sbi->nls_disk = load_nls(buf);
1473 if (!sbi->nls_disk) { 1472 if (!sbi->nls_disk) {
1474 printk(KERN_ERR "FAT: codepage %s not found\n", buf); 1473 fat_msg(sb, KERN_ERR, "codepage %s not found", buf);
1475 goto out_fail; 1474 goto out_fail;
1476 } 1475 }
1477 1476
@@ -1479,7 +1478,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1479 if (sbi->options.isvfat) { 1478 if (sbi->options.isvfat) {
1480 sbi->nls_io = load_nls(sbi->options.iocharset); 1479 sbi->nls_io = load_nls(sbi->options.iocharset);
1481 if (!sbi->nls_io) { 1480 if (!sbi->nls_io) {
1482 printk(KERN_ERR "FAT: IO charset %s not found\n", 1481 fat_msg(sb, KERN_ERR, "IO charset %s not found",
1483 sbi->options.iocharset); 1482 sbi->options.iocharset);
1484 goto out_fail; 1483 goto out_fail;
1485 } 1484 }
@@ -1503,7 +1502,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1503 insert_inode_hash(root_inode); 1502 insert_inode_hash(root_inode);
1504 sb->s_root = d_alloc_root(root_inode); 1503 sb->s_root = d_alloc_root(root_inode);
1505 if (!sb->s_root) { 1504 if (!sb->s_root) {
1506 printk(KERN_ERR "FAT: get root inode failed\n"); 1505 fat_msg(sb, KERN_ERR, "get root inode failed");
1507 goto out_fail; 1506 goto out_fail;
1508 } 1507 }
1509 1508
@@ -1512,8 +1511,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1512out_invalid: 1511out_invalid:
1513 error = -EINVAL; 1512 error = -EINVAL;
1514 if (!silent) 1513 if (!silent)
1515 printk(KERN_INFO "VFS: Can't find a valid FAT filesystem" 1514 fat_msg(sb, KERN_INFO, "Can't find a valid FAT filesystem");
1516 " on dev %s.\n", sb->s_id);
1517 1515
1518out_fail: 1516out_fail:
1519 if (fat_inode) 1517 if (fat_inode)
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 970e682ea754..6d93360ca0cc 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -20,30 +20,46 @@
20 * In case the file system is remounted read-only, it can be made writable 20 * In case the file system is remounted read-only, it can be made writable
21 * again by remounting it. 21 * again by remounting it.
22 */ 22 */
23void __fat_fs_error(struct super_block *s, int report, const char *fmt, ...) 23void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
24{ 24{
25 struct fat_mount_options *opts = &MSDOS_SB(s)->options; 25 struct fat_mount_options *opts = &MSDOS_SB(sb)->options;
26 va_list args; 26 va_list args;
27 struct va_format vaf;
27 28
28 if (report) { 29 if (report) {
29 printk(KERN_ERR "FAT: Filesystem error (dev %s)\n", s->s_id);
30
31 printk(KERN_ERR " ");
32 va_start(args, fmt); 30 va_start(args, fmt);
33 vprintk(fmt, args); 31 vaf.fmt = fmt;
32 vaf.va = &args;
33 printk(KERN_ERR "FAT-fs (%s): error, %pV\n", sb->s_id, &vaf);
34 va_end(args); 34 va_end(args);
35 printk("\n");
36 } 35 }
37 36
38 if (opts->errors == FAT_ERRORS_PANIC) 37 if (opts->errors == FAT_ERRORS_PANIC)
39 panic("FAT: fs panic from previous error\n"); 38 panic("FAT-fs (%s): fs panic from previous error\n", sb->s_id);
40 else if (opts->errors == FAT_ERRORS_RO && !(s->s_flags & MS_RDONLY)) { 39 else if (opts->errors == FAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) {
41 s->s_flags |= MS_RDONLY; 40 sb->s_flags |= MS_RDONLY;
42 printk(KERN_ERR "FAT: Filesystem has been set read-only\n"); 41 printk(KERN_ERR "FAT-fs (%s): Filesystem has been "
42 "set read-only\n", sb->s_id);
43 } 43 }
44} 44}
45EXPORT_SYMBOL_GPL(__fat_fs_error); 45EXPORT_SYMBOL_GPL(__fat_fs_error);
46 46
47/**
48 * fat_msg() - print preformated FAT specific messages. Every thing what is
49 * not fat_fs_error() should be fat_msg().
50 */
51void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...)
52{
53 struct va_format vaf;
54 va_list args;
55
56 va_start(args, fmt);
57 vaf.fmt = fmt;
58 vaf.va = &args;
59 printk("%sFAT-fs (%s): %pV\n", level, sb->s_id, &vaf);
60 va_end(args);
61}
62
47/* Flushes the number of free clusters on FAT32 */ 63/* Flushes the number of free clusters on FAT32 */
48/* XXX: Need to write one per FSINFO block. Currently only writes 1 */ 64/* XXX: Need to write one per FSINFO block. Currently only writes 1 */
49int fat_clusters_flush(struct super_block *sb) 65int fat_clusters_flush(struct super_block *sb)
@@ -57,15 +73,15 @@ int fat_clusters_flush(struct super_block *sb)
57 73
58 bh = sb_bread(sb, sbi->fsinfo_sector); 74 bh = sb_bread(sb, sbi->fsinfo_sector);
59 if (bh == NULL) { 75 if (bh == NULL) {
60 printk(KERN_ERR "FAT: bread failed in fat_clusters_flush\n"); 76 fat_msg(sb, KERN_ERR, "bread failed in fat_clusters_flush");
61 return -EIO; 77 return -EIO;
62 } 78 }
63 79
64 fsinfo = (struct fat_boot_fsinfo *)bh->b_data; 80 fsinfo = (struct fat_boot_fsinfo *)bh->b_data;
65 /* Sanity check */ 81 /* Sanity check */
66 if (!IS_FSINFO(fsinfo)) { 82 if (!IS_FSINFO(fsinfo)) {
67 printk(KERN_ERR "FAT: Invalid FSINFO signature: " 83 fat_msg(sb, KERN_ERR, "Invalid FSINFO signature: "
68 "0x%08x, 0x%08x (sector = %lu)\n", 84 "0x%08x, 0x%08x (sector = %lu)",
69 le32_to_cpu(fsinfo->signature1), 85 le32_to_cpu(fsinfo->signature1),
70 le32_to_cpu(fsinfo->signature2), 86 le32_to_cpu(fsinfo->signature2),
71 sbi->fsinfo_sector); 87 sbi->fsinfo_sector);
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index c3eccbd02037..be15437c272e 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -664,14 +664,14 @@ static const struct inode_operations msdos_dir_inode_operations = {
664 664
665static void setup(struct super_block *sb) 665static void setup(struct super_block *sb)
666{ 666{
667 MSDOS_SB(sb)->dir_ops = &msdos_dir_inode_operations;
667 sb->s_d_op = &msdos_dentry_operations; 668 sb->s_d_op = &msdos_dentry_operations;
668 sb->s_flags |= MS_NOATIME; 669 sb->s_flags |= MS_NOATIME;
669} 670}
670 671
671static int msdos_fill_super(struct super_block *sb, void *data, int silent) 672static int msdos_fill_super(struct super_block *sb, void *data, int silent)
672{ 673{
673 return fat_fill_super(sb, data, silent, &msdos_dir_inode_operations, 674 return fat_fill_super(sb, data, silent, 0, setup);
674 0, setup);
675} 675}
676 676
677static struct dentry *msdos_mount(struct file_system_type *fs_type, 677static struct dentry *msdos_mount(struct file_system_type *fs_type,
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index e2466b2f8cf2..c61a6789f36c 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -1070,6 +1070,7 @@ static const struct inode_operations vfat_dir_inode_operations = {
1070 1070
1071static void setup(struct super_block *sb) 1071static void setup(struct super_block *sb)
1072{ 1072{
1073 MSDOS_SB(sb)->dir_ops = &vfat_dir_inode_operations;
1073 if (MSDOS_SB(sb)->options.name_check != 's') 1074 if (MSDOS_SB(sb)->options.name_check != 's')
1074 sb->s_d_op = &vfat_ci_dentry_ops; 1075 sb->s_d_op = &vfat_ci_dentry_ops;
1075 else 1076 else
@@ -1078,8 +1079,7 @@ static void setup(struct super_block *sb)
1078 1079
1079static int vfat_fill_super(struct super_block *sb, void *data, int silent) 1080static int vfat_fill_super(struct super_block *sb, void *data, int silent)
1080{ 1081{
1081 return fat_fill_super(sb, data, silent, &vfat_dir_inode_operations, 1082 return fat_fill_super(sb, data, silent, 1, setup);
1082 1, setup);
1083} 1083}
1084 1084
1085static struct dentry *vfat_mount(struct file_system_type *fs_type, 1085static struct dentry *vfat_mount(struct file_system_type *fs_type,
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 2ba6719ac612..1a4311437a8b 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -272,7 +272,7 @@ vxfs_get_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip)
272 * *ip: VFS inode 272 * *ip: VFS inode
273 * 273 *
274 * Description: 274 * Description:
275 * vxfs_put_fake_inode frees all data asssociated with @ip. 275 * vxfs_put_fake_inode frees all data associated with @ip.
276 */ 276 */
277void 277void
278vxfs_put_fake_inode(struct inode *ip) 278vxfs_put_fake_inode(struct inode *ip)
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 48a18f184d50..30afdfa7aec7 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -33,8 +33,6 @@ void fscache_enqueue_operation(struct fscache_operation *op)
33 _enter("{OBJ%x OP%x,%u}", 33 _enter("{OBJ%x OP%x,%u}",
34 op->object->debug_id, op->debug_id, atomic_read(&op->usage)); 34 op->object->debug_id, op->debug_id, atomic_read(&op->usage));
35 35
36 fscache_set_op_state(op, "EnQ");
37
38 ASSERT(list_empty(&op->pend_link)); 36 ASSERT(list_empty(&op->pend_link));
39 ASSERT(op->processor != NULL); 37 ASSERT(op->processor != NULL);
40 ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); 38 ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE);
@@ -66,8 +64,6 @@ EXPORT_SYMBOL(fscache_enqueue_operation);
66static void fscache_run_op(struct fscache_object *object, 64static void fscache_run_op(struct fscache_object *object,
67 struct fscache_operation *op) 65 struct fscache_operation *op)
68{ 66{
69 fscache_set_op_state(op, "Run");
70
71 object->n_in_progress++; 67 object->n_in_progress++;
72 if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) 68 if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
73 wake_up_bit(&op->flags, FSCACHE_OP_WAITING); 69 wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
@@ -88,8 +84,6 @@ int fscache_submit_exclusive_op(struct fscache_object *object,
88 84
89 _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); 85 _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id);
90 86
91 fscache_set_op_state(op, "SubmitX");
92
93 spin_lock(&object->lock); 87 spin_lock(&object->lock);
94 ASSERTCMP(object->n_ops, >=, object->n_in_progress); 88 ASSERTCMP(object->n_ops, >=, object->n_in_progress);
95 ASSERTCMP(object->n_ops, >=, object->n_exclusive); 89 ASSERTCMP(object->n_ops, >=, object->n_exclusive);
@@ -194,8 +188,6 @@ int fscache_submit_op(struct fscache_object *object,
194 188
195 ASSERTCMP(atomic_read(&op->usage), >, 0); 189 ASSERTCMP(atomic_read(&op->usage), >, 0);
196 190
197 fscache_set_op_state(op, "Submit");
198
199 spin_lock(&object->lock); 191 spin_lock(&object->lock);
200 ASSERTCMP(object->n_ops, >=, object->n_in_progress); 192 ASSERTCMP(object->n_ops, >=, object->n_in_progress);
201 ASSERTCMP(object->n_ops, >=, object->n_exclusive); 193 ASSERTCMP(object->n_ops, >=, object->n_exclusive);
@@ -335,8 +327,6 @@ void fscache_put_operation(struct fscache_operation *op)
335 if (!atomic_dec_and_test(&op->usage)) 327 if (!atomic_dec_and_test(&op->usage))
336 return; 328 return;
337 329
338 fscache_set_op_state(op, "Put");
339
340 _debug("PUT OP"); 330 _debug("PUT OP");
341 if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags)) 331 if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags))
342 BUG(); 332 BUG();
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 41c441c2058d..a2a5d19ece6a 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -155,11 +155,9 @@ static void fscache_attr_changed_op(struct fscache_operation *op)
155 fscache_stat(&fscache_n_attr_changed_calls); 155 fscache_stat(&fscache_n_attr_changed_calls);
156 156
157 if (fscache_object_is_active(object)) { 157 if (fscache_object_is_active(object)) {
158 fscache_set_op_state(op, "CallFS");
159 fscache_stat(&fscache_n_cop_attr_changed); 158 fscache_stat(&fscache_n_cop_attr_changed);
160 ret = object->cache->ops->attr_changed(object); 159 ret = object->cache->ops->attr_changed(object);
161 fscache_stat_d(&fscache_n_cop_attr_changed); 160 fscache_stat_d(&fscache_n_cop_attr_changed);
162 fscache_set_op_state(op, "Done");
163 if (ret < 0) 161 if (ret < 0)
164 fscache_abort_object(object); 162 fscache_abort_object(object);
165 } 163 }
@@ -190,7 +188,6 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)
190 188
191 fscache_operation_init(op, fscache_attr_changed_op, NULL); 189 fscache_operation_init(op, fscache_attr_changed_op, NULL);
192 op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE); 190 op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE);
193 fscache_set_op_name(op, "Attr");
194 191
195 spin_lock(&cookie->lock); 192 spin_lock(&cookie->lock);
196 193
@@ -257,7 +254,6 @@ static struct fscache_retrieval *fscache_alloc_retrieval(
257 op->context = context; 254 op->context = context;
258 op->start_time = jiffies; 255 op->start_time = jiffies;
259 INIT_LIST_HEAD(&op->to_do); 256 INIT_LIST_HEAD(&op->to_do);
260 fscache_set_op_name(&op->op, "Retr");
261 return op; 257 return op;
262} 258}
263 259
@@ -368,7 +364,6 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
368 _leave(" = -ENOMEM"); 364 _leave(" = -ENOMEM");
369 return -ENOMEM; 365 return -ENOMEM;
370 } 366 }
371 fscache_set_op_name(&op->op, "RetrRA1");
372 367
373 spin_lock(&cookie->lock); 368 spin_lock(&cookie->lock);
374 369
@@ -487,7 +482,6 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
487 op = fscache_alloc_retrieval(mapping, end_io_func, context); 482 op = fscache_alloc_retrieval(mapping, end_io_func, context);
488 if (!op) 483 if (!op)
489 return -ENOMEM; 484 return -ENOMEM;
490 fscache_set_op_name(&op->op, "RetrRAN");
491 485
492 spin_lock(&cookie->lock); 486 spin_lock(&cookie->lock);
493 487
@@ -589,7 +583,6 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
589 op = fscache_alloc_retrieval(page->mapping, NULL, NULL); 583 op = fscache_alloc_retrieval(page->mapping, NULL, NULL);
590 if (!op) 584 if (!op)
591 return -ENOMEM; 585 return -ENOMEM;
592 fscache_set_op_name(&op->op, "RetrAL1");
593 586
594 spin_lock(&cookie->lock); 587 spin_lock(&cookie->lock);
595 588
@@ -662,8 +655,6 @@ static void fscache_write_op(struct fscache_operation *_op)
662 655
663 _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage)); 656 _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage));
664 657
665 fscache_set_op_state(&op->op, "GetPage");
666
667 spin_lock(&object->lock); 658 spin_lock(&object->lock);
668 cookie = object->cookie; 659 cookie = object->cookie;
669 660
@@ -698,15 +689,12 @@ static void fscache_write_op(struct fscache_operation *_op)
698 spin_unlock(&cookie->stores_lock); 689 spin_unlock(&cookie->stores_lock);
699 spin_unlock(&object->lock); 690 spin_unlock(&object->lock);
700 691
701 fscache_set_op_state(&op->op, "Store");
702 fscache_stat(&fscache_n_store_pages); 692 fscache_stat(&fscache_n_store_pages);
703 fscache_stat(&fscache_n_cop_write_page); 693 fscache_stat(&fscache_n_cop_write_page);
704 ret = object->cache->ops->write_page(op, page); 694 ret = object->cache->ops->write_page(op, page);
705 fscache_stat_d(&fscache_n_cop_write_page); 695 fscache_stat_d(&fscache_n_cop_write_page);
706 fscache_set_op_state(&op->op, "EndWrite");
707 fscache_end_page_write(object, page); 696 fscache_end_page_write(object, page);
708 if (ret < 0) { 697 if (ret < 0) {
709 fscache_set_op_state(&op->op, "Abort");
710 fscache_abort_object(object); 698 fscache_abort_object(object);
711 } else { 699 } else {
712 fscache_enqueue_operation(&op->op); 700 fscache_enqueue_operation(&op->op);
@@ -778,7 +766,6 @@ int __fscache_write_page(struct fscache_cookie *cookie,
778 fscache_operation_init(&op->op, fscache_write_op, 766 fscache_operation_init(&op->op, fscache_write_op,
779 fscache_release_write_op); 767 fscache_release_write_op);
780 op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING); 768 op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING);
781 fscache_set_op_name(&op->op, "Write1");
782 769
783 ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); 770 ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
784 if (ret < 0) 771 if (ret < 0)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index e462a7a281bf..0d0e3faddcfa 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -174,7 +174,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
174 if (!inode) 174 if (!inode)
175 return 0; 175 return 0;
176 176
177 if (nd->flags & LOOKUP_RCU) 177 if (nd && (nd->flags & LOOKUP_RCU))
178 return -ECHILD; 178 return -ECHILD;
179 179
180 fc = get_fuse_conn(inode); 180 fc = get_fuse_conn(inode);
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index f3d23ef4e876..86128202384f 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,9 +1,9 @@
1ccflags-y := -I$(src) 1ccflags-y := -I$(src)
2obj-$(CONFIG_GFS2_FS) += gfs2.o 2obj-$(CONFIG_GFS2_FS) += gfs2.o
3gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \ 3gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \
4 glops.o inode.o log.o lops.o main.o meta_io.o \ 4 glops.o log.o lops.o main.o meta_io.o \
5 aops.o dentry.o export.o file.o \ 5 aops.o dentry.o export.o file.o \
6 ops_fstype.o ops_inode.o quota.o \ 6 ops_fstype.o inode.o quota.o \
7 recovery.o rgrp.o super.o sys.o trans.o util.o 7 recovery.o rgrp.o super.o sys.o trans.o util.o
8 8
9gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o 9gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 0f5c4f9d5d62..802ac5eeba28 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1076,8 +1076,8 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
1076 bd = bh->b_private; 1076 bd = bh->b_private;
1077 if (bd && bd->bd_ail) 1077 if (bd && bd->bd_ail)
1078 goto cannot_release; 1078 goto cannot_release;
1079 gfs2_assert_warn(sdp, !buffer_pinned(bh)); 1079 if (buffer_pinned(bh) || buffer_dirty(bh))
1080 gfs2_assert_warn(sdp, !buffer_dirty(bh)); 1080 goto not_possible;
1081 bh = bh->b_this_page; 1081 bh = bh->b_this_page;
1082 } while(bh != head); 1082 } while(bh != head);
1083 gfs2_log_unlock(sdp); 1083 gfs2_log_unlock(sdp);
@@ -1107,6 +1107,10 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
1107 } while (bh != head); 1107 } while (bh != head);
1108 1108
1109 return try_to_free_buffers(page); 1109 return try_to_free_buffers(page);
1110
1111not_possible: /* Should never happen */
1112 WARN_ON(buffer_dirty(bh));
1113 WARN_ON(buffer_pinned(bh));
1110cannot_release: 1114cannot_release:
1111 gfs2_log_unlock(sdp); 1115 gfs2_log_unlock(sdp);
1112 return 0; 1116 return 0;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 74add2ddcc3f..e65493a8ac00 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -780,6 +780,8 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
780 metadata = (height != ip->i_height - 1); 780 metadata = (height != ip->i_height - 1);
781 if (metadata) 781 if (metadata)
782 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; 782 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
783 else if (ip->i_depth)
784 revokes = sdp->sd_inptrs;
783 785
784 if (ip != GFS2_I(sdp->sd_rindex)) 786 if (ip != GFS2_I(sdp->sd_rindex))
785 error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); 787 error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index f789c5732b7c..091ee4779538 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -82,12 +82,9 @@
82struct qstr gfs2_qdot __read_mostly; 82struct qstr gfs2_qdot __read_mostly;
83struct qstr gfs2_qdotdot __read_mostly; 83struct qstr gfs2_qdotdot __read_mostly;
84 84
85typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len,
86 u64 leaf_no, void *data);
87typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, 85typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent,
88 const struct qstr *name, void *opaque); 86 const struct qstr *name, void *opaque);
89 87
90
91int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, 88int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
92 struct buffer_head **bhp) 89 struct buffer_head **bhp)
93{ 90{
@@ -1600,7 +1597,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1600 */ 1597 */
1601 1598
1602int gfs2_dir_add(struct inode *inode, const struct qstr *name, 1599int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1603 const struct gfs2_inode *nip, unsigned type) 1600 const struct gfs2_inode *nip)
1604{ 1601{
1605 struct gfs2_inode *ip = GFS2_I(inode); 1602 struct gfs2_inode *ip = GFS2_I(inode);
1606 struct buffer_head *bh; 1603 struct buffer_head *bh;
@@ -1616,7 +1613,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1616 return PTR_ERR(dent); 1613 return PTR_ERR(dent);
1617 dent = gfs2_init_dirent(inode, dent, name, bh); 1614 dent = gfs2_init_dirent(inode, dent, name, bh);
1618 gfs2_inum_out(nip, dent); 1615 gfs2_inum_out(nip, dent);
1619 dent->de_type = cpu_to_be16(type); 1616 dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode));
1620 if (ip->i_diskflags & GFS2_DIF_EXHASH) { 1617 if (ip->i_diskflags & GFS2_DIF_EXHASH) {
1621 leaf = (struct gfs2_leaf *)bh->b_data; 1618 leaf = (struct gfs2_leaf *)bh->b_data;
1622 be16_add_cpu(&leaf->lf_entries, 1); 1619 be16_add_cpu(&leaf->lf_entries, 1);
@@ -1628,6 +1625,8 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1628 gfs2_trans_add_bh(ip->i_gl, bh, 1); 1625 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1629 ip->i_entries++; 1626 ip->i_entries++;
1630 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1627 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1628 if (S_ISDIR(nip->i_inode.i_mode))
1629 inc_nlink(&ip->i_inode);
1631 gfs2_dinode_out(ip, bh->b_data); 1630 gfs2_dinode_out(ip, bh->b_data);
1632 brelse(bh); 1631 brelse(bh);
1633 error = 0; 1632 error = 0;
@@ -1672,8 +1671,9 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1672 * Returns: 0 on success, error code on failure 1671 * Returns: 0 on success, error code on failure
1673 */ 1672 */
1674 1673
1675int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name) 1674int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
1676{ 1675{
1676 const struct qstr *name = &dentry->d_name;
1677 struct gfs2_dirent *dent, *prev = NULL; 1677 struct gfs2_dirent *dent, *prev = NULL;
1678 struct buffer_head *bh; 1678 struct buffer_head *bh;
1679 int error; 1679 int error;
@@ -1714,6 +1714,8 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
1714 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1714 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1715 dip->i_entries--; 1715 dip->i_entries--;
1716 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; 1716 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
1717 if (S_ISDIR(dentry->d_inode->i_mode))
1718 drop_nlink(&dip->i_inode);
1717 gfs2_dinode_out(dip, bh->b_data); 1719 gfs2_dinode_out(dip, bh->b_data);
1718 brelse(bh); 1720 brelse(bh);
1719 mark_inode_dirty(&dip->i_inode); 1721 mark_inode_dirty(&dip->i_inode);
@@ -1768,94 +1770,20 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1768} 1770}
1769 1771
1770/** 1772/**
1771 * foreach_leaf - call a function for each leaf in a directory
1772 * @dip: the directory
1773 * @lc: the function to call for each each
1774 * @data: private data to pass to it
1775 *
1776 * Returns: errno
1777 */
1778
1779static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
1780{
1781 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1782 struct buffer_head *bh;
1783 struct gfs2_leaf *leaf;
1784 u32 hsize, len;
1785 u32 ht_offset, lp_offset, ht_offset_cur = -1;
1786 u32 index = 0;
1787 __be64 *lp;
1788 u64 leaf_no;
1789 int error = 0;
1790
1791 hsize = 1 << dip->i_depth;
1792 if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
1793 gfs2_consist_inode(dip);
1794 return -EIO;
1795 }
1796
1797 lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
1798 if (!lp)
1799 return -ENOMEM;
1800
1801 while (index < hsize) {
1802 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1803 ht_offset = index - lp_offset;
1804
1805 if (ht_offset_cur != ht_offset) {
1806 error = gfs2_dir_read_data(dip, (char *)lp,
1807 ht_offset * sizeof(__be64),
1808 sdp->sd_hash_bsize, 1);
1809 if (error != sdp->sd_hash_bsize) {
1810 if (error >= 0)
1811 error = -EIO;
1812 goto out;
1813 }
1814 ht_offset_cur = ht_offset;
1815 }
1816
1817 leaf_no = be64_to_cpu(lp[lp_offset]);
1818 if (leaf_no) {
1819 error = get_leaf(dip, leaf_no, &bh);
1820 if (error)
1821 goto out;
1822 leaf = (struct gfs2_leaf *)bh->b_data;
1823 len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
1824 brelse(bh);
1825
1826 error = lc(dip, index, len, leaf_no, data);
1827 if (error)
1828 goto out;
1829
1830 index = (index & ~(len - 1)) + len;
1831 } else
1832 index++;
1833 }
1834
1835 if (index != hsize) {
1836 gfs2_consist_inode(dip);
1837 error = -EIO;
1838 }
1839
1840out:
1841 kfree(lp);
1842
1843 return error;
1844}
1845
1846/**
1847 * leaf_dealloc - Deallocate a directory leaf 1773 * leaf_dealloc - Deallocate a directory leaf
1848 * @dip: the directory 1774 * @dip: the directory
1849 * @index: the hash table offset in the directory 1775 * @index: the hash table offset in the directory
1850 * @len: the number of pointers to this leaf 1776 * @len: the number of pointers to this leaf
1851 * @leaf_no: the leaf number 1777 * @leaf_no: the leaf number
1852 * @data: not used 1778 * @leaf_bh: buffer_head for the starting leaf
1779 * last_dealloc: 1 if this is the final dealloc for the leaf, else 0
1853 * 1780 *
1854 * Returns: errno 1781 * Returns: errno
1855 */ 1782 */
1856 1783
1857static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, 1784static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1858 u64 leaf_no, void *data) 1785 u64 leaf_no, struct buffer_head *leaf_bh,
1786 int last_dealloc)
1859{ 1787{
1860 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 1788 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1861 struct gfs2_leaf *tmp_leaf; 1789 struct gfs2_leaf *tmp_leaf;
@@ -1887,14 +1815,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1887 goto out_qs; 1815 goto out_qs;
1888 1816
1889 /* Count the number of leaves */ 1817 /* Count the number of leaves */
1818 bh = leaf_bh;
1890 1819
1891 for (blk = leaf_no; blk; blk = nblk) { 1820 for (blk = leaf_no; blk; blk = nblk) {
1892 error = get_leaf(dip, blk, &bh); 1821 if (blk != leaf_no) {
1893 if (error) 1822 error = get_leaf(dip, blk, &bh);
1894 goto out_rlist; 1823 if (error)
1824 goto out_rlist;
1825 }
1895 tmp_leaf = (struct gfs2_leaf *)bh->b_data; 1826 tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1896 nblk = be64_to_cpu(tmp_leaf->lf_next); 1827 nblk = be64_to_cpu(tmp_leaf->lf_next);
1897 brelse(bh); 1828 if (blk != leaf_no)
1829 brelse(bh);
1898 1830
1899 gfs2_rlist_add(sdp, &rlist, blk); 1831 gfs2_rlist_add(sdp, &rlist, blk);
1900 l_blocks++; 1832 l_blocks++;
@@ -1918,13 +1850,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1918 if (error) 1850 if (error)
1919 goto out_rg_gunlock; 1851 goto out_rg_gunlock;
1920 1852
1853 bh = leaf_bh;
1854
1921 for (blk = leaf_no; blk; blk = nblk) { 1855 for (blk = leaf_no; blk; blk = nblk) {
1922 error = get_leaf(dip, blk, &bh); 1856 if (blk != leaf_no) {
1923 if (error) 1857 error = get_leaf(dip, blk, &bh);
1924 goto out_end_trans; 1858 if (error)
1859 goto out_end_trans;
1860 }
1925 tmp_leaf = (struct gfs2_leaf *)bh->b_data; 1861 tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1926 nblk = be64_to_cpu(tmp_leaf->lf_next); 1862 nblk = be64_to_cpu(tmp_leaf->lf_next);
1927 brelse(bh); 1863 if (blk != leaf_no)
1864 brelse(bh);
1928 1865
1929 gfs2_free_meta(dip, blk, 1); 1866 gfs2_free_meta(dip, blk, 1);
1930 gfs2_add_inode_blocks(&dip->i_inode, -1); 1867 gfs2_add_inode_blocks(&dip->i_inode, -1);
@@ -1942,6 +1879,10 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1942 goto out_end_trans; 1879 goto out_end_trans;
1943 1880
1944 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 1881 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1882 /* On the last dealloc, make this a regular file in case we crash.
1883 (We don't want to free these blocks a second time.) */
1884 if (last_dealloc)
1885 dip->i_inode.i_mode = S_IFREG;
1945 gfs2_dinode_out(dip, dibh->b_data); 1886 gfs2_dinode_out(dip, dibh->b_data);
1946 brelse(dibh); 1887 brelse(dibh);
1947 1888
@@ -1975,29 +1916,67 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
1975{ 1916{
1976 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 1917 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1977 struct buffer_head *bh; 1918 struct buffer_head *bh;
1978 int error; 1919 struct gfs2_leaf *leaf;
1920 u32 hsize, len;
1921 u32 ht_offset, lp_offset, ht_offset_cur = -1;
1922 u32 index = 0, next_index;
1923 __be64 *lp;
1924 u64 leaf_no;
1925 int error = 0, last;
1979 1926
1980 /* Dealloc on-disk leaves to FREEMETA state */ 1927 hsize = 1 << dip->i_depth;
1981 error = foreach_leaf(dip, leaf_dealloc, NULL); 1928 if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
1982 if (error) 1929 gfs2_consist_inode(dip);
1983 return error; 1930 return -EIO;
1931 }
1984 1932
1985 /* Make this a regular file in case we crash. 1933 lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
1986 (We don't want to free these blocks a second time.) */ 1934 if (!lp)
1935 return -ENOMEM;
1987 1936
1988 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 1937 while (index < hsize) {
1989 if (error) 1938 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1990 return error; 1939 ht_offset = index - lp_offset;
1991 1940
1992 error = gfs2_meta_inode_buffer(dip, &bh); 1941 if (ht_offset_cur != ht_offset) {
1993 if (!error) { 1942 error = gfs2_dir_read_data(dip, (char *)lp,
1994 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1943 ht_offset * sizeof(__be64),
1995 ((struct gfs2_dinode *)bh->b_data)->di_mode = 1944 sdp->sd_hash_bsize, 1);
1996 cpu_to_be32(S_IFREG); 1945 if (error != sdp->sd_hash_bsize) {
1997 brelse(bh); 1946 if (error >= 0)
1947 error = -EIO;
1948 goto out;
1949 }
1950 ht_offset_cur = ht_offset;
1951 }
1952
1953 leaf_no = be64_to_cpu(lp[lp_offset]);
1954 if (leaf_no) {
1955 error = get_leaf(dip, leaf_no, &bh);
1956 if (error)
1957 goto out;
1958 leaf = (struct gfs2_leaf *)bh->b_data;
1959 len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
1960
1961 next_index = (index & ~(len - 1)) + len;
1962 last = ((next_index >= hsize) ? 1 : 0);
1963 error = leaf_dealloc(dip, index, len, leaf_no, bh,
1964 last);
1965 brelse(bh);
1966 if (error)
1967 goto out;
1968 index = next_index;
1969 } else
1970 index++;
1998 } 1971 }
1999 1972
2000 gfs2_trans_end(sdp); 1973 if (index != hsize) {
1974 gfs2_consist_inode(dip);
1975 error = -EIO;
1976 }
1977
1978out:
1979 kfree(lp);
2001 1980
2002 return error; 1981 return error;
2003} 1982}
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index a98f644bd3df..e686af11becd 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -22,8 +22,8 @@ extern struct inode *gfs2_dir_search(struct inode *dir,
22extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, 22extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename,
23 const struct gfs2_inode *ip); 23 const struct gfs2_inode *ip);
24extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, 24extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
25 const struct gfs2_inode *ip, unsigned int type); 25 const struct gfs2_inode *ip);
26extern int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); 26extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry);
27extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, 27extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
28 filldir_t filldir); 28 filldir_t filldir);
29extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, 29extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index b5a5e60df0d5..fe9945f2ff72 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -139,7 +139,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
139 struct gfs2_sbd *sdp = sb->s_fs_info; 139 struct gfs2_sbd *sdp = sb->s_fs_info;
140 struct inode *inode; 140 struct inode *inode;
141 141
142 inode = gfs2_ilookup(sb, inum->no_addr); 142 inode = gfs2_ilookup(sb, inum->no_addr, 0);
143 if (inode) { 143 if (inode) {
144 if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { 144 if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
145 iput(inode); 145 iput(inode);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index e48310885c48..a9f5cbe45cd9 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -545,18 +545,10 @@ static int gfs2_close(struct inode *inode, struct file *file)
545/** 545/**
546 * gfs2_fsync - sync the dirty data for a file (across the cluster) 546 * gfs2_fsync - sync the dirty data for a file (across the cluster)
547 * @file: the file that points to the dentry (we ignore this) 547 * @file: the file that points to the dentry (we ignore this)
548 * @dentry: the dentry that points to the inode to sync 548 * @datasync: set if we can ignore timestamp changes
549 * 549 *
550 * The VFS will flush "normal" data for us. We only need to worry 550 * The VFS will flush data for us. We only need to worry
551 * about metadata here. For journaled data, we just do a log flush 551 * about metadata here.
552 * as we can't avoid it. Otherwise we can just bale out if datasync
553 * is set. For stuffed inodes we must flush the log in order to
554 * ensure that all data is on disk.
555 *
556 * The call to write_inode_now() is there to write back metadata and
557 * the inode itself. It does also try and write the data, but thats
558 * (hopefully) a no-op due to the VFS having already called filemap_fdatawrite()
559 * for us.
560 * 552 *
561 * Returns: errno 553 * Returns: errno
562 */ 554 */
@@ -565,22 +557,20 @@ static int gfs2_fsync(struct file *file, int datasync)
565{ 557{
566 struct inode *inode = file->f_mapping->host; 558 struct inode *inode = file->f_mapping->host;
567 int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); 559 int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
568 int ret = 0; 560 struct gfs2_inode *ip = GFS2_I(inode);
569 561 int ret;
570 if (gfs2_is_jdata(GFS2_I(inode))) {
571 gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
572 return 0;
573 }
574 562
575 if (sync_state != 0) { 563 if (datasync)
576 if (!datasync) 564 sync_state &= ~I_DIRTY_SYNC;
577 ret = write_inode_now(inode, 0);
578 565
579 if (gfs2_is_stuffed(GFS2_I(inode))) 566 if (sync_state) {
580 gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl); 567 ret = sync_inode_metadata(inode, 1);
568 if (ret)
569 return ret;
570 gfs2_ail_flush(ip->i_gl);
581 } 571 }
582 572
583 return ret; 573 return 0;
584} 574}
585 575
586/** 576/**
@@ -826,6 +816,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
826 loff_t bytes, max_bytes; 816 loff_t bytes, max_bytes;
827 struct gfs2_alloc *al; 817 struct gfs2_alloc *al;
828 int error; 818 int error;
819 loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
829 loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; 820 loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
830 next = (next + 1) << sdp->sd_sb.sb_bsize_shift; 821 next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
831 822
@@ -833,13 +824,15 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
833 if (mode & ~FALLOC_FL_KEEP_SIZE) 824 if (mode & ~FALLOC_FL_KEEP_SIZE)
834 return -EOPNOTSUPP; 825 return -EOPNOTSUPP;
835 826
836 offset = (offset >> sdp->sd_sb.sb_bsize_shift) << 827 offset &= bsize_mask;
837 sdp->sd_sb.sb_bsize_shift;
838 828
839 len = next - offset; 829 len = next - offset;
840 bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; 830 bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
841 if (!bytes) 831 if (!bytes)
842 bytes = UINT_MAX; 832 bytes = UINT_MAX;
833 bytes &= bsize_mask;
834 if (bytes == 0)
835 bytes = sdp->sd_sb.sb_bsize;
843 836
844 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); 837 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
845 error = gfs2_glock_nq(&ip->i_gh); 838 error = gfs2_glock_nq(&ip->i_gh);
@@ -870,6 +863,9 @@ retry:
870 if (error) { 863 if (error) {
871 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { 864 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
872 bytes >>= 1; 865 bytes >>= 1;
866 bytes &= bsize_mask;
867 if (bytes == 0)
868 bytes = sdp->sd_sb.sb_bsize;
873 goto retry; 869 goto retry;
874 } 870 }
875 goto out_qunlock; 871 goto out_qunlock;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 7a4fb630a320..2792a790e50b 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -143,14 +143,9 @@ static int demote_ok(const struct gfs2_glock *gl)
143{ 143{
144 const struct gfs2_glock_operations *glops = gl->gl_ops; 144 const struct gfs2_glock_operations *glops = gl->gl_ops;
145 145
146 /* assert_spin_locked(&gl->gl_spin); */
147
148 if (gl->gl_state == LM_ST_UNLOCKED) 146 if (gl->gl_state == LM_ST_UNLOCKED)
149 return 0; 147 return 0;
150 if (test_bit(GLF_LFLUSH, &gl->gl_flags)) 148 if (!list_empty(&gl->gl_holders))
151 return 0;
152 if ((gl->gl_name.ln_type != LM_TYPE_INODE) &&
153 !list_empty(&gl->gl_holders))
154 return 0; 149 return 0;
155 if (glops->go_demote_ok) 150 if (glops->go_demote_ok)
156 return glops->go_demote_ok(gl); 151 return glops->go_demote_ok(gl);
@@ -158,6 +153,31 @@ static int demote_ok(const struct gfs2_glock *gl)
158} 153}
159 154
160 155
156void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
157{
158 spin_lock(&lru_lock);
159
160 if (!list_empty(&gl->gl_lru))
161 list_del_init(&gl->gl_lru);
162 else
163 atomic_inc(&lru_count);
164
165 list_add_tail(&gl->gl_lru, &lru_list);
166 set_bit(GLF_LRU, &gl->gl_flags);
167 spin_unlock(&lru_lock);
168}
169
170static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
171{
172 spin_lock(&lru_lock);
173 if (!list_empty(&gl->gl_lru)) {
174 list_del_init(&gl->gl_lru);
175 atomic_dec(&lru_count);
176 clear_bit(GLF_LRU, &gl->gl_flags);
177 }
178 spin_unlock(&lru_lock);
179}
180
161/** 181/**
162 * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list 182 * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
163 * @gl: the glock 183 * @gl: the glock
@@ -168,24 +188,8 @@ static int demote_ok(const struct gfs2_glock *gl)
168 188
169static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) 189static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
170{ 190{
171 if (demote_ok(gl)) { 191 if (demote_ok(gl))
172 spin_lock(&lru_lock); 192 gfs2_glock_add_to_lru(gl);
173
174 if (!list_empty(&gl->gl_lru))
175 list_del_init(&gl->gl_lru);
176 else
177 atomic_inc(&lru_count);
178
179 list_add_tail(&gl->gl_lru, &lru_list);
180 spin_unlock(&lru_lock);
181 }
182}
183
184void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
185{
186 spin_lock(&gl->gl_spin);
187 __gfs2_glock_schedule_for_reclaim(gl);
188 spin_unlock(&gl->gl_spin);
189} 193}
190 194
191/** 195/**
@@ -217,12 +221,7 @@ void gfs2_glock_put(struct gfs2_glock *gl)
217 spin_lock_bucket(gl->gl_hash); 221 spin_lock_bucket(gl->gl_hash);
218 hlist_bl_del_rcu(&gl->gl_list); 222 hlist_bl_del_rcu(&gl->gl_list);
219 spin_unlock_bucket(gl->gl_hash); 223 spin_unlock_bucket(gl->gl_hash);
220 spin_lock(&lru_lock); 224 gfs2_glock_remove_from_lru(gl);
221 if (!list_empty(&gl->gl_lru)) {
222 list_del_init(&gl->gl_lru);
223 atomic_dec(&lru_count);
224 }
225 spin_unlock(&lru_lock);
226 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); 225 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
227 GLOCK_BUG_ON(gl, mapping && mapping->nrpages); 226 GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
228 trace_gfs2_glock_put(gl); 227 trace_gfs2_glock_put(gl);
@@ -542,11 +541,6 @@ __acquires(&gl->gl_spin)
542 clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); 541 clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
543 542
544 gfs2_glock_hold(gl); 543 gfs2_glock_hold(gl);
545 if (target != LM_ST_UNLOCKED && (gl->gl_state == LM_ST_SHARED ||
546 gl->gl_state == LM_ST_DEFERRED) &&
547 !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
548 lck_flags |= LM_FLAG_TRY_1CB;
549
550 if (sdp->sd_lockstruct.ls_ops->lm_lock) { 544 if (sdp->sd_lockstruct.ls_ops->lm_lock) {
551 /* lock_dlm */ 545 /* lock_dlm */
552 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); 546 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
@@ -648,7 +642,7 @@ static void delete_work_func(struct work_struct *work)
648 /* Note: Unsafe to dereference ip as we don't hold right refs/locks */ 642 /* Note: Unsafe to dereference ip as we don't hold right refs/locks */
649 643
650 if (ip) 644 if (ip)
651 inode = gfs2_ilookup(sdp->sd_vfs, no_addr); 645 inode = gfs2_ilookup(sdp->sd_vfs, no_addr, 1);
652 else 646 else
653 inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED); 647 inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
654 if (inode && !IS_ERR(inode)) { 648 if (inode && !IS_ERR(inode)) {
@@ -1025,6 +1019,9 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
1025 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 1019 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
1026 return -EIO; 1020 return -EIO;
1027 1021
1022 if (test_bit(GLF_LRU, &gl->gl_flags))
1023 gfs2_glock_remove_from_lru(gl);
1024
1028 spin_lock(&gl->gl_spin); 1025 spin_lock(&gl->gl_spin);
1029 add_to_queue(gh); 1026 add_to_queue(gh);
1030 if ((LM_FLAG_NOEXP & gh->gh_flags) && 1027 if ((LM_FLAG_NOEXP & gh->gh_flags) &&
@@ -1082,7 +1079,8 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1082 !test_bit(GLF_DEMOTE, &gl->gl_flags)) 1079 !test_bit(GLF_DEMOTE, &gl->gl_flags))
1083 fast_path = 1; 1080 fast_path = 1;
1084 } 1081 }
1085 __gfs2_glock_schedule_for_reclaim(gl); 1082 if (!test_bit(GLF_LFLUSH, &gl->gl_flags))
1083 __gfs2_glock_schedule_for_reclaim(gl);
1086 trace_gfs2_glock_queue(gh, 0); 1084 trace_gfs2_glock_queue(gh, 0);
1087 spin_unlock(&gl->gl_spin); 1085 spin_unlock(&gl->gl_spin);
1088 if (likely(fast_path)) 1086 if (likely(fast_path))
@@ -1348,11 +1346,14 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1348} 1346}
1349 1347
1350 1348
1351static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 1349static int gfs2_shrink_glock_memory(struct shrinker *shrink,
1350 struct shrink_control *sc)
1352{ 1351{
1353 struct gfs2_glock *gl; 1352 struct gfs2_glock *gl;
1354 int may_demote; 1353 int may_demote;
1355 int nr_skipped = 0; 1354 int nr_skipped = 0;
1355 int nr = sc->nr_to_scan;
1356 gfp_t gfp_mask = sc->gfp_mask;
1356 LIST_HEAD(skipped); 1357 LIST_HEAD(skipped);
1357 1358
1358 if (nr == 0) 1359 if (nr == 0)
@@ -1365,6 +1366,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m
1365 while(nr && !list_empty(&lru_list)) { 1366 while(nr && !list_empty(&lru_list)) {
1366 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); 1367 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
1367 list_del_init(&gl->gl_lru); 1368 list_del_init(&gl->gl_lru);
1369 clear_bit(GLF_LRU, &gl->gl_flags);
1368 atomic_dec(&lru_count); 1370 atomic_dec(&lru_count);
1369 1371
1370 /* Test for being demotable */ 1372 /* Test for being demotable */
@@ -1387,6 +1389,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m
1387 } 1389 }
1388 nr_skipped++; 1390 nr_skipped++;
1389 list_add(&gl->gl_lru, &skipped); 1391 list_add(&gl->gl_lru, &skipped);
1392 set_bit(GLF_LRU, &gl->gl_flags);
1390 } 1393 }
1391 list_splice(&skipped, &lru_list); 1394 list_splice(&skipped, &lru_list);
1392 atomic_add(nr_skipped, &lru_count); 1395 atomic_add(nr_skipped, &lru_count);
@@ -1459,12 +1462,7 @@ static void thaw_glock(struct gfs2_glock *gl)
1459 1462
1460static void clear_glock(struct gfs2_glock *gl) 1463static void clear_glock(struct gfs2_glock *gl)
1461{ 1464{
1462 spin_lock(&lru_lock); 1465 gfs2_glock_remove_from_lru(gl);
1463 if (!list_empty(&gl->gl_lru)) {
1464 list_del_init(&gl->gl_lru);
1465 atomic_dec(&lru_count);
1466 }
1467 spin_unlock(&lru_lock);
1468 1466
1469 spin_lock(&gl->gl_spin); 1467 spin_lock(&gl->gl_spin);
1470 if (gl->gl_state != LM_ST_UNLOCKED) 1468 if (gl->gl_state != LM_ST_UNLOCKED)
@@ -1599,9 +1597,11 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
1599 return 0; 1597 return 0;
1600} 1598}
1601 1599
1602static const char *gflags2str(char *buf, const unsigned long *gflags) 1600static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
1603{ 1601{
1602 const unsigned long *gflags = &gl->gl_flags;
1604 char *p = buf; 1603 char *p = buf;
1604
1605 if (test_bit(GLF_LOCK, gflags)) 1605 if (test_bit(GLF_LOCK, gflags))
1606 *p++ = 'l'; 1606 *p++ = 'l';
1607 if (test_bit(GLF_DEMOTE, gflags)) 1607 if (test_bit(GLF_DEMOTE, gflags))
@@ -1624,6 +1624,10 @@ static const char *gflags2str(char *buf, const unsigned long *gflags)
1624 *p++ = 'F'; 1624 *p++ = 'F';
1625 if (test_bit(GLF_QUEUED, gflags)) 1625 if (test_bit(GLF_QUEUED, gflags))
1626 *p++ = 'q'; 1626 *p++ = 'q';
1627 if (test_bit(GLF_LRU, gflags))
1628 *p++ = 'L';
1629 if (gl->gl_object)
1630 *p++ = 'o';
1627 *p = 0; 1631 *p = 0;
1628 return buf; 1632 return buf;
1629} 1633}
@@ -1658,14 +1662,15 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
1658 dtime *= 1000000/HZ; /* demote time in uSec */ 1662 dtime *= 1000000/HZ; /* demote time in uSec */
1659 if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) 1663 if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
1660 dtime = 0; 1664 dtime = 0;
1661 gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d\n", 1665 gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d\n",
1662 state2str(gl->gl_state), 1666 state2str(gl->gl_state),
1663 gl->gl_name.ln_type, 1667 gl->gl_name.ln_type,
1664 (unsigned long long)gl->gl_name.ln_number, 1668 (unsigned long long)gl->gl_name.ln_number,
1665 gflags2str(gflags_buf, &gl->gl_flags), 1669 gflags2str(gflags_buf, gl),
1666 state2str(gl->gl_target), 1670 state2str(gl->gl_target),
1667 state2str(gl->gl_demote_state), dtime, 1671 state2str(gl->gl_demote_state), dtime,
1668 atomic_read(&gl->gl_ail_count), 1672 atomic_read(&gl->gl_ail_count),
1673 atomic_read(&gl->gl_revokes),
1669 atomic_read(&gl->gl_ref)); 1674 atomic_read(&gl->gl_ref));
1670 1675
1671 list_for_each_entry(gh, &gl->gl_holders, gh_list) { 1676 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index aea160690e94..6b2f757b9281 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -225,11 +225,10 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
225 225
226extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); 226extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
227extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret); 227extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
228extern void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
229extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); 228extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
230extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip); 229extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
231extern void gfs2_glock_thaw(struct gfs2_sbd *sdp); 230extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
232extern void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl); 231extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl);
233extern void gfs2_glock_free(struct gfs2_glock *gl); 232extern void gfs2_glock_free(struct gfs2_glock *gl);
234 233
235extern int __init gfs2_glock_init(void); 234extern int __init gfs2_glock_init(void);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 25eeb2bcee47..8ef70f464731 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -28,33 +28,18 @@
28#include "trans.h" 28#include "trans.h"
29 29
30/** 30/**
31 * ail_empty_gl - remove all buffers for a given lock from the AIL 31 * __gfs2_ail_flush - remove all buffers for a given lock from the AIL
32 * @gl: the glock 32 * @gl: the glock
33 * 33 *
34 * None of the buffers should be dirty, locked, or pinned. 34 * None of the buffers should be dirty, locked, or pinned.
35 */ 35 */
36 36
37static void gfs2_ail_empty_gl(struct gfs2_glock *gl) 37static void __gfs2_ail_flush(struct gfs2_glock *gl)
38{ 38{
39 struct gfs2_sbd *sdp = gl->gl_sbd; 39 struct gfs2_sbd *sdp = gl->gl_sbd;
40 struct list_head *head = &gl->gl_ail_list; 40 struct list_head *head = &gl->gl_ail_list;
41 struct gfs2_bufdata *bd; 41 struct gfs2_bufdata *bd;
42 struct buffer_head *bh; 42 struct buffer_head *bh;
43 struct gfs2_trans tr;
44
45 memset(&tr, 0, sizeof(tr));
46 tr.tr_revokes = atomic_read(&gl->gl_ail_count);
47
48 if (!tr.tr_revokes)
49 return;
50
51 /* A shortened, inline version of gfs2_trans_begin() */
52 tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
53 tr.tr_ip = (unsigned long)__builtin_return_address(0);
54 INIT_LIST_HEAD(&tr.tr_list_buf);
55 gfs2_log_reserve(sdp, tr.tr_reserved);
56 BUG_ON(current->journal_info);
57 current->journal_info = &tr;
58 43
59 spin_lock(&sdp->sd_ail_lock); 44 spin_lock(&sdp->sd_ail_lock);
60 while (!list_empty(head)) { 45 while (!list_empty(head)) {
@@ -76,7 +61,47 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
76 } 61 }
77 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); 62 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
78 spin_unlock(&sdp->sd_ail_lock); 63 spin_unlock(&sdp->sd_ail_lock);
64}
65
66
67static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
68{
69 struct gfs2_sbd *sdp = gl->gl_sbd;
70 struct gfs2_trans tr;
71
72 memset(&tr, 0, sizeof(tr));
73 tr.tr_revokes = atomic_read(&gl->gl_ail_count);
74
75 if (!tr.tr_revokes)
76 return;
77
78 /* A shortened, inline version of gfs2_trans_begin() */
79 tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
80 tr.tr_ip = (unsigned long)__builtin_return_address(0);
81 INIT_LIST_HEAD(&tr.tr_list_buf);
82 gfs2_log_reserve(sdp, tr.tr_reserved);
83 BUG_ON(current->journal_info);
84 current->journal_info = &tr;
85
86 __gfs2_ail_flush(gl);
87
88 gfs2_trans_end(sdp);
89 gfs2_log_flush(sdp, NULL);
90}
91
92void gfs2_ail_flush(struct gfs2_glock *gl)
93{
94 struct gfs2_sbd *sdp = gl->gl_sbd;
95 unsigned int revokes = atomic_read(&gl->gl_ail_count);
96 int ret;
97
98 if (!revokes)
99 return;
79 100
101 ret = gfs2_trans_begin(sdp, 0, revokes);
102 if (ret)
103 return;
104 __gfs2_ail_flush(gl);
80 gfs2_trans_end(sdp); 105 gfs2_trans_end(sdp);
81 gfs2_log_flush(sdp, NULL); 106 gfs2_log_flush(sdp, NULL);
82} 107}
@@ -227,6 +252,119 @@ static int inode_go_demote_ok(const struct gfs2_glock *gl)
227} 252}
228 253
229/** 254/**
255 * gfs2_set_nlink - Set the inode's link count based on on-disk info
256 * @inode: The inode in question
257 * @nlink: The link count
258 *
259 * If the link count has hit zero, it must never be raised, whatever the
260 * on-disk inode might say. When new struct inodes are created the link
261 * count is set to 1, so that we can safely use this test even when reading
262 * in on disk information for the first time.
263 */
264
265static void gfs2_set_nlink(struct inode *inode, u32 nlink)
266{
267 /*
268 * We will need to review setting the nlink count here in the
269 * light of the forthcoming ro bind mount work. This is a reminder
270 * to do that.
271 */
272 if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) {
273 if (nlink == 0)
274 clear_nlink(inode);
275 else
276 inode->i_nlink = nlink;
277 }
278}
279
280static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
281{
282 const struct gfs2_dinode *str = buf;
283 struct timespec atime;
284 u16 height, depth;
285
286 if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
287 goto corrupt;
288 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
289 ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
290 ip->i_inode.i_rdev = 0;
291 switch (ip->i_inode.i_mode & S_IFMT) {
292 case S_IFBLK:
293 case S_IFCHR:
294 ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
295 be32_to_cpu(str->di_minor));
296 break;
297 };
298
299 ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
300 ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
301 gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
302 i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
303 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
304 atime.tv_sec = be64_to_cpu(str->di_atime);
305 atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
306 if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
307 ip->i_inode.i_atime = atime;
308 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
309 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
310 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
311 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
312
313 ip->i_goal = be64_to_cpu(str->di_goal_meta);
314 ip->i_generation = be64_to_cpu(str->di_generation);
315
316 ip->i_diskflags = be32_to_cpu(str->di_flags);
317 gfs2_set_inode_flags(&ip->i_inode);
318 height = be16_to_cpu(str->di_height);
319 if (unlikely(height > GFS2_MAX_META_HEIGHT))
320 goto corrupt;
321 ip->i_height = (u8)height;
322
323 depth = be16_to_cpu(str->di_depth);
324 if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
325 goto corrupt;
326 ip->i_depth = (u8)depth;
327 ip->i_entries = be32_to_cpu(str->di_entries);
328
329 ip->i_eattr = be64_to_cpu(str->di_eattr);
330 if (S_ISREG(ip->i_inode.i_mode))
331 gfs2_set_aops(&ip->i_inode);
332
333 return 0;
334corrupt:
335 gfs2_consist_inode(ip);
336 return -EIO;
337}
338
339/**
340 * gfs2_inode_refresh - Refresh the incore copy of the dinode
341 * @ip: The GFS2 inode
342 *
343 * Returns: errno
344 */
345
346int gfs2_inode_refresh(struct gfs2_inode *ip)
347{
348 struct buffer_head *dibh;
349 int error;
350
351 error = gfs2_meta_inode_buffer(ip, &dibh);
352 if (error)
353 return error;
354
355 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
356 brelse(dibh);
357 return -EIO;
358 }
359
360 error = gfs2_dinode_in(ip, dibh->b_data);
361 brelse(dibh);
362 clear_bit(GIF_INVALID, &ip->i_flags);
363
364 return error;
365}
366
367/**
230 * inode_go_lock - operation done after an inode lock is locked by a process 368 * inode_go_lock - operation done after an inode lock is locked by a process
231 * @gl: the glock 369 * @gl: the glock
232 * @flags: 370 * @flags:
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
index b3aa2e3210fd..6fce409b5a50 100644
--- a/fs/gfs2/glops.h
+++ b/fs/gfs2/glops.h
@@ -23,4 +23,6 @@ extern const struct gfs2_glock_operations gfs2_quota_glops;
23extern const struct gfs2_glock_operations gfs2_journal_glops; 23extern const struct gfs2_glock_operations gfs2_journal_glops;
24extern const struct gfs2_glock_operations *gfs2_glops_list[]; 24extern const struct gfs2_glock_operations *gfs2_glops_list[];
25 25
26extern void gfs2_ail_flush(struct gfs2_glock *gl);
27
26#endif /* __GLOPS_DOT_H__ */ 28#endif /* __GLOPS_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 870a89d6d4dc..0a064e91ac70 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -20,7 +20,6 @@
20 20
21#define DIO_WAIT 0x00000010 21#define DIO_WAIT 0x00000010
22#define DIO_METADATA 0x00000020 22#define DIO_METADATA 0x00000020
23#define DIO_ALL 0x00000100
24 23
25struct gfs2_log_operations; 24struct gfs2_log_operations;
26struct gfs2_log_element; 25struct gfs2_log_element;
@@ -200,6 +199,8 @@ enum {
200 GLF_INITIAL = 10, 199 GLF_INITIAL = 10,
201 GLF_FROZEN = 11, 200 GLF_FROZEN = 11,
202 GLF_QUEUED = 12, 201 GLF_QUEUED = 12,
202 GLF_LRU = 13,
203 GLF_OBJECT = 14, /* Used only for tracing */
203}; 204};
204 205
205struct gfs2_glock { 206struct gfs2_glock {
@@ -234,6 +235,7 @@ struct gfs2_glock {
234 235
235 struct list_head gl_ail_list; 236 struct list_head gl_ail_list;
236 atomic_t gl_ail_count; 237 atomic_t gl_ail_count;
238 atomic_t gl_revokes;
237 struct delayed_work gl_work; 239 struct delayed_work gl_work;
238 struct work_struct gl_delete; 240 struct work_struct gl_delete;
239 struct rcu_head gl_rcu; 241 struct rcu_head gl_rcu;
@@ -374,8 +376,6 @@ struct gfs2_ail {
374 unsigned int ai_first; 376 unsigned int ai_first;
375 struct list_head ai_ail1_list; 377 struct list_head ai_ail1_list;
376 struct list_head ai_ail2_list; 378 struct list_head ai_ail2_list;
377
378 u64 ai_sync_gen;
379}; 379};
380 380
381struct gfs2_journal_extent { 381struct gfs2_journal_extent {
@@ -488,7 +488,6 @@ struct gfs2_sb_host {
488 488
489 char sb_lockproto[GFS2_LOCKNAME_LEN]; 489 char sb_lockproto[GFS2_LOCKNAME_LEN];
490 char sb_locktable[GFS2_LOCKNAME_LEN]; 490 char sb_locktable[GFS2_LOCKNAME_LEN];
491 u8 sb_uuid[16];
492}; 491};
493 492
494/* 493/*
@@ -654,7 +653,6 @@ struct gfs2_sbd {
654 spinlock_t sd_ail_lock; 653 spinlock_t sd_ail_lock;
655 struct list_head sd_ail1_list; 654 struct list_head sd_ail1_list;
656 struct list_head sd_ail2_list; 655 struct list_head sd_ail2_list;
657 u64 sd_ail_sync_gen;
658 656
659 /* Replay stuff */ 657 /* Replay stuff */
660 658
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 9134dcb89479..03e0c529063e 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1,23 +1,25 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2. 7 * of the GNU General Public License version 2.
8 */ 8 */
9 9
10#include <linux/sched.h>
11#include <linux/slab.h> 10#include <linux/slab.h>
12#include <linux/spinlock.h> 11#include <linux/spinlock.h>
13#include <linux/completion.h> 12#include <linux/completion.h>
14#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/namei.h>
15#include <linux/mm.h>
16#include <linux/xattr.h>
15#include <linux/posix_acl.h> 17#include <linux/posix_acl.h>
16#include <linux/sort.h>
17#include <linux/gfs2_ondisk.h> 18#include <linux/gfs2_ondisk.h>
18#include <linux/crc32.h> 19#include <linux/crc32.h>
20#include <linux/fiemap.h>
19#include <linux/security.h> 21#include <linux/security.h>
20#include <linux/time.h> 22#include <asm/uaccess.h>
21 23
22#include "gfs2.h" 24#include "gfs2.h"
23#include "incore.h" 25#include "incore.h"
@@ -26,19 +28,14 @@
26#include "dir.h" 28#include "dir.h"
27#include "xattr.h" 29#include "xattr.h"
28#include "glock.h" 30#include "glock.h"
29#include "glops.h"
30#include "inode.h" 31#include "inode.h"
31#include "log.h"
32#include "meta_io.h" 32#include "meta_io.h"
33#include "quota.h" 33#include "quota.h"
34#include "rgrp.h" 34#include "rgrp.h"
35#include "trans.h" 35#include "trans.h"
36#include "util.h" 36#include "util.h"
37 37#include "super.h"
38struct gfs2_inum_range_host { 38#include "glops.h"
39 u64 ir_start;
40 u64 ir_length;
41};
42 39
43struct gfs2_skip_data { 40struct gfs2_skip_data {
44 u64 no_addr; 41 u64 no_addr;
@@ -74,14 +71,14 @@ static int iget_set(struct inode *inode, void *opaque)
74 return 0; 71 return 0;
75} 72}
76 73
77struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) 74struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
78{ 75{
79 unsigned long hash = (unsigned long)no_addr; 76 unsigned long hash = (unsigned long)no_addr;
80 struct gfs2_skip_data data; 77 struct gfs2_skip_data data;
81 78
82 data.no_addr = no_addr; 79 data.no_addr = no_addr;
83 data.skipped = 0; 80 data.skipped = 0;
84 data.non_block = 0; 81 data.non_block = non_block;
85 return ilookup5(sb, hash, iget_test, &data); 82 return ilookup5(sb, hash, iget_test, &data);
86} 83}
87 84
@@ -248,203 +245,6 @@ fail_iput:
248 goto fail; 245 goto fail;
249} 246}
250 247
251static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
252{
253 const struct gfs2_dinode *str = buf;
254 struct timespec atime;
255 u16 height, depth;
256
257 if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
258 goto corrupt;
259 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
260 ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
261 ip->i_inode.i_rdev = 0;
262 switch (ip->i_inode.i_mode & S_IFMT) {
263 case S_IFBLK:
264 case S_IFCHR:
265 ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
266 be32_to_cpu(str->di_minor));
267 break;
268 };
269
270 ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
271 ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
272 /*
273 * We will need to review setting the nlink count here in the
274 * light of the forthcoming ro bind mount work. This is a reminder
275 * to do that.
276 */
277 ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink);
278 i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
279 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
280 atime.tv_sec = be64_to_cpu(str->di_atime);
281 atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
282 if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
283 ip->i_inode.i_atime = atime;
284 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
285 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
286 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
287 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
288
289 ip->i_goal = be64_to_cpu(str->di_goal_meta);
290 ip->i_generation = be64_to_cpu(str->di_generation);
291
292 ip->i_diskflags = be32_to_cpu(str->di_flags);
293 gfs2_set_inode_flags(&ip->i_inode);
294 height = be16_to_cpu(str->di_height);
295 if (unlikely(height > GFS2_MAX_META_HEIGHT))
296 goto corrupt;
297 ip->i_height = (u8)height;
298
299 depth = be16_to_cpu(str->di_depth);
300 if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
301 goto corrupt;
302 ip->i_depth = (u8)depth;
303 ip->i_entries = be32_to_cpu(str->di_entries);
304
305 ip->i_eattr = be64_to_cpu(str->di_eattr);
306 if (S_ISREG(ip->i_inode.i_mode))
307 gfs2_set_aops(&ip->i_inode);
308
309 return 0;
310corrupt:
311 if (gfs2_consist_inode(ip))
312 gfs2_dinode_print(ip);
313 return -EIO;
314}
315
316/**
317 * gfs2_inode_refresh - Refresh the incore copy of the dinode
318 * @ip: The GFS2 inode
319 *
320 * Returns: errno
321 */
322
323int gfs2_inode_refresh(struct gfs2_inode *ip)
324{
325 struct buffer_head *dibh;
326 int error;
327
328 error = gfs2_meta_inode_buffer(ip, &dibh);
329 if (error)
330 return error;
331
332 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
333 brelse(dibh);
334 return -EIO;
335 }
336
337 error = gfs2_dinode_in(ip, dibh->b_data);
338 brelse(dibh);
339 clear_bit(GIF_INVALID, &ip->i_flags);
340
341 return error;
342}
343
344int gfs2_dinode_dealloc(struct gfs2_inode *ip)
345{
346 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
347 struct gfs2_alloc *al;
348 struct gfs2_rgrpd *rgd;
349 int error;
350
351 if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
352 if (gfs2_consist_inode(ip))
353 gfs2_dinode_print(ip);
354 return -EIO;
355 }
356
357 al = gfs2_alloc_get(ip);
358 if (!al)
359 return -ENOMEM;
360
361 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
362 if (error)
363 goto out;
364
365 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
366 if (error)
367 goto out_qs;
368
369 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
370 if (!rgd) {
371 gfs2_consist_inode(ip);
372 error = -EIO;
373 goto out_rindex_relse;
374 }
375
376 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
377 &al->al_rgd_gh);
378 if (error)
379 goto out_rindex_relse;
380
381 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 1);
382 if (error)
383 goto out_rg_gunlock;
384
385 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
386 set_bit(GLF_LFLUSH, &ip->i_gl->gl_flags);
387
388 gfs2_free_di(rgd, ip);
389
390 gfs2_trans_end(sdp);
391
392out_rg_gunlock:
393 gfs2_glock_dq_uninit(&al->al_rgd_gh);
394out_rindex_relse:
395 gfs2_glock_dq_uninit(&al->al_ri_gh);
396out_qs:
397 gfs2_quota_unhold(ip);
398out:
399 gfs2_alloc_put(ip);
400 return error;
401}
402
403/**
404 * gfs2_change_nlink - Change nlink count on inode
405 * @ip: The GFS2 inode
406 * @diff: The change in the nlink count required
407 *
408 * Returns: errno
409 */
410int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
411{
412 struct buffer_head *dibh;
413 u32 nlink;
414 int error;
415
416 BUG_ON(diff != 1 && diff != -1);
417 nlink = ip->i_inode.i_nlink + diff;
418
419 /* If we are reducing the nlink count, but the new value ends up being
420 bigger than the old one, we must have underflowed. */
421 if (diff < 0 && nlink > ip->i_inode.i_nlink) {
422 if (gfs2_consist_inode(ip))
423 gfs2_dinode_print(ip);
424 return -EIO;
425 }
426
427 error = gfs2_meta_inode_buffer(ip, &dibh);
428 if (error)
429 return error;
430
431 if (diff > 0)
432 inc_nlink(&ip->i_inode);
433 else
434 drop_nlink(&ip->i_inode);
435
436 ip->i_inode.i_ctime = CURRENT_TIME;
437
438 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
439 gfs2_dinode_out(ip, dibh->b_data);
440 brelse(dibh);
441 mark_inode_dirty(&ip->i_inode);
442
443 if (ip->i_inode.i_nlink == 0)
444 gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */
445
446 return error;
447}
448 248
449struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) 249struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
450{ 250{
@@ -543,7 +343,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
543 343
544 /* Don't create entries in an unlinked directory */ 344 /* Don't create entries in an unlinked directory */
545 if (!dip->i_inode.i_nlink) 345 if (!dip->i_inode.i_nlink)
546 return -EPERM; 346 return -ENOENT;
547 347
548 error = gfs2_dir_check(&dip->i_inode, name, NULL); 348 error = gfs2_dir_check(&dip->i_inode, name, NULL);
549 switch (error) { 349 switch (error) {
@@ -613,21 +413,44 @@ out:
613 return error; 413 return error;
614} 414}
615 415
416static void gfs2_init_dir(struct buffer_head *dibh,
417 const struct gfs2_inode *parent)
418{
419 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
420 struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
421
422 gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent);
423 dent->de_inum = di->di_num; /* already GFS2 endian */
424 dent->de_type = cpu_to_be16(DT_DIR);
425
426 dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
427 gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
428 gfs2_inum_out(parent, dent);
429 dent->de_type = cpu_to_be16(DT_DIR);
430
431}
432
616/** 433/**
617 * init_dinode - Fill in a new dinode structure 434 * init_dinode - Fill in a new dinode structure
618 * @dip: the directory this inode is being created in 435 * @dip: The directory this inode is being created in
619 * @gl: The glock covering the new inode 436 * @gl: The glock covering the new inode
620 * @inum: the inode number 437 * @inum: The inode number
621 * @mode: the file permissions 438 * @mode: The file permissions
622 * @uid: 439 * @uid: The uid of the new inode
623 * @gid: 440 * @gid: The gid of the new inode
441 * @generation: The generation number of the new inode
442 * @dev: The device number (if a device node)
443 * @symname: The symlink destination (if a symlink)
444 * @size: The inode size (ignored for directories)
445 * @bhp: The buffer head (returned to caller)
624 * 446 *
625 */ 447 */
626 448
627static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 449static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
628 const struct gfs2_inum_host *inum, unsigned int mode, 450 const struct gfs2_inum_host *inum, unsigned int mode,
629 unsigned int uid, unsigned int gid, 451 unsigned int uid, unsigned int gid,
630 const u64 *generation, dev_t dev, struct buffer_head **bhp) 452 const u64 *generation, dev_t dev, const char *symname,
453 unsigned size, struct buffer_head **bhp)
631{ 454{
632 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 455 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
633 struct gfs2_dinode *di; 456 struct gfs2_dinode *di;
@@ -646,7 +469,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
646 di->di_uid = cpu_to_be32(uid); 469 di->di_uid = cpu_to_be32(uid);
647 di->di_gid = cpu_to_be32(gid); 470 di->di_gid = cpu_to_be32(gid);
648 di->di_nlink = 0; 471 di->di_nlink = 0;
649 di->di_size = 0; 472 di->di_size = cpu_to_be64(size);
650 di->di_blocks = cpu_to_be64(1); 473 di->di_blocks = cpu_to_be64(1);
651 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); 474 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec);
652 di->di_major = cpu_to_be32(MAJOR(dev)); 475 di->di_major = cpu_to_be32(MAJOR(dev));
@@ -654,16 +477,6 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
654 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); 477 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
655 di->di_generation = cpu_to_be64(*generation); 478 di->di_generation = cpu_to_be64(*generation);
656 di->di_flags = 0; 479 di->di_flags = 0;
657
658 if (S_ISREG(mode)) {
659 if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
660 gfs2_tune_get(sdp, gt_new_files_jdata))
661 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
662 } else if (S_ISDIR(mode)) {
663 di->di_flags |= cpu_to_be32(dip->i_diskflags &
664 GFS2_DIF_INHERIT_JDATA);
665 }
666
667 di->__pad1 = 0; 480 di->__pad1 = 0;
668 di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0); 481 di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0);
669 di->di_height = 0; 482 di->di_height = 0;
@@ -677,7 +490,26 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
677 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec); 490 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
678 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); 491 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
679 memset(&di->di_reserved, 0, sizeof(di->di_reserved)); 492 memset(&di->di_reserved, 0, sizeof(di->di_reserved));
680 493
494 switch(mode & S_IFMT) {
495 case S_IFREG:
496 if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
497 gfs2_tune_get(sdp, gt_new_files_jdata))
498 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
499 break;
500 case S_IFDIR:
501 di->di_flags |= cpu_to_be32(dip->i_diskflags &
502 GFS2_DIF_INHERIT_JDATA);
503 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
504 di->di_size = cpu_to_be64(sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode));
505 di->di_entries = cpu_to_be32(2);
506 gfs2_init_dir(dibh, dip);
507 break;
508 case S_IFLNK:
509 memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, size);
510 break;
511 }
512
681 set_buffer_uptodate(dibh); 513 set_buffer_uptodate(dibh);
682 514
683 *bhp = dibh; 515 *bhp = dibh;
@@ -685,7 +517,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
685 517
686static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 518static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
687 unsigned int mode, const struct gfs2_inum_host *inum, 519 unsigned int mode, const struct gfs2_inum_host *inum,
688 const u64 *generation, dev_t dev, struct buffer_head **bhp) 520 const u64 *generation, dev_t dev, const char *symname,
521 unsigned int size, struct buffer_head **bhp)
689{ 522{
690 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 523 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
691 unsigned int uid, gid; 524 unsigned int uid, gid;
@@ -707,7 +540,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
707 if (error) 540 if (error)
708 goto out_quota; 541 goto out_quota;
709 542
710 init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp); 543 init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, symname, size, bhp);
711 gfs2_quota_change(dip, +1, uid, gid); 544 gfs2_quota_change(dip, +1, uid, gid);
712 gfs2_trans_end(sdp); 545 gfs2_trans_end(sdp);
713 546
@@ -761,14 +594,16 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
761 goto fail_quota_locks; 594 goto fail_quota_locks;
762 } 595 }
763 596
764 error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode)); 597 error = gfs2_dir_add(&dip->i_inode, name, ip);
765 if (error) 598 if (error)
766 goto fail_end_trans; 599 goto fail_end_trans;
767 600
768 error = gfs2_meta_inode_buffer(ip, &dibh); 601 error = gfs2_meta_inode_buffer(ip, &dibh);
769 if (error) 602 if (error)
770 goto fail_end_trans; 603 goto fail_end_trans;
771 ip->i_inode.i_nlink = 1; 604 inc_nlink(&ip->i_inode);
605 if (S_ISDIR(ip->i_inode.i_mode))
606 inc_nlink(&ip->i_inode);
772 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 607 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
773 gfs2_dinode_out(ip, dibh->b_data); 608 gfs2_dinode_out(ip, dibh->b_data);
774 brelse(dibh); 609 brelse(dibh);
@@ -815,27 +650,25 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
815} 650}
816 651
817/** 652/**
818 * gfs2_createi - Create a new inode 653 * gfs2_create_inode - Create a new inode
819 * @ghs: An array of two holders 654 * @dir: The parent directory
820 * @name: The name of the new file 655 * @dentry: The new dentry
821 * @mode: the permissions on the new inode 656 * @mode: The permissions on the new inode
822 * 657 * @dev: For device nodes, this is the device number
823 * @ghs[0] is an initialized holder for the directory 658 * @symname: For symlinks, this is the link destination
824 * @ghs[1] is the holder for the inode lock 659 * @size: The initial size of the inode (ignored for directories)
825 * 660 *
826 * If the return value is not NULL, the glocks on both the directory and the new 661 * Returns: 0 on success, or error code
827 * file are held. A transaction has been started and an inplace reservation
828 * is held, as well.
829 *
830 * Returns: An inode
831 */ 662 */
832 663
833struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, 664static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
834 unsigned int mode, dev_t dev) 665 unsigned int mode, dev_t dev, const char *symname,
666 unsigned int size)
835{ 667{
668 const struct qstr *name = &dentry->d_name;
669 struct gfs2_holder ghs[2];
836 struct inode *inode = NULL; 670 struct inode *inode = NULL;
837 struct gfs2_inode *dip = ghs->gh_gl->gl_object; 671 struct gfs2_inode *dip = GFS2_I(dir);
838 struct inode *dir = &dip->i_inode;
839 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 672 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
840 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; 673 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
841 int error; 674 int error;
@@ -843,10 +676,9 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
843 struct buffer_head *bh = NULL; 676 struct buffer_head *bh = NULL;
844 677
845 if (!name->len || name->len > GFS2_FNAMESIZE) 678 if (!name->len || name->len > GFS2_FNAMESIZE)
846 return ERR_PTR(-ENAMETOOLONG); 679 return -ENAMETOOLONG;
847 680
848 gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs); 681 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
849 error = gfs2_glock_nq(ghs);
850 if (error) 682 if (error)
851 goto fail; 683 goto fail;
852 684
@@ -864,7 +696,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
864 if (error) 696 if (error)
865 goto fail_gunlock; 697 goto fail_gunlock;
866 698
867 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh); 699 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, symname, size, &bh);
868 if (error) 700 if (error)
869 goto fail_gunlock2; 701 goto fail_gunlock2;
870 702
@@ -891,18 +723,852 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
891 723
892 if (bh) 724 if (bh)
893 brelse(bh); 725 brelse(bh);
894 return inode; 726
727 gfs2_trans_end(sdp);
728 if (dip->i_alloc->al_rgd)
729 gfs2_inplace_release(dip);
730 gfs2_quota_unlock(dip);
731 gfs2_alloc_put(dip);
732 gfs2_glock_dq_uninit_m(2, ghs);
733 mark_inode_dirty(inode);
734 d_instantiate(dentry, inode);
735 return 0;
895 736
896fail_gunlock2: 737fail_gunlock2:
897 gfs2_glock_dq_uninit(ghs + 1); 738 gfs2_glock_dq_uninit(ghs + 1);
898 if (inode && !IS_ERR(inode)) 739 if (inode && !IS_ERR(inode))
899 iput(inode); 740 iput(inode);
900fail_gunlock: 741fail_gunlock:
901 gfs2_glock_dq(ghs); 742 gfs2_glock_dq_uninit(ghs);
902fail: 743fail:
903 if (bh) 744 if (bh)
904 brelse(bh); 745 brelse(bh);
905 return ERR_PTR(error); 746 return error;
747}
748
749/**
750 * gfs2_create - Create a file
751 * @dir: The directory in which to create the file
752 * @dentry: The dentry of the new file
753 * @mode: The mode of the new file
754 *
755 * Returns: errno
756 */
757
758static int gfs2_create(struct inode *dir, struct dentry *dentry,
759 int mode, struct nameidata *nd)
760{
761 struct inode *inode;
762 int ret;
763
764 for (;;) {
765 ret = gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0);
766 if (ret != -EEXIST || (nd && (nd->flags & LOOKUP_EXCL)))
767 return ret;
768
769 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
770 if (inode) {
771 if (!IS_ERR(inode))
772 break;
773 return PTR_ERR(inode);
774 }
775 }
776
777 d_instantiate(dentry, inode);
778 return 0;
779}
780
781/**
782 * gfs2_lookup - Look up a filename in a directory and return its inode
783 * @dir: The directory inode
784 * @dentry: The dentry of the new inode
785 * @nd: passed from Linux VFS, ignored by us
786 *
787 * Called by the VFS layer. Lock dir and call gfs2_lookupi()
788 *
789 * Returns: errno
790 */
791
792static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
793 struct nameidata *nd)
794{
795 struct inode *inode = NULL;
796
797 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
798 if (inode && IS_ERR(inode))
799 return ERR_CAST(inode);
800
801 if (inode) {
802 struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
803 struct gfs2_holder gh;
804 int error;
805 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
806 if (error) {
807 iput(inode);
808 return ERR_PTR(error);
809 }
810 gfs2_glock_dq_uninit(&gh);
811 return d_splice_alias(inode, dentry);
812 }
813 d_add(dentry, inode);
814
815 return NULL;
816}
817
818/**
819 * gfs2_link - Link to a file
820 * @old_dentry: The inode to link
821 * @dir: Add link to this directory
822 * @dentry: The name of the link
823 *
824 * Link the inode in "old_dentry" into the directory "dir" with the
825 * name in "dentry".
826 *
827 * Returns: errno
828 */
829
830static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
831 struct dentry *dentry)
832{
833 struct gfs2_inode *dip = GFS2_I(dir);
834 struct gfs2_sbd *sdp = GFS2_SB(dir);
835 struct inode *inode = old_dentry->d_inode;
836 struct gfs2_inode *ip = GFS2_I(inode);
837 struct gfs2_holder ghs[2];
838 struct buffer_head *dibh;
839 int alloc_required;
840 int error;
841
842 if (S_ISDIR(inode->i_mode))
843 return -EPERM;
844
845 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
846 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
847
848 error = gfs2_glock_nq(ghs); /* parent */
849 if (error)
850 goto out_parent;
851
852 error = gfs2_glock_nq(ghs + 1); /* child */
853 if (error)
854 goto out_child;
855
856 error = -ENOENT;
857 if (inode->i_nlink == 0)
858 goto out_gunlock;
859
860 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
861 if (error)
862 goto out_gunlock;
863
864 error = gfs2_dir_check(dir, &dentry->d_name, NULL);
865 switch (error) {
866 case -ENOENT:
867 break;
868 case 0:
869 error = -EEXIST;
870 default:
871 goto out_gunlock;
872 }
873
874 error = -EINVAL;
875 if (!dip->i_inode.i_nlink)
876 goto out_gunlock;
877 error = -EFBIG;
878 if (dip->i_entries == (u32)-1)
879 goto out_gunlock;
880 error = -EPERM;
881 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
882 goto out_gunlock;
883 error = -EINVAL;
884 if (!ip->i_inode.i_nlink)
885 goto out_gunlock;
886 error = -EMLINK;
887 if (ip->i_inode.i_nlink == (u32)-1)
888 goto out_gunlock;
889
890 alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
891 if (error < 0)
892 goto out_gunlock;
893 error = 0;
894
895 if (alloc_required) {
896 struct gfs2_alloc *al = gfs2_alloc_get(dip);
897 if (!al) {
898 error = -ENOMEM;
899 goto out_gunlock;
900 }
901
902 error = gfs2_quota_lock_check(dip);
903 if (error)
904 goto out_alloc;
905
906 al->al_requested = sdp->sd_max_dirres;
907
908 error = gfs2_inplace_reserve(dip);
909 if (error)
910 goto out_gunlock_q;
911
912 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
913 gfs2_rg_blocks(al) +
914 2 * RES_DINODE + RES_STATFS +
915 RES_QUOTA, 0);
916 if (error)
917 goto out_ipres;
918 } else {
919 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
920 if (error)
921 goto out_ipres;
922 }
923
924 error = gfs2_meta_inode_buffer(ip, &dibh);
925 if (error)
926 goto out_end_trans;
927
928 error = gfs2_dir_add(dir, &dentry->d_name, ip);
929 if (error)
930 goto out_brelse;
931
932 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
933 inc_nlink(&ip->i_inode);
934 ip->i_inode.i_ctime = CURRENT_TIME;
935 gfs2_dinode_out(ip, dibh->b_data);
936 mark_inode_dirty(&ip->i_inode);
937
938out_brelse:
939 brelse(dibh);
940out_end_trans:
941 gfs2_trans_end(sdp);
942out_ipres:
943 if (alloc_required)
944 gfs2_inplace_release(dip);
945out_gunlock_q:
946 if (alloc_required)
947 gfs2_quota_unlock(dip);
948out_alloc:
949 if (alloc_required)
950 gfs2_alloc_put(dip);
951out_gunlock:
952 gfs2_glock_dq(ghs + 1);
953out_child:
954 gfs2_glock_dq(ghs);
955out_parent:
956 gfs2_holder_uninit(ghs);
957 gfs2_holder_uninit(ghs + 1);
958 if (!error) {
959 ihold(inode);
960 d_instantiate(dentry, inode);
961 mark_inode_dirty(inode);
962 }
963 return error;
964}
965
966/*
967 * gfs2_unlink_ok - check to see that a inode is still in a directory
968 * @dip: the directory
969 * @name: the name of the file
970 * @ip: the inode
971 *
972 * Assumes that the lock on (at least) @dip is held.
973 *
974 * Returns: 0 if the parent/child relationship is correct, errno if it isn't
975 */
976
977static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
978 const struct gfs2_inode *ip)
979{
980 int error;
981
982 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
983 return -EPERM;
984
985 if ((dip->i_inode.i_mode & S_ISVTX) &&
986 dip->i_inode.i_uid != current_fsuid() &&
987 ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
988 return -EPERM;
989
990 if (IS_APPEND(&dip->i_inode))
991 return -EPERM;
992
993 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
994 if (error)
995 return error;
996
997 error = gfs2_dir_check(&dip->i_inode, name, ip);
998 if (error)
999 return error;
1000
1001 return 0;
1002}
1003
1004/**
1005 * gfs2_unlink_inode - Removes an inode from its parent dir and unlinks it
1006 * @dip: The parent directory
1007 * @name: The name of the entry in the parent directory
1008 * @bh: The inode buffer for the inode to be removed
1009 * @inode: The inode to be removed
1010 *
1011 * Called with all the locks and in a transaction. This will only be
1012 * called for a directory after it has been checked to ensure it is empty.
1013 *
1014 * Returns: 0 on success, or an error
1015 */
1016
1017static int gfs2_unlink_inode(struct gfs2_inode *dip,
1018 const struct dentry *dentry,
1019 struct buffer_head *bh)
1020{
1021 struct inode *inode = dentry->d_inode;
1022 struct gfs2_inode *ip = GFS2_I(inode);
1023 int error;
1024
1025 error = gfs2_dir_del(dip, dentry);
1026 if (error)
1027 return error;
1028
1029 ip->i_entries = 0;
1030 inode->i_ctime = CURRENT_TIME;
1031 if (S_ISDIR(inode->i_mode))
1032 clear_nlink(inode);
1033 else
1034 drop_nlink(inode);
1035 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1036 gfs2_dinode_out(ip, bh->b_data);
1037 mark_inode_dirty(inode);
1038 if (inode->i_nlink == 0)
1039 gfs2_unlink_di(inode);
1040 return 0;
1041}
1042
1043
1044/**
1045 * gfs2_unlink - Unlink an inode (this does rmdir as well)
1046 * @dir: The inode of the directory containing the inode to unlink
1047 * @dentry: The file itself
1048 *
1049 * This routine uses the type of the inode as a flag to figure out
1050 * whether this is an unlink or an rmdir.
1051 *
1052 * Returns: errno
1053 */
1054
1055static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
1056{
1057 struct gfs2_inode *dip = GFS2_I(dir);
1058 struct gfs2_sbd *sdp = GFS2_SB(dir);
1059 struct inode *inode = dentry->d_inode;
1060 struct gfs2_inode *ip = GFS2_I(inode);
1061 struct buffer_head *bh;
1062 struct gfs2_holder ghs[3];
1063 struct gfs2_rgrpd *rgd;
1064 struct gfs2_holder ri_gh;
1065 int error;
1066
1067 error = gfs2_rindex_hold(sdp, &ri_gh);
1068 if (error)
1069 return error;
1070
1071 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
1072 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
1073
1074 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
1075 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
1076
1077
1078 error = gfs2_glock_nq(ghs); /* parent */
1079 if (error)
1080 goto out_parent;
1081
1082 error = gfs2_glock_nq(ghs + 1); /* child */
1083 if (error)
1084 goto out_child;
1085
1086 error = -ENOENT;
1087 if (inode->i_nlink == 0)
1088 goto out_rgrp;
1089
1090 if (S_ISDIR(inode->i_mode)) {
1091 error = -ENOTEMPTY;
1092 if (ip->i_entries > 2 || inode->i_nlink > 2)
1093 goto out_rgrp;
1094 }
1095
1096 error = gfs2_glock_nq(ghs + 2); /* rgrp */
1097 if (error)
1098 goto out_rgrp;
1099
1100 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
1101 if (error)
1102 goto out_gunlock;
1103
1104 error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0);
1105 if (error)
1106 goto out_gunlock;
1107
1108 error = gfs2_meta_inode_buffer(ip, &bh);
1109 if (error)
1110 goto out_end_trans;
1111
1112 error = gfs2_unlink_inode(dip, dentry, bh);
1113 brelse(bh);
1114
1115out_end_trans:
1116 gfs2_trans_end(sdp);
1117out_gunlock:
1118 gfs2_glock_dq(ghs + 2);
1119out_rgrp:
1120 gfs2_holder_uninit(ghs + 2);
1121 gfs2_glock_dq(ghs + 1);
1122out_child:
1123 gfs2_holder_uninit(ghs + 1);
1124 gfs2_glock_dq(ghs);
1125out_parent:
1126 gfs2_holder_uninit(ghs);
1127 gfs2_glock_dq_uninit(&ri_gh);
1128 return error;
1129}
1130
1131/**
1132 * gfs2_symlink - Create a symlink
1133 * @dir: The directory to create the symlink in
1134 * @dentry: The dentry to put the symlink in
1135 * @symname: The thing which the link points to
1136 *
1137 * Returns: errno
1138 */
1139
1140static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
1141 const char *symname)
1142{
1143 struct gfs2_sbd *sdp = GFS2_SB(dir);
1144 unsigned int size;
1145
1146 size = strlen(symname);
1147 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
1148 return -ENAMETOOLONG;
1149
1150 return gfs2_create_inode(dir, dentry, S_IFLNK | S_IRWXUGO, 0, symname, size);
1151}
1152
1153/**
1154 * gfs2_mkdir - Make a directory
1155 * @dir: The parent directory of the new one
1156 * @dentry: The dentry of the new directory
1157 * @mode: The mode of the new directory
1158 *
1159 * Returns: errno
1160 */
1161
1162static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1163{
1164 return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0);
1165}
1166
1167/**
1168 * gfs2_mknod - Make a special file
1169 * @dir: The directory in which the special file will reside
1170 * @dentry: The dentry of the special file
1171 * @mode: The mode of the special file
1172 * @dev: The device specification of the special file
1173 *
1174 */
1175
1176static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
1177 dev_t dev)
1178{
1179 return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0);
1180}
1181
1182/*
1183 * gfs2_ok_to_move - check if it's ok to move a directory to another directory
1184 * @this: move this
1185 * @to: to here
1186 *
1187 * Follow @to back to the root and make sure we don't encounter @this
1188 * Assumes we already hold the rename lock.
1189 *
1190 * Returns: errno
1191 */
1192
1193static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
1194{
1195 struct inode *dir = &to->i_inode;
1196 struct super_block *sb = dir->i_sb;
1197 struct inode *tmp;
1198 int error = 0;
1199
1200 igrab(dir);
1201
1202 for (;;) {
1203 if (dir == &this->i_inode) {
1204 error = -EINVAL;
1205 break;
1206 }
1207 if (dir == sb->s_root->d_inode) {
1208 error = 0;
1209 break;
1210 }
1211
1212 tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1);
1213 if (IS_ERR(tmp)) {
1214 error = PTR_ERR(tmp);
1215 break;
1216 }
1217
1218 iput(dir);
1219 dir = tmp;
1220 }
1221
1222 iput(dir);
1223
1224 return error;
1225}
1226
1227/**
1228 * gfs2_rename - Rename a file
1229 * @odir: Parent directory of old file name
1230 * @odentry: The old dentry of the file
1231 * @ndir: Parent directory of new file name
1232 * @ndentry: The new dentry of the file
1233 *
1234 * Returns: errno
1235 */
1236
1237static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1238 struct inode *ndir, struct dentry *ndentry)
1239{
1240 struct gfs2_inode *odip = GFS2_I(odir);
1241 struct gfs2_inode *ndip = GFS2_I(ndir);
1242 struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
1243 struct gfs2_inode *nip = NULL;
1244 struct gfs2_sbd *sdp = GFS2_SB(odir);
1245 struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh;
1246 struct gfs2_rgrpd *nrgd;
1247 unsigned int num_gh;
1248 int dir_rename = 0;
1249 int alloc_required = 0;
1250 unsigned int x;
1251 int error;
1252
1253 if (ndentry->d_inode) {
1254 nip = GFS2_I(ndentry->d_inode);
1255 if (ip == nip)
1256 return 0;
1257 }
1258
1259 error = gfs2_rindex_hold(sdp, &ri_gh);
1260 if (error)
1261 return error;
1262
1263 if (odip != ndip) {
1264 error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
1265 0, &r_gh);
1266 if (error)
1267 goto out;
1268
1269 if (S_ISDIR(ip->i_inode.i_mode)) {
1270 dir_rename = 1;
1271 /* don't move a dirctory into it's subdir */
1272 error = gfs2_ok_to_move(ip, ndip);
1273 if (error)
1274 goto out_gunlock_r;
1275 }
1276 }
1277
1278 num_gh = 1;
1279 gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
1280 if (odip != ndip) {
1281 gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
1282 num_gh++;
1283 }
1284 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
1285 num_gh++;
1286
1287 if (nip) {
1288 gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
1289 num_gh++;
1290 /* grab the resource lock for unlink flag twiddling
1291 * this is the case of the target file already existing
1292 * so we unlink before doing the rename
1293 */
1294 nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
1295 if (nrgd)
1296 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
1297 }
1298
1299 for (x = 0; x < num_gh; x++) {
1300 error = gfs2_glock_nq(ghs + x);
1301 if (error)
1302 goto out_gunlock;
1303 }
1304
1305 error = -ENOENT;
1306 if (ip->i_inode.i_nlink == 0)
1307 goto out_gunlock;
1308
1309 /* Check out the old directory */
1310
1311 error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
1312 if (error)
1313 goto out_gunlock;
1314
1315 /* Check out the new directory */
1316
1317 if (nip) {
1318 error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
1319 if (error)
1320 goto out_gunlock;
1321
1322 if (nip->i_inode.i_nlink == 0) {
1323 error = -EAGAIN;
1324 goto out_gunlock;
1325 }
1326
1327 if (S_ISDIR(nip->i_inode.i_mode)) {
1328 if (nip->i_entries < 2) {
1329 gfs2_consist_inode(nip);
1330 error = -EIO;
1331 goto out_gunlock;
1332 }
1333 if (nip->i_entries > 2) {
1334 error = -ENOTEMPTY;
1335 goto out_gunlock;
1336 }
1337 }
1338 } else {
1339 error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
1340 if (error)
1341 goto out_gunlock;
1342
1343 error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
1344 switch (error) {
1345 case -ENOENT:
1346 error = 0;
1347 break;
1348 case 0:
1349 error = -EEXIST;
1350 default:
1351 goto out_gunlock;
1352 };
1353
1354 if (odip != ndip) {
1355 if (!ndip->i_inode.i_nlink) {
1356 error = -ENOENT;
1357 goto out_gunlock;
1358 }
1359 if (ndip->i_entries == (u32)-1) {
1360 error = -EFBIG;
1361 goto out_gunlock;
1362 }
1363 if (S_ISDIR(ip->i_inode.i_mode) &&
1364 ndip->i_inode.i_nlink == (u32)-1) {
1365 error = -EMLINK;
1366 goto out_gunlock;
1367 }
1368 }
1369 }
1370
1371 /* Check out the dir to be renamed */
1372
1373 if (dir_rename) {
1374 error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
1375 if (error)
1376 goto out_gunlock;
1377 }
1378
1379 if (nip == NULL)
1380 alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
1381 error = alloc_required;
1382 if (error < 0)
1383 goto out_gunlock;
1384 error = 0;
1385
1386 if (alloc_required) {
1387 struct gfs2_alloc *al = gfs2_alloc_get(ndip);
1388 if (!al) {
1389 error = -ENOMEM;
1390 goto out_gunlock;
1391 }
1392
1393 error = gfs2_quota_lock_check(ndip);
1394 if (error)
1395 goto out_alloc;
1396
1397 al->al_requested = sdp->sd_max_dirres;
1398
1399 error = gfs2_inplace_reserve_ri(ndip);
1400 if (error)
1401 goto out_gunlock_q;
1402
1403 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
1404 gfs2_rg_blocks(al) +
1405 4 * RES_DINODE + 4 * RES_LEAF +
1406 RES_STATFS + RES_QUOTA + 4, 0);
1407 if (error)
1408 goto out_ipreserv;
1409 } else {
1410 error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
1411 5 * RES_LEAF + 4, 0);
1412 if (error)
1413 goto out_gunlock;
1414 }
1415
1416 /* Remove the target file, if it exists */
1417
1418 if (nip) {
1419 struct buffer_head *bh;
1420 error = gfs2_meta_inode_buffer(nip, &bh);
1421 if (error)
1422 goto out_end_trans;
1423 error = gfs2_unlink_inode(ndip, ndentry, bh);
1424 brelse(bh);
1425 }
1426
1427 if (dir_rename) {
1428 error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
1429 if (error)
1430 goto out_end_trans;
1431 } else {
1432 struct buffer_head *dibh;
1433 error = gfs2_meta_inode_buffer(ip, &dibh);
1434 if (error)
1435 goto out_end_trans;
1436 ip->i_inode.i_ctime = CURRENT_TIME;
1437 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1438 gfs2_dinode_out(ip, dibh->b_data);
1439 brelse(dibh);
1440 }
1441
1442 error = gfs2_dir_del(odip, odentry);
1443 if (error)
1444 goto out_end_trans;
1445
1446 error = gfs2_dir_add(ndir, &ndentry->d_name, ip);
1447 if (error)
1448 goto out_end_trans;
1449
1450out_end_trans:
1451 gfs2_trans_end(sdp);
1452out_ipreserv:
1453 if (alloc_required)
1454 gfs2_inplace_release(ndip);
1455out_gunlock_q:
1456 if (alloc_required)
1457 gfs2_quota_unlock(ndip);
1458out_alloc:
1459 if (alloc_required)
1460 gfs2_alloc_put(ndip);
1461out_gunlock:
1462 while (x--) {
1463 gfs2_glock_dq(ghs + x);
1464 gfs2_holder_uninit(ghs + x);
1465 }
1466out_gunlock_r:
1467 if (r_gh.gh_gl)
1468 gfs2_glock_dq_uninit(&r_gh);
1469out:
1470 gfs2_glock_dq_uninit(&ri_gh);
1471 return error;
1472}
1473
1474/**
1475 * gfs2_follow_link - Follow a symbolic link
1476 * @dentry: The dentry of the link
1477 * @nd: Data that we pass to vfs_follow_link()
1478 *
1479 * This can handle symlinks of any size.
1480 *
1481 * Returns: 0 on success or error code
1482 */
1483
1484static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
1485{
1486 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
1487 struct gfs2_holder i_gh;
1488 struct buffer_head *dibh;
1489 unsigned int size;
1490 char *buf;
1491 int error;
1492
1493 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
1494 error = gfs2_glock_nq(&i_gh);
1495 if (error) {
1496 gfs2_holder_uninit(&i_gh);
1497 nd_set_link(nd, ERR_PTR(error));
1498 return NULL;
1499 }
1500
1501 size = (unsigned int)i_size_read(&ip->i_inode);
1502 if (size == 0) {
1503 gfs2_consist_inode(ip);
1504 buf = ERR_PTR(-EIO);
1505 goto out;
1506 }
1507
1508 error = gfs2_meta_inode_buffer(ip, &dibh);
1509 if (error) {
1510 buf = ERR_PTR(error);
1511 goto out;
1512 }
1513
1514 buf = kzalloc(size + 1, GFP_NOFS);
1515 if (!buf)
1516 buf = ERR_PTR(-ENOMEM);
1517 else
1518 memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size);
1519 brelse(dibh);
1520out:
1521 gfs2_glock_dq_uninit(&i_gh);
1522 nd_set_link(nd, buf);
1523 return NULL;
1524}
1525
1526static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1527{
1528 char *s = nd_get_link(nd);
1529 if (!IS_ERR(s))
1530 kfree(s);
1531}
1532
1533/**
1534 * gfs2_permission -
1535 * @inode: The inode
1536 * @mask: The mask to be tested
1537 * @flags: Indicates whether this is an RCU path walk or not
1538 *
1539 * This may be called from the VFS directly, or from within GFS2 with the
1540 * inode locked, so we look to see if the glock is already locked and only
1541 * lock the glock if its not already been done.
1542 *
1543 * Returns: errno
1544 */
1545
1546int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
1547{
1548 struct gfs2_inode *ip;
1549 struct gfs2_holder i_gh;
1550 int error;
1551 int unlock = 0;
1552
1553
1554 ip = GFS2_I(inode);
1555 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1556 if (flags & IPERM_FLAG_RCU)
1557 return -ECHILD;
1558 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
1559 if (error)
1560 return error;
1561 unlock = 1;
1562 }
1563
1564 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
1565 error = -EACCES;
1566 else
1567 error = generic_permission(inode, mask, flags, gfs2_check_acl);
1568 if (unlock)
1569 gfs2_glock_dq_uninit(&i_gh);
1570
1571 return error;
906} 1572}
907 1573
908static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) 1574static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
@@ -928,8 +1594,6 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
928 * @ip: 1594 * @ip:
929 * @attr: 1595 * @attr:
930 * 1596 *
931 * Called with a reference on the vnode.
932 *
933 * Returns: errno 1597 * Returns: errno
934 */ 1598 */
935 1599
@@ -949,60 +1613,280 @@ int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
949 return error; 1613 return error;
950} 1614}
951 1615
952void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) 1616static int setattr_chown(struct inode *inode, struct iattr *attr)
953{ 1617{
954 struct gfs2_dinode *str = buf; 1618 struct gfs2_inode *ip = GFS2_I(inode);
955 1619 struct gfs2_sbd *sdp = GFS2_SB(inode);
956 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 1620 u32 ouid, ogid, nuid, ngid;
957 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); 1621 int error;
958 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); 1622
959 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); 1623 ouid = inode->i_uid;
960 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); 1624 ogid = inode->i_gid;
961 str->di_mode = cpu_to_be32(ip->i_inode.i_mode); 1625 nuid = attr->ia_uid;
962 str->di_uid = cpu_to_be32(ip->i_inode.i_uid); 1626 ngid = attr->ia_gid;
963 str->di_gid = cpu_to_be32(ip->i_inode.i_gid); 1627
964 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); 1628 if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
965 str->di_size = cpu_to_be64(i_size_read(&ip->i_inode)); 1629 ouid = nuid = NO_QUOTA_CHANGE;
966 str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); 1630 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
967 str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); 1631 ogid = ngid = NO_QUOTA_CHANGE;
968 str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); 1632
969 str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); 1633 if (!gfs2_alloc_get(ip))
970 1634 return -ENOMEM;
971 str->di_goal_meta = cpu_to_be64(ip->i_goal); 1635
972 str->di_goal_data = cpu_to_be64(ip->i_goal); 1636 error = gfs2_quota_lock(ip, nuid, ngid);
973 str->di_generation = cpu_to_be64(ip->i_generation); 1637 if (error)
974 1638 goto out_alloc;
975 str->di_flags = cpu_to_be32(ip->i_diskflags); 1639
976 str->di_height = cpu_to_be16(ip->i_height); 1640 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
977 str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && 1641 error = gfs2_quota_check(ip, nuid, ngid);
978 !(ip->i_diskflags & GFS2_DIF_EXHASH) ? 1642 if (error)
979 GFS2_FORMAT_DE : 0); 1643 goto out_gunlock_q;
980 str->di_depth = cpu_to_be16(ip->i_depth); 1644 }
981 str->di_entries = cpu_to_be32(ip->i_entries); 1645
982 1646 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
983 str->di_eattr = cpu_to_be64(ip->i_eattr); 1647 if (error)
984 str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); 1648 goto out_gunlock_q;
985 str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec); 1649
986 str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec); 1650 error = gfs2_setattr_simple(ip, attr);
987} 1651 if (error)
988 1652 goto out_end_trans;
989void gfs2_dinode_print(const struct gfs2_inode *ip) 1653
990{ 1654 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
991 printk(KERN_INFO " no_formal_ino = %llu\n", 1655 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
992 (unsigned long long)ip->i_no_formal_ino); 1656 gfs2_quota_change(ip, -blocks, ouid, ogid);
993 printk(KERN_INFO " no_addr = %llu\n", 1657 gfs2_quota_change(ip, blocks, nuid, ngid);
994 (unsigned long long)ip->i_no_addr); 1658 }
995 printk(KERN_INFO " i_size = %llu\n", 1659
996 (unsigned long long)i_size_read(&ip->i_inode)); 1660out_end_trans:
997 printk(KERN_INFO " blocks = %llu\n", 1661 gfs2_trans_end(sdp);
998 (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode)); 1662out_gunlock_q:
999 printk(KERN_INFO " i_goal = %llu\n", 1663 gfs2_quota_unlock(ip);
1000 (unsigned long long)ip->i_goal); 1664out_alloc:
1001 printk(KERN_INFO " i_diskflags = 0x%.8X\n", ip->i_diskflags); 1665 gfs2_alloc_put(ip);
1002 printk(KERN_INFO " i_height = %u\n", ip->i_height); 1666 return error;
1003 printk(KERN_INFO " i_depth = %u\n", ip->i_depth); 1667}
1004 printk(KERN_INFO " i_entries = %u\n", ip->i_entries); 1668
1005 printk(KERN_INFO " i_eattr = %llu\n", 1669/**
1006 (unsigned long long)ip->i_eattr); 1670 * gfs2_setattr - Change attributes on an inode
1671 * @dentry: The dentry which is changing
1672 * @attr: The structure describing the change
1673 *
1674 * The VFS layer wants to change one or more of an inodes attributes. Write
1675 * that change out to disk.
1676 *
1677 * Returns: errno
1678 */
1679
1680static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
1681{
1682 struct inode *inode = dentry->d_inode;
1683 struct gfs2_inode *ip = GFS2_I(inode);
1684 struct gfs2_holder i_gh;
1685 int error;
1686
1687 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1688 if (error)
1689 return error;
1690
1691 error = -EPERM;
1692 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1693 goto out;
1694
1695 error = inode_change_ok(inode, attr);
1696 if (error)
1697 goto out;
1698
1699 if (attr->ia_valid & ATTR_SIZE)
1700 error = gfs2_setattr_size(inode, attr->ia_size);
1701 else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
1702 error = setattr_chown(inode, attr);
1703 else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
1704 error = gfs2_acl_chmod(ip, attr);
1705 else
1706 error = gfs2_setattr_simple(ip, attr);
1707
1708out:
1709 gfs2_glock_dq_uninit(&i_gh);
1710 if (!error)
1711 mark_inode_dirty(inode);
1712 return error;
1713}
1714
1715/**
1716 * gfs2_getattr - Read out an inode's attributes
1717 * @mnt: The vfsmount the inode is being accessed from
1718 * @dentry: The dentry to stat
1719 * @stat: The inode's stats
1720 *
1721 * This may be called from the VFS directly, or from within GFS2 with the
1722 * inode locked, so we look to see if the glock is already locked and only
1723 * lock the glock if its not already been done. Note that its the NFS
1724 * readdirplus operation which causes this to be called (from filldir)
1725 * with the glock already held.
1726 *
1727 * Returns: errno
1728 */
1729
1730static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
1731 struct kstat *stat)
1732{
1733 struct inode *inode = dentry->d_inode;
1734 struct gfs2_inode *ip = GFS2_I(inode);
1735 struct gfs2_holder gh;
1736 int error;
1737 int unlock = 0;
1738
1739 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1740 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1741 if (error)
1742 return error;
1743 unlock = 1;
1744 }
1745
1746 generic_fillattr(inode, stat);
1747 if (unlock)
1748 gfs2_glock_dq_uninit(&gh);
1749
1750 return 0;
1751}
1752
1753static int gfs2_setxattr(struct dentry *dentry, const char *name,
1754 const void *data, size_t size, int flags)
1755{
1756 struct inode *inode = dentry->d_inode;
1757 struct gfs2_inode *ip = GFS2_I(inode);
1758 struct gfs2_holder gh;
1759 int ret;
1760
1761 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1762 ret = gfs2_glock_nq(&gh);
1763 if (ret == 0) {
1764 ret = generic_setxattr(dentry, name, data, size, flags);
1765 gfs2_glock_dq(&gh);
1766 }
1767 gfs2_holder_uninit(&gh);
1768 return ret;
1769}
1770
1771static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
1772 void *data, size_t size)
1773{
1774 struct inode *inode = dentry->d_inode;
1775 struct gfs2_inode *ip = GFS2_I(inode);
1776 struct gfs2_holder gh;
1777 int ret;
1778
1779 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1780 ret = gfs2_glock_nq(&gh);
1781 if (ret == 0) {
1782 ret = generic_getxattr(dentry, name, data, size);
1783 gfs2_glock_dq(&gh);
1784 }
1785 gfs2_holder_uninit(&gh);
1786 return ret;
1787}
1788
1789static int gfs2_removexattr(struct dentry *dentry, const char *name)
1790{
1791 struct inode *inode = dentry->d_inode;
1792 struct gfs2_inode *ip = GFS2_I(inode);
1793 struct gfs2_holder gh;
1794 int ret;
1795
1796 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1797 ret = gfs2_glock_nq(&gh);
1798 if (ret == 0) {
1799 ret = generic_removexattr(dentry, name);
1800 gfs2_glock_dq(&gh);
1801 }
1802 gfs2_holder_uninit(&gh);
1803 return ret;
1804}
1805
1806static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1807 u64 start, u64 len)
1808{
1809 struct gfs2_inode *ip = GFS2_I(inode);
1810 struct gfs2_holder gh;
1811 int ret;
1812
1813 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
1814 if (ret)
1815 return ret;
1816
1817 mutex_lock(&inode->i_mutex);
1818
1819 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
1820 if (ret)
1821 goto out;
1822
1823 if (gfs2_is_stuffed(ip)) {
1824 u64 phys = ip->i_no_addr << inode->i_blkbits;
1825 u64 size = i_size_read(inode);
1826 u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
1827 FIEMAP_EXTENT_DATA_INLINE;
1828 phys += sizeof(struct gfs2_dinode);
1829 phys += start;
1830 if (start + len > size)
1831 len = size - start;
1832 if (start < size)
1833 ret = fiemap_fill_next_extent(fieinfo, start, phys,
1834 len, flags);
1835 if (ret == 1)
1836 ret = 0;
1837 } else {
1838 ret = __generic_block_fiemap(inode, fieinfo, start, len,
1839 gfs2_block_map);
1840 }
1841
1842 gfs2_glock_dq_uninit(&gh);
1843out:
1844 mutex_unlock(&inode->i_mutex);
1845 return ret;
1007} 1846}
1008 1847
1848const struct inode_operations gfs2_file_iops = {
1849 .permission = gfs2_permission,
1850 .setattr = gfs2_setattr,
1851 .getattr = gfs2_getattr,
1852 .setxattr = gfs2_setxattr,
1853 .getxattr = gfs2_getxattr,
1854 .listxattr = gfs2_listxattr,
1855 .removexattr = gfs2_removexattr,
1856 .fiemap = gfs2_fiemap,
1857};
1858
1859const struct inode_operations gfs2_dir_iops = {
1860 .create = gfs2_create,
1861 .lookup = gfs2_lookup,
1862 .link = gfs2_link,
1863 .unlink = gfs2_unlink,
1864 .symlink = gfs2_symlink,
1865 .mkdir = gfs2_mkdir,
1866 .rmdir = gfs2_unlink,
1867 .mknod = gfs2_mknod,
1868 .rename = gfs2_rename,
1869 .permission = gfs2_permission,
1870 .setattr = gfs2_setattr,
1871 .getattr = gfs2_getattr,
1872 .setxattr = gfs2_setxattr,
1873 .getxattr = gfs2_getxattr,
1874 .listxattr = gfs2_listxattr,
1875 .removexattr = gfs2_removexattr,
1876 .fiemap = gfs2_fiemap,
1877};
1878
1879const struct inode_operations gfs2_symlink_iops = {
1880 .readlink = generic_readlink,
1881 .follow_link = gfs2_follow_link,
1882 .put_link = gfs2_put_link,
1883 .permission = gfs2_permission,
1884 .setattr = gfs2_setattr,
1885 .getattr = gfs2_getattr,
1886 .setxattr = gfs2_setxattr,
1887 .getxattr = gfs2_getxattr,
1888 .listxattr = gfs2_listxattr,
1889 .removexattr = gfs2_removexattr,
1890 .fiemap = gfs2_fiemap,
1891};
1892
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 099ca305e518..31606076f701 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -102,22 +102,16 @@ extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
102extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, 102extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
103 u64 *no_formal_ino, 103 u64 *no_formal_ino,
104 unsigned int blktype); 104 unsigned int blktype);
105extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); 105extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int nonblock);
106 106
107extern int gfs2_inode_refresh(struct gfs2_inode *ip); 107extern int gfs2_inode_refresh(struct gfs2_inode *ip);
108 108
109extern int gfs2_dinode_dealloc(struct gfs2_inode *inode);
110extern int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
111extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, 109extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
112 int is_root); 110 int is_root);
113extern struct inode *gfs2_createi(struct gfs2_holder *ghs,
114 const struct qstr *name,
115 unsigned int mode, dev_t dev);
116extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags); 111extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags);
117extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); 112extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
118extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); 113extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
119extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); 114extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
120extern void gfs2_dinode_print(const struct gfs2_inode *ip);
121 115
122extern const struct inode_operations gfs2_file_iops; 116extern const struct inode_operations gfs2_file_iops;
123extern const struct inode_operations gfs2_dir_iops; 117extern const struct inode_operations gfs2_dir_iops;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 5b102c1887fd..903115f2bb34 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -18,6 +18,7 @@
18#include <linux/kthread.h> 18#include <linux/kthread.h>
19#include <linux/freezer.h> 19#include <linux/freezer.h>
20#include <linux/bio.h> 20#include <linux/bio.h>
21#include <linux/writeback.h>
21 22
22#include "gfs2.h" 23#include "gfs2.h"
23#include "incore.h" 24#include "incore.h"
@@ -83,55 +84,97 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
83/** 84/**
84 * gfs2_ail1_start_one - Start I/O on a part of the AIL 85 * gfs2_ail1_start_one - Start I/O on a part of the AIL
85 * @sdp: the filesystem 86 * @sdp: the filesystem
86 * @tr: the part of the AIL 87 * @wbc: The writeback control structure
88 * @ai: The ail structure
87 * 89 *
88 */ 90 */
89 91
90static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) 92static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
93 struct writeback_control *wbc,
94 struct gfs2_ail *ai)
91__releases(&sdp->sd_ail_lock) 95__releases(&sdp->sd_ail_lock)
92__acquires(&sdp->sd_ail_lock) 96__acquires(&sdp->sd_ail_lock)
93{ 97{
98 struct gfs2_glock *gl = NULL;
99 struct address_space *mapping;
94 struct gfs2_bufdata *bd, *s; 100 struct gfs2_bufdata *bd, *s;
95 struct buffer_head *bh; 101 struct buffer_head *bh;
96 int retry;
97 102
98 do { 103 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, bd_ail_st_list) {
99 retry = 0; 104 bh = bd->bd_bh;
100 105
101 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, 106 gfs2_assert(sdp, bd->bd_ail == ai);
102 bd_ail_st_list) {
103 bh = bd->bd_bh;
104 107
105 gfs2_assert(sdp, bd->bd_ail == ai); 108 if (!buffer_busy(bh)) {
109 if (!buffer_uptodate(bh))
110 gfs2_io_error_bh(sdp, bh);
111 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
112 continue;
113 }
106 114
107 if (!buffer_busy(bh)) { 115 if (!buffer_dirty(bh))
108 if (!buffer_uptodate(bh)) 116 continue;
109 gfs2_io_error_bh(sdp, bh); 117 if (gl == bd->bd_gl)
110 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); 118 continue;
111 continue; 119 gl = bd->bd_gl;
112 } 120 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
121 mapping = bh->b_page->mapping;
122 if (!mapping)
123 continue;
124 spin_unlock(&sdp->sd_ail_lock);
125 generic_writepages(mapping, wbc);
126 spin_lock(&sdp->sd_ail_lock);
127 if (wbc->nr_to_write <= 0)
128 break;
129 return 1;
130 }
113 131
114 if (!buffer_dirty(bh)) 132 return 0;
115 continue; 133}
116 134
117 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
118 135
119 get_bh(bh); 136/**
120 spin_unlock(&sdp->sd_ail_lock); 137 * gfs2_ail1_flush - start writeback of some ail1 entries
121 lock_buffer(bh); 138 * @sdp: The super block
122 if (test_clear_buffer_dirty(bh)) { 139 * @wbc: The writeback control structure
123 bh->b_end_io = end_buffer_write_sync; 140 *
124 submit_bh(WRITE_SYNC, bh); 141 * Writes back some ail1 entries, according to the limits in the
125 } else { 142 * writeback control structure
126 unlock_buffer(bh); 143 */
127 brelse(bh); 144
128 } 145void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
129 spin_lock(&sdp->sd_ail_lock); 146{
130 147 struct list_head *head = &sdp->sd_ail1_list;
131 retry = 1; 148 struct gfs2_ail *ai;
149
150 trace_gfs2_ail_flush(sdp, wbc, 1);
151 spin_lock(&sdp->sd_ail_lock);
152restart:
153 list_for_each_entry_reverse(ai, head, ai_list) {
154 if (wbc->nr_to_write <= 0)
132 break; 155 break;
133 } 156 if (gfs2_ail1_start_one(sdp, wbc, ai))
134 } while (retry); 157 goto restart;
158 }
159 spin_unlock(&sdp->sd_ail_lock);
160 trace_gfs2_ail_flush(sdp, wbc, 0);
161}
162
163/**
164 * gfs2_ail1_start - start writeback of all ail1 entries
165 * @sdp: The superblock
166 */
167
168static void gfs2_ail1_start(struct gfs2_sbd *sdp)
169{
170 struct writeback_control wbc = {
171 .sync_mode = WB_SYNC_NONE,
172 .nr_to_write = LONG_MAX,
173 .range_start = 0,
174 .range_end = LLONG_MAX,
175 };
176
177 return gfs2_ail1_flush(sdp, &wbc);
135} 178}
136 179
137/** 180/**
@@ -141,7 +184,7 @@ __acquires(&sdp->sd_ail_lock)
141 * 184 *
142 */ 185 */
143 186
144static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags) 187static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
145{ 188{
146 struct gfs2_bufdata *bd, *s; 189 struct gfs2_bufdata *bd, *s;
147 struct buffer_head *bh; 190 struct buffer_head *bh;
@@ -149,76 +192,63 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
149 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, 192 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
150 bd_ail_st_list) { 193 bd_ail_st_list) {
151 bh = bd->bd_bh; 194 bh = bd->bd_bh;
152
153 gfs2_assert(sdp, bd->bd_ail == ai); 195 gfs2_assert(sdp, bd->bd_ail == ai);
154 196 if (buffer_busy(bh))
155 if (buffer_busy(bh)) { 197 continue;
156 if (flags & DIO_ALL)
157 continue;
158 else
159 break;
160 }
161
162 if (!buffer_uptodate(bh)) 198 if (!buffer_uptodate(bh))
163 gfs2_io_error_bh(sdp, bh); 199 gfs2_io_error_bh(sdp, bh);
164
165 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); 200 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
166 } 201 }
167 202
168 return list_empty(&ai->ai_ail1_list);
169} 203}
170 204
171static void gfs2_ail1_start(struct gfs2_sbd *sdp) 205/**
172{ 206 * gfs2_ail1_empty - Try to empty the ail1 lists
173 struct list_head *head; 207 * @sdp: The superblock
174 u64 sync_gen; 208 *
175 struct gfs2_ail *ai; 209 * Tries to empty the ail1 lists, starting with the oldest first
176 int done = 0; 210 */
177
178 spin_lock(&sdp->sd_ail_lock);
179 head = &sdp->sd_ail1_list;
180 if (list_empty(head)) {
181 spin_unlock(&sdp->sd_ail_lock);
182 return;
183 }
184 sync_gen = sdp->sd_ail_sync_gen++;
185
186 while(!done) {
187 done = 1;
188 list_for_each_entry_reverse(ai, head, ai_list) {
189 if (ai->ai_sync_gen >= sync_gen)
190 continue;
191 ai->ai_sync_gen = sync_gen;
192 gfs2_ail1_start_one(sdp, ai); /* This may drop ail lock */
193 done = 0;
194 break;
195 }
196 }
197
198 spin_unlock(&sdp->sd_ail_lock);
199}
200 211
201static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) 212static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
202{ 213{
203 struct gfs2_ail *ai, *s; 214 struct gfs2_ail *ai, *s;
204 int ret; 215 int ret;
205 216
206 spin_lock(&sdp->sd_ail_lock); 217 spin_lock(&sdp->sd_ail_lock);
207
208 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) { 218 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
209 if (gfs2_ail1_empty_one(sdp, ai, flags)) 219 gfs2_ail1_empty_one(sdp, ai);
220 if (list_empty(&ai->ai_ail1_list))
210 list_move(&ai->ai_list, &sdp->sd_ail2_list); 221 list_move(&ai->ai_list, &sdp->sd_ail2_list);
211 else if (!(flags & DIO_ALL)) 222 else
212 break; 223 break;
213 } 224 }
214
215 ret = list_empty(&sdp->sd_ail1_list); 225 ret = list_empty(&sdp->sd_ail1_list);
216
217 spin_unlock(&sdp->sd_ail_lock); 226 spin_unlock(&sdp->sd_ail_lock);
218 227
219 return ret; 228 return ret;
220} 229}
221 230
231static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
232{
233 struct gfs2_ail *ai;
234 struct gfs2_bufdata *bd;
235 struct buffer_head *bh;
236
237 spin_lock(&sdp->sd_ail_lock);
238 list_for_each_entry_reverse(ai, &sdp->sd_ail1_list, ai_list) {
239 list_for_each_entry(bd, &ai->ai_ail1_list, bd_ail_st_list) {
240 bh = bd->bd_bh;
241 if (!buffer_locked(bh))
242 continue;
243 get_bh(bh);
244 spin_unlock(&sdp->sd_ail_lock);
245 wait_on_buffer(bh);
246 brelse(bh);
247 return;
248 }
249 }
250 spin_unlock(&sdp->sd_ail_lock);
251}
222 252
223/** 253/**
224 * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced 254 * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
@@ -574,7 +604,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
574 set_buffer_uptodate(bh); 604 set_buffer_uptodate(bh);
575 clear_buffer_dirty(bh); 605 clear_buffer_dirty(bh);
576 606
577 gfs2_ail1_empty(sdp, 0); 607 gfs2_ail1_empty(sdp);
578 tail = current_tail(sdp); 608 tail = current_tail(sdp);
579 609
580 lh = (struct gfs2_log_header *)bh->b_data; 610 lh = (struct gfs2_log_header *)bh->b_data;
@@ -869,9 +899,9 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
869 gfs2_log_flush(sdp, NULL); 899 gfs2_log_flush(sdp, NULL);
870 for (;;) { 900 for (;;) {
871 gfs2_ail1_start(sdp); 901 gfs2_ail1_start(sdp);
872 if (gfs2_ail1_empty(sdp, DIO_ALL)) 902 gfs2_ail1_wait(sdp);
903 if (gfs2_ail1_empty(sdp))
873 break; 904 break;
874 msleep(10);
875 } 905 }
876} 906}
877 907
@@ -905,20 +935,20 @@ int gfs2_logd(void *data)
905 935
906 preflush = atomic_read(&sdp->sd_log_pinned); 936 preflush = atomic_read(&sdp->sd_log_pinned);
907 if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { 937 if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
908 gfs2_ail1_empty(sdp, DIO_ALL); 938 gfs2_ail1_empty(sdp);
909 gfs2_log_flush(sdp, NULL); 939 gfs2_log_flush(sdp, NULL);
910 gfs2_ail1_empty(sdp, DIO_ALL);
911 } 940 }
912 941
913 if (gfs2_ail_flush_reqd(sdp)) { 942 if (gfs2_ail_flush_reqd(sdp)) {
914 gfs2_ail1_start(sdp); 943 gfs2_ail1_start(sdp);
915 io_schedule(); 944 gfs2_ail1_wait(sdp);
916 gfs2_ail1_empty(sdp, 0); 945 gfs2_ail1_empty(sdp);
917 gfs2_log_flush(sdp, NULL); 946 gfs2_log_flush(sdp, NULL);
918 gfs2_ail1_empty(sdp, DIO_ALL);
919 } 947 }
920 948
921 wake_up(&sdp->sd_log_waitq); 949 if (!gfs2_ail_flush_reqd(sdp))
950 wake_up(&sdp->sd_log_waitq);
951
922 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; 952 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
923 if (freezing(current)) 953 if (freezing(current))
924 refrigerator(); 954 refrigerator();
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 0d007f920234..ab0621698b73 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -12,6 +12,7 @@
12 12
13#include <linux/list.h> 13#include <linux/list.h>
14#include <linux/spinlock.h> 14#include <linux/spinlock.h>
15#include <linux/writeback.h>
15#include "incore.h" 16#include "incore.h"
16 17
17/** 18/**
@@ -59,6 +60,7 @@ extern struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
59extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); 60extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
60extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); 61extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
61extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); 62extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
63extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc);
62 64
63extern void gfs2_log_shutdown(struct gfs2_sbd *sdp); 65extern void gfs2_log_shutdown(struct gfs2_sbd *sdp);
64extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp); 66extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 51d27f00ebb4..05bbb124699f 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -40,7 +40,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
40{ 40{
41 struct gfs2_bufdata *bd; 41 struct gfs2_bufdata *bd;
42 42
43 gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)); 43 BUG_ON(!current->journal_info);
44 44
45 clear_buffer_dirty(bh); 45 clear_buffer_dirty(bh);
46 if (test_set_buffer_pinned(bh)) 46 if (test_set_buffer_pinned(bh))
@@ -65,6 +65,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
65 * @sdp: the filesystem the buffer belongs to 65 * @sdp: the filesystem the buffer belongs to
66 * @bh: The buffer to unpin 66 * @bh: The buffer to unpin
67 * @ai: 67 * @ai:
68 * @flags: The inode dirty flags
68 * 69 *
69 */ 70 */
70 71
@@ -73,10 +74,8 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
73{ 74{
74 struct gfs2_bufdata *bd = bh->b_private; 75 struct gfs2_bufdata *bd = bh->b_private;
75 76
76 gfs2_assert_withdraw(sdp, buffer_uptodate(bh)); 77 BUG_ON(!buffer_uptodate(bh));
77 78 BUG_ON(!buffer_pinned(bh));
78 if (!buffer_pinned(bh))
79 gfs2_assert_withdraw(sdp, 0);
80 79
81 lock_buffer(bh); 80 lock_buffer(bh);
82 mark_buffer_dirty(bh); 81 mark_buffer_dirty(bh);
@@ -95,8 +94,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
95 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); 94 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
96 spin_unlock(&sdp->sd_ail_lock); 95 spin_unlock(&sdp->sd_ail_lock);
97 96
98 if (test_and_clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags)) 97 clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
99 gfs2_glock_schedule_for_reclaim(bd->bd_gl);
100 trace_gfs2_pin(bd, 0); 98 trace_gfs2_pin(bd, 0);
101 unlock_buffer(bh); 99 unlock_buffer(bh);
102 atomic_dec(&sdp->sd_log_pinned); 100 atomic_dec(&sdp->sd_log_pinned);
@@ -322,12 +320,16 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
322 320
323static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 321static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
324{ 322{
323 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
324 struct gfs2_glock *gl = bd->bd_gl;
325 struct gfs2_trans *tr; 325 struct gfs2_trans *tr;
326 326
327 tr = current->journal_info; 327 tr = current->journal_info;
328 tr->tr_touched = 1; 328 tr->tr_touched = 1;
329 tr->tr_num_revoke++; 329 tr->tr_num_revoke++;
330 sdp->sd_log_num_revoke++; 330 sdp->sd_log_num_revoke++;
331 atomic_inc(&gl->gl_revokes);
332 set_bit(GLF_LFLUSH, &gl->gl_flags);
331 list_add(&le->le_list, &sdp->sd_log_le_revoke); 333 list_add(&le->le_list, &sdp->sd_log_le_revoke);
332} 334}
333 335
@@ -350,9 +352,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
350 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke); 352 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
351 offset = sizeof(struct gfs2_log_descriptor); 353 offset = sizeof(struct gfs2_log_descriptor);
352 354
353 while (!list_empty(head)) { 355 list_for_each_entry(bd, head, bd_le.le_list) {
354 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
355 list_del_init(&bd->bd_le.le_list);
356 sdp->sd_log_num_revoke--; 356 sdp->sd_log_num_revoke--;
357 357
358 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { 358 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
@@ -367,8 +367,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
367 } 367 }
368 368
369 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno); 369 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno);
370 kmem_cache_free(gfs2_bufdata_cachep, bd);
371
372 offset += sizeof(u64); 370 offset += sizeof(u64);
373 } 371 }
374 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); 372 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
@@ -376,6 +374,22 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
376 submit_bh(WRITE_SYNC, bh); 374 submit_bh(WRITE_SYNC, bh);
377} 375}
378 376
377static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
378{
379 struct list_head *head = &sdp->sd_log_le_revoke;
380 struct gfs2_bufdata *bd;
381 struct gfs2_glock *gl;
382
383 while (!list_empty(head)) {
384 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
385 list_del_init(&bd->bd_le.le_list);
386 gl = bd->bd_gl;
387 atomic_dec(&gl->gl_revokes);
388 clear_bit(GLF_LFLUSH, &gl->gl_flags);
389 kmem_cache_free(gfs2_bufdata_cachep, bd);
390 }
391}
392
379static void revoke_lo_before_scan(struct gfs2_jdesc *jd, 393static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
380 struct gfs2_log_header_host *head, int pass) 394 struct gfs2_log_header_host *head, int pass)
381{ 395{
@@ -749,6 +763,7 @@ const struct gfs2_log_operations gfs2_buf_lops = {
749const struct gfs2_log_operations gfs2_revoke_lops = { 763const struct gfs2_log_operations gfs2_revoke_lops = {
750 .lo_add = revoke_lo_add, 764 .lo_add = revoke_lo_add,
751 .lo_before_commit = revoke_lo_before_commit, 765 .lo_before_commit = revoke_lo_before_commit,
766 .lo_after_commit = revoke_lo_after_commit,
752 .lo_before_scan = revoke_lo_before_scan, 767 .lo_before_scan = revoke_lo_before_scan,
753 .lo_scan_elements = revoke_lo_scan_elements, 768 .lo_scan_elements = revoke_lo_scan_elements,
754 .lo_after_scan = revoke_lo_after_scan, 769 .lo_after_scan = revoke_lo_after_scan,
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 888a5f5a1a58..cfa327d33194 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -53,6 +53,7 @@ static void gfs2_init_glock_once(void *foo)
53 INIT_LIST_HEAD(&gl->gl_lru); 53 INIT_LIST_HEAD(&gl->gl_lru);
54 INIT_LIST_HEAD(&gl->gl_ail_list); 54 INIT_LIST_HEAD(&gl->gl_ail_list);
55 atomic_set(&gl->gl_ail_count, 0); 55 atomic_set(&gl->gl_ail_count, 0);
56 atomic_set(&gl->gl_revokes, 0);
56} 57}
57 58
58static void gfs2_init_gl_aspace_once(void *foo) 59static void gfs2_init_gl_aspace_once(void *foo)
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 675349b5a133..747238cd9f96 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -31,6 +31,7 @@
31#include "rgrp.h" 31#include "rgrp.h"
32#include "trans.h" 32#include "trans.h"
33#include "util.h" 33#include "util.h"
34#include "trace_gfs2.h"
34 35
35static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc) 36static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc)
36{ 37{
@@ -310,6 +311,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
310 struct gfs2_bufdata *bd = bh->b_private; 311 struct gfs2_bufdata *bd = bh->b_private;
311 312
312 if (test_clear_buffer_pinned(bh)) { 313 if (test_clear_buffer_pinned(bh)) {
314 trace_gfs2_pin(bd, 0);
313 atomic_dec(&sdp->sd_log_pinned); 315 atomic_dec(&sdp->sd_log_pinned);
314 list_del_init(&bd->bd_le.le_list); 316 list_del_init(&bd->bd_le.le_list);
315 if (meta) { 317 if (meta) {
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index 6a1d9ba16411..22c526593131 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -77,8 +77,6 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
77 77
78#define buffer_busy(bh) \ 78#define buffer_busy(bh) \
79((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned))) 79((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
80#define buffer_in_io(bh) \
81((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
82 80
83#endif /* __DIO_DOT_H__ */ 81#endif /* __DIO_DOT_H__ */
84 82
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index d3c69eb91c74..8ac9ae189b53 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -126,8 +126,10 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
126 * changed. 126 * changed.
127 */ 127 */
128 128
129static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) 129static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
130{ 130{
131 struct gfs2_sb_host *sb = &sdp->sd_sb;
132
131 if (sb->sb_magic != GFS2_MAGIC || 133 if (sb->sb_magic != GFS2_MAGIC ||
132 sb->sb_type != GFS2_METATYPE_SB) { 134 sb->sb_type != GFS2_METATYPE_SB) {
133 if (!silent) 135 if (!silent)
@@ -157,8 +159,10 @@ static void end_bio_io_page(struct bio *bio, int error)
157 unlock_page(page); 159 unlock_page(page);
158} 160}
159 161
160static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) 162static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf)
161{ 163{
164 struct gfs2_sb_host *sb = &sdp->sd_sb;
165 struct super_block *s = sdp->sd_vfs;
162 const struct gfs2_sb *str = buf; 166 const struct gfs2_sb *str = buf;
163 167
164 sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); 168 sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
@@ -175,7 +179,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
175 179
176 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); 180 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
177 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); 181 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
178 memcpy(sb->sb_uuid, str->sb_uuid, 16); 182 memcpy(s->s_uuid, str->sb_uuid, 16);
179} 183}
180 184
181/** 185/**
@@ -197,7 +201,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
197 * Returns: 0 on success or error 201 * Returns: 0 on success or error
198 */ 202 */
199 203
200static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) 204static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
201{ 205{
202 struct super_block *sb = sdp->sd_vfs; 206 struct super_block *sb = sdp->sd_vfs;
203 struct gfs2_sb *p; 207 struct gfs2_sb *p;
@@ -227,10 +231,10 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
227 return -EIO; 231 return -EIO;
228 } 232 }
229 p = kmap(page); 233 p = kmap(page);
230 gfs2_sb_in(&sdp->sd_sb, p); 234 gfs2_sb_in(sdp, p);
231 kunmap(page); 235 kunmap(page);
232 __free_page(page); 236 __free_page(page);
233 return 0; 237 return gfs2_check_sb(sdp, silent);
234} 238}
235 239
236/** 240/**
@@ -247,17 +251,13 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
247 unsigned int x; 251 unsigned int x;
248 int error; 252 int error;
249 253
250 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); 254 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent);
251 if (error) { 255 if (error) {
252 if (!silent) 256 if (!silent)
253 fs_err(sdp, "can't read superblock\n"); 257 fs_err(sdp, "can't read superblock\n");
254 return error; 258 return error;
255 } 259 }
256 260
257 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
258 if (error)
259 return error;
260
261 sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - 261 sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
262 GFS2_BASIC_BLOCK_SHIFT; 262 GFS2_BASIC_BLOCK_SHIFT;
263 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; 263 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
@@ -340,14 +340,10 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
340 /* Try to autodetect */ 340 /* Try to autodetect */
341 341
342 if (!proto[0] || !table[0]) { 342 if (!proto[0] || !table[0]) {
343 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); 343 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent);
344 if (error) 344 if (error)
345 return error; 345 return error;
346 346
347 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
348 if (error)
349 goto out;
350
351 if (!proto[0]) 347 if (!proto[0])
352 proto = sdp->sd_sb.sb_lockproto; 348 proto = sdp->sd_sb.sb_lockproto;
353 if (!table[0]) 349 if (!table[0])
@@ -364,7 +360,6 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
364 while ((table = strchr(table, '/'))) 360 while ((table = strchr(table, '/')))
365 *table = '_'; 361 *table = '_';
366 362
367out:
368 return error; 363 return error;
369} 364}
370 365
@@ -1119,8 +1114,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
1119 if (sdp->sd_args.ar_statfs_quantum) { 1114 if (sdp->sd_args.ar_statfs_quantum) {
1120 sdp->sd_tune.gt_statfs_slow = 0; 1115 sdp->sd_tune.gt_statfs_slow = 0;
1121 sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum; 1116 sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum;
1122 } 1117 } else {
1123 else {
1124 sdp->sd_tune.gt_statfs_slow = 1; 1118 sdp->sd_tune.gt_statfs_slow = 1;
1125 sdp->sd_tune.gt_statfs_quantum = 30; 1119 sdp->sd_tune.gt_statfs_quantum = 30;
1126 } 1120 }
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
deleted file mode 100644
index 09e436a50723..000000000000
--- a/fs/gfs2/ops_inode.c
+++ /dev/null
@@ -1,1344 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/slab.h>
11#include <linux/spinlock.h>
12#include <linux/completion.h>
13#include <linux/buffer_head.h>
14#include <linux/namei.h>
15#include <linux/mm.h>
16#include <linux/xattr.h>
17#include <linux/posix_acl.h>
18#include <linux/gfs2_ondisk.h>
19#include <linux/crc32.h>
20#include <linux/fiemap.h>
21#include <asm/uaccess.h>
22
23#include "gfs2.h"
24#include "incore.h"
25#include "acl.h"
26#include "bmap.h"
27#include "dir.h"
28#include "xattr.h"
29#include "glock.h"
30#include "inode.h"
31#include "meta_io.h"
32#include "quota.h"
33#include "rgrp.h"
34#include "trans.h"
35#include "util.h"
36#include "super.h"
37
38/**
39 * gfs2_create - Create a file
40 * @dir: The directory in which to create the file
41 * @dentry: The dentry of the new file
42 * @mode: The mode of the new file
43 *
44 * Returns: errno
45 */
46
47static int gfs2_create(struct inode *dir, struct dentry *dentry,
48 int mode, struct nameidata *nd)
49{
50 struct gfs2_inode *dip = GFS2_I(dir);
51 struct gfs2_sbd *sdp = GFS2_SB(dir);
52 struct gfs2_holder ghs[2];
53 struct inode *inode;
54
55 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
56
57 for (;;) {
58 inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0);
59 if (!IS_ERR(inode)) {
60 gfs2_trans_end(sdp);
61 if (dip->i_alloc->al_rgd)
62 gfs2_inplace_release(dip);
63 gfs2_quota_unlock(dip);
64 gfs2_alloc_put(dip);
65 gfs2_glock_dq_uninit_m(2, ghs);
66 mark_inode_dirty(inode);
67 break;
68 } else if (PTR_ERR(inode) != -EEXIST ||
69 (nd && nd->flags & LOOKUP_EXCL)) {
70 gfs2_holder_uninit(ghs);
71 return PTR_ERR(inode);
72 }
73
74 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
75 if (inode) {
76 if (!IS_ERR(inode)) {
77 gfs2_holder_uninit(ghs);
78 break;
79 } else {
80 gfs2_holder_uninit(ghs);
81 return PTR_ERR(inode);
82 }
83 }
84 }
85
86 d_instantiate(dentry, inode);
87
88 return 0;
89}
90
91/**
92 * gfs2_lookup - Look up a filename in a directory and return its inode
93 * @dir: The directory inode
94 * @dentry: The dentry of the new inode
95 * @nd: passed from Linux VFS, ignored by us
96 *
97 * Called by the VFS layer. Lock dir and call gfs2_lookupi()
98 *
99 * Returns: errno
100 */
101
102static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
103 struct nameidata *nd)
104{
105 struct inode *inode = NULL;
106
107 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
108 if (inode && IS_ERR(inode))
109 return ERR_CAST(inode);
110
111 if (inode) {
112 struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
113 struct gfs2_holder gh;
114 int error;
115 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
116 if (error) {
117 iput(inode);
118 return ERR_PTR(error);
119 }
120 gfs2_glock_dq_uninit(&gh);
121 return d_splice_alias(inode, dentry);
122 }
123 d_add(dentry, inode);
124
125 return NULL;
126}
127
128/**
129 * gfs2_link - Link to a file
130 * @old_dentry: The inode to link
131 * @dir: Add link to this directory
132 * @dentry: The name of the link
133 *
134 * Link the inode in "old_dentry" into the directory "dir" with the
135 * name in "dentry".
136 *
137 * Returns: errno
138 */
139
140static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
141 struct dentry *dentry)
142{
143 struct gfs2_inode *dip = GFS2_I(dir);
144 struct gfs2_sbd *sdp = GFS2_SB(dir);
145 struct inode *inode = old_dentry->d_inode;
146 struct gfs2_inode *ip = GFS2_I(inode);
147 struct gfs2_holder ghs[2];
148 int alloc_required;
149 int error;
150
151 if (S_ISDIR(inode->i_mode))
152 return -EPERM;
153
154 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
155 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
156
157 error = gfs2_glock_nq(ghs); /* parent */
158 if (error)
159 goto out_parent;
160
161 error = gfs2_glock_nq(ghs + 1); /* child */
162 if (error)
163 goto out_child;
164
165 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
166 if (error)
167 goto out_gunlock;
168
169 error = gfs2_dir_check(dir, &dentry->d_name, NULL);
170 switch (error) {
171 case -ENOENT:
172 break;
173 case 0:
174 error = -EEXIST;
175 default:
176 goto out_gunlock;
177 }
178
179 error = -EINVAL;
180 if (!dip->i_inode.i_nlink)
181 goto out_gunlock;
182 error = -EFBIG;
183 if (dip->i_entries == (u32)-1)
184 goto out_gunlock;
185 error = -EPERM;
186 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
187 goto out_gunlock;
188 error = -EINVAL;
189 if (!ip->i_inode.i_nlink)
190 goto out_gunlock;
191 error = -EMLINK;
192 if (ip->i_inode.i_nlink == (u32)-1)
193 goto out_gunlock;
194
195 alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
196 if (error < 0)
197 goto out_gunlock;
198 error = 0;
199
200 if (alloc_required) {
201 struct gfs2_alloc *al = gfs2_alloc_get(dip);
202 if (!al) {
203 error = -ENOMEM;
204 goto out_gunlock;
205 }
206
207 error = gfs2_quota_lock_check(dip);
208 if (error)
209 goto out_alloc;
210
211 al->al_requested = sdp->sd_max_dirres;
212
213 error = gfs2_inplace_reserve(dip);
214 if (error)
215 goto out_gunlock_q;
216
217 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
218 gfs2_rg_blocks(al) +
219 2 * RES_DINODE + RES_STATFS +
220 RES_QUOTA, 0);
221 if (error)
222 goto out_ipres;
223 } else {
224 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
225 if (error)
226 goto out_ipres;
227 }
228
229 error = gfs2_dir_add(dir, &dentry->d_name, ip, IF2DT(inode->i_mode));
230 if (error)
231 goto out_end_trans;
232
233 error = gfs2_change_nlink(ip, +1);
234
235out_end_trans:
236 gfs2_trans_end(sdp);
237out_ipres:
238 if (alloc_required)
239 gfs2_inplace_release(dip);
240out_gunlock_q:
241 if (alloc_required)
242 gfs2_quota_unlock(dip);
243out_alloc:
244 if (alloc_required)
245 gfs2_alloc_put(dip);
246out_gunlock:
247 gfs2_glock_dq(ghs + 1);
248out_child:
249 gfs2_glock_dq(ghs);
250out_parent:
251 gfs2_holder_uninit(ghs);
252 gfs2_holder_uninit(ghs + 1);
253 if (!error) {
254 ihold(inode);
255 d_instantiate(dentry, inode);
256 mark_inode_dirty(inode);
257 }
258 return error;
259}
260
261/*
262 * gfs2_unlink_ok - check to see that a inode is still in a directory
263 * @dip: the directory
264 * @name: the name of the file
265 * @ip: the inode
266 *
267 * Assumes that the lock on (at least) @dip is held.
268 *
269 * Returns: 0 if the parent/child relationship is correct, errno if it isn't
270 */
271
272static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
273 const struct gfs2_inode *ip)
274{
275 int error;
276
277 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
278 return -EPERM;
279
280 if ((dip->i_inode.i_mode & S_ISVTX) &&
281 dip->i_inode.i_uid != current_fsuid() &&
282 ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
283 return -EPERM;
284
285 if (IS_APPEND(&dip->i_inode))
286 return -EPERM;
287
288 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
289 if (error)
290 return error;
291
292 error = gfs2_dir_check(&dip->i_inode, name, ip);
293 if (error)
294 return error;
295
296 return 0;
297}
298
299/**
300 * gfs2_unlink - Unlink a file
301 * @dir: The inode of the directory containing the file to unlink
302 * @dentry: The file itself
303 *
304 * Unlink a file. Call gfs2_unlinki()
305 *
306 * Returns: errno
307 */
308
309static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
310{
311 struct gfs2_inode *dip = GFS2_I(dir);
312 struct gfs2_sbd *sdp = GFS2_SB(dir);
313 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
314 struct gfs2_holder ghs[3];
315 struct gfs2_rgrpd *rgd;
316 struct gfs2_holder ri_gh;
317 int error;
318
319 error = gfs2_rindex_hold(sdp, &ri_gh);
320 if (error)
321 return error;
322
323 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
324 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
325
326 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
327 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
328
329
330 error = gfs2_glock_nq(ghs); /* parent */
331 if (error)
332 goto out_parent;
333
334 error = gfs2_glock_nq(ghs + 1); /* child */
335 if (error)
336 goto out_child;
337
338 error = gfs2_glock_nq(ghs + 2); /* rgrp */
339 if (error)
340 goto out_rgrp;
341
342 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
343 if (error)
344 goto out_gunlock;
345
346 error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
347 if (error)
348 goto out_gunlock;
349
350 error = gfs2_dir_del(dip, &dentry->d_name);
351 if (error)
352 goto out_end_trans;
353
354 error = gfs2_change_nlink(ip, -1);
355
356out_end_trans:
357 gfs2_trans_end(sdp);
358out_gunlock:
359 gfs2_glock_dq(ghs + 2);
360out_rgrp:
361 gfs2_holder_uninit(ghs + 2);
362 gfs2_glock_dq(ghs + 1);
363out_child:
364 gfs2_holder_uninit(ghs + 1);
365 gfs2_glock_dq(ghs);
366out_parent:
367 gfs2_holder_uninit(ghs);
368 gfs2_glock_dq_uninit(&ri_gh);
369 return error;
370}
371
372/**
373 * gfs2_symlink - Create a symlink
374 * @dir: The directory to create the symlink in
375 * @dentry: The dentry to put the symlink in
376 * @symname: The thing which the link points to
377 *
378 * Returns: errno
379 */
380
381static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
382 const char *symname)
383{
384 struct gfs2_inode *dip = GFS2_I(dir), *ip;
385 struct gfs2_sbd *sdp = GFS2_SB(dir);
386 struct gfs2_holder ghs[2];
387 struct inode *inode;
388 struct buffer_head *dibh;
389 int size;
390 int error;
391
392 /* Must be stuffed with a null terminator for gfs2_follow_link() */
393 size = strlen(symname);
394 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
395 return -ENAMETOOLONG;
396
397 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
398
399 inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO, 0);
400 if (IS_ERR(inode)) {
401 gfs2_holder_uninit(ghs);
402 return PTR_ERR(inode);
403 }
404
405 ip = ghs[1].gh_gl->gl_object;
406
407 i_size_write(inode, size);
408
409 error = gfs2_meta_inode_buffer(ip, &dibh);
410
411 if (!gfs2_assert_withdraw(sdp, !error)) {
412 gfs2_dinode_out(ip, dibh->b_data);
413 memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname,
414 size);
415 brelse(dibh);
416 }
417
418 gfs2_trans_end(sdp);
419 if (dip->i_alloc->al_rgd)
420 gfs2_inplace_release(dip);
421 gfs2_quota_unlock(dip);
422 gfs2_alloc_put(dip);
423
424 gfs2_glock_dq_uninit_m(2, ghs);
425
426 d_instantiate(dentry, inode);
427 mark_inode_dirty(inode);
428
429 return 0;
430}
431
432/**
433 * gfs2_mkdir - Make a directory
434 * @dir: The parent directory of the new one
435 * @dentry: The dentry of the new directory
436 * @mode: The mode of the new directory
437 *
438 * Returns: errno
439 */
440
441static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
442{
443 struct gfs2_inode *dip = GFS2_I(dir), *ip;
444 struct gfs2_sbd *sdp = GFS2_SB(dir);
445 struct gfs2_holder ghs[2];
446 struct inode *inode;
447 struct buffer_head *dibh;
448 int error;
449
450 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
451
452 inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode, 0);
453 if (IS_ERR(inode)) {
454 gfs2_holder_uninit(ghs);
455 return PTR_ERR(inode);
456 }
457
458 ip = ghs[1].gh_gl->gl_object;
459
460 ip->i_inode.i_nlink = 2;
461 i_size_write(inode, sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode));
462 ip->i_diskflags |= GFS2_DIF_JDATA;
463 ip->i_entries = 2;
464
465 error = gfs2_meta_inode_buffer(ip, &dibh);
466
467 if (!gfs2_assert_withdraw(sdp, !error)) {
468 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
469 struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
470
471 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
472 gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent);
473 dent->de_inum = di->di_num; /* already GFS2 endian */
474 dent->de_type = cpu_to_be16(DT_DIR);
475 di->di_entries = cpu_to_be32(1);
476
477 dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
478 gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
479
480 gfs2_inum_out(dip, dent);
481 dent->de_type = cpu_to_be16(DT_DIR);
482
483 gfs2_dinode_out(ip, di);
484
485 brelse(dibh);
486 }
487
488 error = gfs2_change_nlink(dip, +1);
489 gfs2_assert_withdraw(sdp, !error); /* dip already pinned */
490
491 gfs2_trans_end(sdp);
492 if (dip->i_alloc->al_rgd)
493 gfs2_inplace_release(dip);
494 gfs2_quota_unlock(dip);
495 gfs2_alloc_put(dip);
496
497 gfs2_glock_dq_uninit_m(2, ghs);
498
499 d_instantiate(dentry, inode);
500 mark_inode_dirty(inode);
501
502 return 0;
503}
504
505/**
506 * gfs2_rmdiri - Remove a directory
507 * @dip: The parent directory of the directory to be removed
508 * @name: The name of the directory to be removed
509 * @ip: The GFS2 inode of the directory to be removed
510 *
511 * Assumes Glocks on dip and ip are held
512 *
513 * Returns: errno
514 */
515
516static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
517 struct gfs2_inode *ip)
518{
519 int error;
520
521 if (ip->i_entries != 2) {
522 if (gfs2_consist_inode(ip))
523 gfs2_dinode_print(ip);
524 return -EIO;
525 }
526
527 error = gfs2_dir_del(dip, name);
528 if (error)
529 return error;
530
531 error = gfs2_change_nlink(dip, -1);
532 if (error)
533 return error;
534
535 error = gfs2_dir_del(ip, &gfs2_qdot);
536 if (error)
537 return error;
538
539 error = gfs2_dir_del(ip, &gfs2_qdotdot);
540 if (error)
541 return error;
542
543 /* It looks odd, but it really should be done twice */
544 error = gfs2_change_nlink(ip, -1);
545 if (error)
546 return error;
547
548 error = gfs2_change_nlink(ip, -1);
549 if (error)
550 return error;
551
552 return error;
553}
554
555/**
556 * gfs2_rmdir - Remove a directory
557 * @dir: The parent directory of the directory to be removed
558 * @dentry: The dentry of the directory to remove
559 *
560 * Remove a directory. Call gfs2_rmdiri()
561 *
562 * Returns: errno
563 */
564
565static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
566{
567 struct gfs2_inode *dip = GFS2_I(dir);
568 struct gfs2_sbd *sdp = GFS2_SB(dir);
569 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
570 struct gfs2_holder ghs[3];
571 struct gfs2_rgrpd *rgd;
572 struct gfs2_holder ri_gh;
573 int error;
574
575 error = gfs2_rindex_hold(sdp, &ri_gh);
576 if (error)
577 return error;
578 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
579 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
580
581 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
582 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
583
584 error = gfs2_glock_nq(ghs); /* parent */
585 if (error)
586 goto out_parent;
587
588 error = gfs2_glock_nq(ghs + 1); /* child */
589 if (error)
590 goto out_child;
591
592 error = gfs2_glock_nq(ghs + 2); /* rgrp */
593 if (error)
594 goto out_rgrp;
595
596 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
597 if (error)
598 goto out_gunlock;
599
600 if (ip->i_entries < 2) {
601 if (gfs2_consist_inode(ip))
602 gfs2_dinode_print(ip);
603 error = -EIO;
604 goto out_gunlock;
605 }
606 if (ip->i_entries > 2) {
607 error = -ENOTEMPTY;
608 goto out_gunlock;
609 }
610
611 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + 3 * RES_LEAF + RES_RG_BIT, 0);
612 if (error)
613 goto out_gunlock;
614
615 error = gfs2_rmdiri(dip, &dentry->d_name, ip);
616
617 gfs2_trans_end(sdp);
618
619out_gunlock:
620 gfs2_glock_dq(ghs + 2);
621out_rgrp:
622 gfs2_holder_uninit(ghs + 2);
623 gfs2_glock_dq(ghs + 1);
624out_child:
625 gfs2_holder_uninit(ghs + 1);
626 gfs2_glock_dq(ghs);
627out_parent:
628 gfs2_holder_uninit(ghs);
629 gfs2_glock_dq_uninit(&ri_gh);
630 return error;
631}
632
633/**
634 * gfs2_mknod - Make a special file
635 * @dir: The directory in which the special file will reside
636 * @dentry: The dentry of the special file
637 * @mode: The mode of the special file
638 * @rdev: The device specification of the special file
639 *
640 */
641
642static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
643 dev_t dev)
644{
645 struct gfs2_inode *dip = GFS2_I(dir);
646 struct gfs2_sbd *sdp = GFS2_SB(dir);
647 struct gfs2_holder ghs[2];
648 struct inode *inode;
649
650 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
651
652 inode = gfs2_createi(ghs, &dentry->d_name, mode, dev);
653 if (IS_ERR(inode)) {
654 gfs2_holder_uninit(ghs);
655 return PTR_ERR(inode);
656 }
657
658 gfs2_trans_end(sdp);
659 if (dip->i_alloc->al_rgd)
660 gfs2_inplace_release(dip);
661 gfs2_quota_unlock(dip);
662 gfs2_alloc_put(dip);
663
664 gfs2_glock_dq_uninit_m(2, ghs);
665
666 d_instantiate(dentry, inode);
667 mark_inode_dirty(inode);
668
669 return 0;
670}
671
672/*
673 * gfs2_ok_to_move - check if it's ok to move a directory to another directory
674 * @this: move this
675 * @to: to here
676 *
677 * Follow @to back to the root and make sure we don't encounter @this
678 * Assumes we already hold the rename lock.
679 *
680 * Returns: errno
681 */
682
683static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
684{
685 struct inode *dir = &to->i_inode;
686 struct super_block *sb = dir->i_sb;
687 struct inode *tmp;
688 int error = 0;
689
690 igrab(dir);
691
692 for (;;) {
693 if (dir == &this->i_inode) {
694 error = -EINVAL;
695 break;
696 }
697 if (dir == sb->s_root->d_inode) {
698 error = 0;
699 break;
700 }
701
702 tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1);
703 if (IS_ERR(tmp)) {
704 error = PTR_ERR(tmp);
705 break;
706 }
707
708 iput(dir);
709 dir = tmp;
710 }
711
712 iput(dir);
713
714 return error;
715}
716
717/**
718 * gfs2_rename - Rename a file
719 * @odir: Parent directory of old file name
720 * @odentry: The old dentry of the file
721 * @ndir: Parent directory of new file name
722 * @ndentry: The new dentry of the file
723 *
724 * Returns: errno
725 */
726
727static int gfs2_rename(struct inode *odir, struct dentry *odentry,
728 struct inode *ndir, struct dentry *ndentry)
729{
730 struct gfs2_inode *odip = GFS2_I(odir);
731 struct gfs2_inode *ndip = GFS2_I(ndir);
732 struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
733 struct gfs2_inode *nip = NULL;
734 struct gfs2_sbd *sdp = GFS2_SB(odir);
735 struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh;
736 struct gfs2_rgrpd *nrgd;
737 unsigned int num_gh;
738 int dir_rename = 0;
739 int alloc_required = 0;
740 unsigned int x;
741 int error;
742
743 if (ndentry->d_inode) {
744 nip = GFS2_I(ndentry->d_inode);
745 if (ip == nip)
746 return 0;
747 }
748
749 error = gfs2_rindex_hold(sdp, &ri_gh);
750 if (error)
751 return error;
752
753 if (odip != ndip) {
754 error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
755 0, &r_gh);
756 if (error)
757 goto out;
758
759 if (S_ISDIR(ip->i_inode.i_mode)) {
760 dir_rename = 1;
761 /* don't move a dirctory into it's subdir */
762 error = gfs2_ok_to_move(ip, ndip);
763 if (error)
764 goto out_gunlock_r;
765 }
766 }
767
768 num_gh = 1;
769 gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
770 if (odip != ndip) {
771 gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
772 num_gh++;
773 }
774 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
775 num_gh++;
776
777 if (nip) {
778 gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
779 num_gh++;
780 /* grab the resource lock for unlink flag twiddling
781 * this is the case of the target file already existing
782 * so we unlink before doing the rename
783 */
784 nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
785 if (nrgd)
786 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
787 }
788
789 for (x = 0; x < num_gh; x++) {
790 error = gfs2_glock_nq(ghs + x);
791 if (error)
792 goto out_gunlock;
793 }
794
795 /* Check out the old directory */
796
797 error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
798 if (error)
799 goto out_gunlock;
800
801 /* Check out the new directory */
802
803 if (nip) {
804 error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
805 if (error)
806 goto out_gunlock;
807
808 if (S_ISDIR(nip->i_inode.i_mode)) {
809 if (nip->i_entries < 2) {
810 if (gfs2_consist_inode(nip))
811 gfs2_dinode_print(nip);
812 error = -EIO;
813 goto out_gunlock;
814 }
815 if (nip->i_entries > 2) {
816 error = -ENOTEMPTY;
817 goto out_gunlock;
818 }
819 }
820 } else {
821 error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
822 if (error)
823 goto out_gunlock;
824
825 error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
826 switch (error) {
827 case -ENOENT:
828 error = 0;
829 break;
830 case 0:
831 error = -EEXIST;
832 default:
833 goto out_gunlock;
834 };
835
836 if (odip != ndip) {
837 if (!ndip->i_inode.i_nlink) {
838 error = -EINVAL;
839 goto out_gunlock;
840 }
841 if (ndip->i_entries == (u32)-1) {
842 error = -EFBIG;
843 goto out_gunlock;
844 }
845 if (S_ISDIR(ip->i_inode.i_mode) &&
846 ndip->i_inode.i_nlink == (u32)-1) {
847 error = -EMLINK;
848 goto out_gunlock;
849 }
850 }
851 }
852
853 /* Check out the dir to be renamed */
854
855 if (dir_rename) {
856 error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
857 if (error)
858 goto out_gunlock;
859 }
860
861 if (nip == NULL)
862 alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
863 error = alloc_required;
864 if (error < 0)
865 goto out_gunlock;
866 error = 0;
867
868 if (alloc_required) {
869 struct gfs2_alloc *al = gfs2_alloc_get(ndip);
870 if (!al) {
871 error = -ENOMEM;
872 goto out_gunlock;
873 }
874
875 error = gfs2_quota_lock_check(ndip);
876 if (error)
877 goto out_alloc;
878
879 al->al_requested = sdp->sd_max_dirres;
880
881 error = gfs2_inplace_reserve_ri(ndip);
882 if (error)
883 goto out_gunlock_q;
884
885 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
886 gfs2_rg_blocks(al) +
887 4 * RES_DINODE + 4 * RES_LEAF +
888 RES_STATFS + RES_QUOTA + 4, 0);
889 if (error)
890 goto out_ipreserv;
891 } else {
892 error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
893 5 * RES_LEAF + 4, 0);
894 if (error)
895 goto out_gunlock;
896 }
897
898 /* Remove the target file, if it exists */
899
900 if (nip) {
901 if (S_ISDIR(nip->i_inode.i_mode))
902 error = gfs2_rmdiri(ndip, &ndentry->d_name, nip);
903 else {
904 error = gfs2_dir_del(ndip, &ndentry->d_name);
905 if (error)
906 goto out_end_trans;
907 error = gfs2_change_nlink(nip, -1);
908 }
909 if (error)
910 goto out_end_trans;
911 }
912
913 if (dir_rename) {
914 error = gfs2_change_nlink(ndip, +1);
915 if (error)
916 goto out_end_trans;
917 error = gfs2_change_nlink(odip, -1);
918 if (error)
919 goto out_end_trans;
920
921 error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
922 if (error)
923 goto out_end_trans;
924 } else {
925 struct buffer_head *dibh;
926 error = gfs2_meta_inode_buffer(ip, &dibh);
927 if (error)
928 goto out_end_trans;
929 ip->i_inode.i_ctime = CURRENT_TIME;
930 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
931 gfs2_dinode_out(ip, dibh->b_data);
932 brelse(dibh);
933 }
934
935 error = gfs2_dir_del(odip, &odentry->d_name);
936 if (error)
937 goto out_end_trans;
938
939 error = gfs2_dir_add(ndir, &ndentry->d_name, ip, IF2DT(ip->i_inode.i_mode));
940 if (error)
941 goto out_end_trans;
942
943out_end_trans:
944 gfs2_trans_end(sdp);
945out_ipreserv:
946 if (alloc_required)
947 gfs2_inplace_release(ndip);
948out_gunlock_q:
949 if (alloc_required)
950 gfs2_quota_unlock(ndip);
951out_alloc:
952 if (alloc_required)
953 gfs2_alloc_put(ndip);
954out_gunlock:
955 while (x--) {
956 gfs2_glock_dq(ghs + x);
957 gfs2_holder_uninit(ghs + x);
958 }
959out_gunlock_r:
960 if (r_gh.gh_gl)
961 gfs2_glock_dq_uninit(&r_gh);
962out:
963 gfs2_glock_dq_uninit(&ri_gh);
964 return error;
965}
966
967/**
968 * gfs2_follow_link - Follow a symbolic link
969 * @dentry: The dentry of the link
970 * @nd: Data that we pass to vfs_follow_link()
971 *
972 * This can handle symlinks of any size.
973 *
974 * Returns: 0 on success or error code
975 */
976
977static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
978{
979 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
980 struct gfs2_holder i_gh;
981 struct buffer_head *dibh;
982 unsigned int x, size;
983 char *buf;
984 int error;
985
986 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
987 error = gfs2_glock_nq(&i_gh);
988 if (error) {
989 gfs2_holder_uninit(&i_gh);
990 nd_set_link(nd, ERR_PTR(error));
991 return NULL;
992 }
993
994 size = (unsigned int)i_size_read(&ip->i_inode);
995 if (size == 0) {
996 gfs2_consist_inode(ip);
997 buf = ERR_PTR(-EIO);
998 goto out;
999 }
1000
1001 error = gfs2_meta_inode_buffer(ip, &dibh);
1002 if (error) {
1003 buf = ERR_PTR(error);
1004 goto out;
1005 }
1006
1007 x = size + 1;
1008 buf = kmalloc(x, GFP_NOFS);
1009 if (!buf)
1010 buf = ERR_PTR(-ENOMEM);
1011 else
1012 memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
1013 brelse(dibh);
1014out:
1015 gfs2_glock_dq_uninit(&i_gh);
1016 nd_set_link(nd, buf);
1017 return NULL;
1018}
1019
/* Free the link buffer stored in @nd, unless it holds an ERR_PTR() value. */
static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
{
	char *link = nd_get_link(nd);

	if (IS_ERR(link))
		return;
	kfree(link);
}
1026
1027/**
1028 * gfs2_permission -
1029 * @inode: The inode
1030 * @mask: The mask to be tested
1031 * @flags: Indicates whether this is an RCU path walk or not
1032 *
1033 * This may be called from the VFS directly, or from within GFS2 with the
1034 * inode locked, so we look to see if the glock is already locked and only
1035 * lock the glock if its not already been done.
1036 *
1037 * Returns: errno
1038 */
1039
1040int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
1041{
1042 struct gfs2_inode *ip;
1043 struct gfs2_holder i_gh;
1044 int error;
1045 int unlock = 0;
1046
1047
1048 ip = GFS2_I(inode);
1049 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1050 if (flags & IPERM_FLAG_RCU)
1051 return -ECHILD;
1052 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
1053 if (error)
1054 return error;
1055 unlock = 1;
1056 }
1057
1058 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
1059 error = -EACCES;
1060 else
1061 error = generic_permission(inode, mask, flags, gfs2_check_acl);
1062 if (unlock)
1063 gfs2_glock_dq_uninit(&i_gh);
1064
1065 return error;
1066}
1067
1068static int setattr_chown(struct inode *inode, struct iattr *attr)
1069{
1070 struct gfs2_inode *ip = GFS2_I(inode);
1071 struct gfs2_sbd *sdp = GFS2_SB(inode);
1072 u32 ouid, ogid, nuid, ngid;
1073 int error;
1074
1075 ouid = inode->i_uid;
1076 ogid = inode->i_gid;
1077 nuid = attr->ia_uid;
1078 ngid = attr->ia_gid;
1079
1080 if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
1081 ouid = nuid = NO_QUOTA_CHANGE;
1082 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
1083 ogid = ngid = NO_QUOTA_CHANGE;
1084
1085 if (!gfs2_alloc_get(ip))
1086 return -ENOMEM;
1087
1088 error = gfs2_quota_lock(ip, nuid, ngid);
1089 if (error)
1090 goto out_alloc;
1091
1092 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
1093 error = gfs2_quota_check(ip, nuid, ngid);
1094 if (error)
1095 goto out_gunlock_q;
1096 }
1097
1098 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
1099 if (error)
1100 goto out_gunlock_q;
1101
1102 error = gfs2_setattr_simple(ip, attr);
1103 if (error)
1104 goto out_end_trans;
1105
1106 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
1107 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
1108 gfs2_quota_change(ip, -blocks, ouid, ogid);
1109 gfs2_quota_change(ip, blocks, nuid, ngid);
1110 }
1111
1112out_end_trans:
1113 gfs2_trans_end(sdp);
1114out_gunlock_q:
1115 gfs2_quota_unlock(ip);
1116out_alloc:
1117 gfs2_alloc_put(ip);
1118 return error;
1119}
1120
1121/**
1122 * gfs2_setattr - Change attributes on an inode
1123 * @dentry: The dentry which is changing
1124 * @attr: The structure describing the change
1125 *
1126 * The VFS layer wants to change one or more of an inodes attributes. Write
1127 * that change out to disk.
1128 *
1129 * Returns: errno
1130 */
1131
1132static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
1133{
1134 struct inode *inode = dentry->d_inode;
1135 struct gfs2_inode *ip = GFS2_I(inode);
1136 struct gfs2_holder i_gh;
1137 int error;
1138
1139 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1140 if (error)
1141 return error;
1142
1143 error = -EPERM;
1144 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1145 goto out;
1146
1147 error = inode_change_ok(inode, attr);
1148 if (error)
1149 goto out;
1150
1151 if (attr->ia_valid & ATTR_SIZE)
1152 error = gfs2_setattr_size(inode, attr->ia_size);
1153 else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
1154 error = setattr_chown(inode, attr);
1155 else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
1156 error = gfs2_acl_chmod(ip, attr);
1157 else
1158 error = gfs2_setattr_simple(ip, attr);
1159
1160out:
1161 gfs2_glock_dq_uninit(&i_gh);
1162 if (!error)
1163 mark_inode_dirty(inode);
1164 return error;
1165}
1166
1167/**
1168 * gfs2_getattr - Read out an inode's attributes
1169 * @mnt: The vfsmount the inode is being accessed from
1170 * @dentry: The dentry to stat
1171 * @stat: The inode's stats
1172 *
1173 * This may be called from the VFS directly, or from within GFS2 with the
1174 * inode locked, so we look to see if the glock is already locked and only
1175 * lock the glock if its not already been done. Note that its the NFS
1176 * readdirplus operation which causes this to be called (from filldir)
1177 * with the glock already held.
1178 *
1179 * Returns: errno
1180 */
1181
1182static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
1183 struct kstat *stat)
1184{
1185 struct inode *inode = dentry->d_inode;
1186 struct gfs2_inode *ip = GFS2_I(inode);
1187 struct gfs2_holder gh;
1188 int error;
1189 int unlock = 0;
1190
1191 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1192 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1193 if (error)
1194 return error;
1195 unlock = 1;
1196 }
1197
1198 generic_fillattr(inode, stat);
1199 if (unlock)
1200 gfs2_glock_dq_uninit(&gh);
1201
1202 return 0;
1203}
1204
1205static int gfs2_setxattr(struct dentry *dentry, const char *name,
1206 const void *data, size_t size, int flags)
1207{
1208 struct inode *inode = dentry->d_inode;
1209 struct gfs2_inode *ip = GFS2_I(inode);
1210 struct gfs2_holder gh;
1211 int ret;
1212
1213 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1214 ret = gfs2_glock_nq(&gh);
1215 if (ret == 0) {
1216 ret = generic_setxattr(dentry, name, data, size, flags);
1217 gfs2_glock_dq(&gh);
1218 }
1219 gfs2_holder_uninit(&gh);
1220 return ret;
1221}
1222
1223static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
1224 void *data, size_t size)
1225{
1226 struct inode *inode = dentry->d_inode;
1227 struct gfs2_inode *ip = GFS2_I(inode);
1228 struct gfs2_holder gh;
1229 int ret;
1230
1231 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1232 ret = gfs2_glock_nq(&gh);
1233 if (ret == 0) {
1234 ret = generic_getxattr(dentry, name, data, size);
1235 gfs2_glock_dq(&gh);
1236 }
1237 gfs2_holder_uninit(&gh);
1238 return ret;
1239}
1240
1241static int gfs2_removexattr(struct dentry *dentry, const char *name)
1242{
1243 struct inode *inode = dentry->d_inode;
1244 struct gfs2_inode *ip = GFS2_I(inode);
1245 struct gfs2_holder gh;
1246 int ret;
1247
1248 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1249 ret = gfs2_glock_nq(&gh);
1250 if (ret == 0) {
1251 ret = generic_removexattr(dentry, name);
1252 gfs2_glock_dq(&gh);
1253 }
1254 gfs2_holder_uninit(&gh);
1255 return ret;
1256}
1257
1258static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1259 u64 start, u64 len)
1260{
1261 struct gfs2_inode *ip = GFS2_I(inode);
1262 struct gfs2_holder gh;
1263 int ret;
1264
1265 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
1266 if (ret)
1267 return ret;
1268
1269 mutex_lock(&inode->i_mutex);
1270
1271 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
1272 if (ret)
1273 goto out;
1274
1275 if (gfs2_is_stuffed(ip)) {
1276 u64 phys = ip->i_no_addr << inode->i_blkbits;
1277 u64 size = i_size_read(inode);
1278 u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
1279 FIEMAP_EXTENT_DATA_INLINE;
1280 phys += sizeof(struct gfs2_dinode);
1281 phys += start;
1282 if (start + len > size)
1283 len = size - start;
1284 if (start < size)
1285 ret = fiemap_fill_next_extent(fieinfo, start, phys,
1286 len, flags);
1287 if (ret == 1)
1288 ret = 0;
1289 } else {
1290 ret = __generic_block_fiemap(inode, fieinfo, start, len,
1291 gfs2_block_map);
1292 }
1293
1294 gfs2_glock_dq_uninit(&gh);
1295out:
1296 mutex_unlock(&inode->i_mutex);
1297 return ret;
1298}
1299
1300const struct inode_operations gfs2_file_iops = {
1301 .permission = gfs2_permission,
1302 .setattr = gfs2_setattr,
1303 .getattr = gfs2_getattr,
1304 .setxattr = gfs2_setxattr,
1305 .getxattr = gfs2_getxattr,
1306 .listxattr = gfs2_listxattr,
1307 .removexattr = gfs2_removexattr,
1308 .fiemap = gfs2_fiemap,
1309};
1310
1311const struct inode_operations gfs2_dir_iops = {
1312 .create = gfs2_create,
1313 .lookup = gfs2_lookup,
1314 .link = gfs2_link,
1315 .unlink = gfs2_unlink,
1316 .symlink = gfs2_symlink,
1317 .mkdir = gfs2_mkdir,
1318 .rmdir = gfs2_rmdir,
1319 .mknod = gfs2_mknod,
1320 .rename = gfs2_rename,
1321 .permission = gfs2_permission,
1322 .setattr = gfs2_setattr,
1323 .getattr = gfs2_getattr,
1324 .setxattr = gfs2_setxattr,
1325 .getxattr = gfs2_getxattr,
1326 .listxattr = gfs2_listxattr,
1327 .removexattr = gfs2_removexattr,
1328 .fiemap = gfs2_fiemap,
1329};
1330
1331const struct inode_operations gfs2_symlink_iops = {
1332 .readlink = generic_readlink,
1333 .follow_link = gfs2_follow_link,
1334 .put_link = gfs2_put_link,
1335 .permission = gfs2_permission,
1336 .setattr = gfs2_setattr,
1337 .getattr = gfs2_getattr,
1338 .setxattr = gfs2_setxattr,
1339 .getxattr = gfs2_getxattr,
1340 .listxattr = gfs2_listxattr,
1341 .removexattr = gfs2_removexattr,
1342 .fiemap = gfs2_fiemap,
1343};
1344
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index e23d9864c418..42e8d23bc047 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -38,6 +38,7 @@
38 38
39#include <linux/sched.h> 39#include <linux/sched.h>
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include <linux/mm.h>
41#include <linux/spinlock.h> 42#include <linux/spinlock.h>
42#include <linux/completion.h> 43#include <linux/completion.h>
43#include <linux/buffer_head.h> 44#include <linux/buffer_head.h>
@@ -77,19 +78,20 @@ static LIST_HEAD(qd_lru_list);
77static atomic_t qd_lru_count = ATOMIC_INIT(0); 78static atomic_t qd_lru_count = ATOMIC_INIT(0);
78static DEFINE_SPINLOCK(qd_lru_lock); 79static DEFINE_SPINLOCK(qd_lru_lock);
79 80
80int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 81int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc)
81{ 82{
82 struct gfs2_quota_data *qd; 83 struct gfs2_quota_data *qd;
83 struct gfs2_sbd *sdp; 84 struct gfs2_sbd *sdp;
85 int nr_to_scan = sc->nr_to_scan;
84 86
85 if (nr == 0) 87 if (nr_to_scan == 0)
86 goto out; 88 goto out;
87 89
88 if (!(gfp_mask & __GFP_FS)) 90 if (!(sc->gfp_mask & __GFP_FS))
89 return -1; 91 return -1;
90 92
91 spin_lock(&qd_lru_lock); 93 spin_lock(&qd_lru_lock);
92 while (nr && !list_empty(&qd_lru_list)) { 94 while (nr_to_scan && !list_empty(&qd_lru_list)) {
93 qd = list_entry(qd_lru_list.next, 95 qd = list_entry(qd_lru_list.next,
94 struct gfs2_quota_data, qd_reclaim); 96 struct gfs2_quota_data, qd_reclaim);
95 sdp = qd->qd_gl->gl_sbd; 97 sdp = qd->qd_gl->gl_sbd;
@@ -110,7 +112,7 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
110 spin_unlock(&qd_lru_lock); 112 spin_unlock(&qd_lru_lock);
111 kmem_cache_free(gfs2_quotad_cachep, qd); 113 kmem_cache_free(gfs2_quotad_cachep, qd);
112 spin_lock(&qd_lru_lock); 114 spin_lock(&qd_lru_lock);
113 nr--; 115 nr_to_scan--;
114 } 116 }
115 spin_unlock(&qd_lru_lock); 117 spin_unlock(&qd_lru_lock);
116 118
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index e7d236ca48bd..90bf1c302a98 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -12,6 +12,7 @@
12 12
13struct gfs2_inode; 13struct gfs2_inode;
14struct gfs2_sbd; 14struct gfs2_sbd;
15struct shrink_control;
15 16
16#define NO_QUOTA_CHANGE ((u32)-1) 17#define NO_QUOTA_CHANGE ((u32)-1)
17 18
@@ -51,7 +52,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
51 return ret; 52 return ret;
52} 53}
53 54
54extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask); 55extern int gfs2_shrink_qd_memory(struct shrinker *shrink,
56 struct shrink_control *sc);
55extern const struct quotactl_ops gfs2_quotactl_ops; 57extern const struct quotactl_ops gfs2_quotactl_ops;
56 58
57#endif /* __QUOTA_DOT_H__ */ 59#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 6fcae8469f6d..9b780df3fd54 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -78,10 +78,11 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
78 78
79static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1, 79static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
80 unsigned char *buf2, unsigned int offset, 80 unsigned char *buf2, unsigned int offset,
81 unsigned int buflen, u32 block, 81 struct gfs2_bitmap *bi, u32 block,
82 unsigned char new_state) 82 unsigned char new_state)
83{ 83{
84 unsigned char *byte1, *byte2, *end, cur_state; 84 unsigned char *byte1, *byte2, *end, cur_state;
85 unsigned int buflen = bi->bi_len;
85 const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; 86 const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
86 87
87 byte1 = buf1 + offset + (block / GFS2_NBBY); 88 byte1 = buf1 + offset + (block / GFS2_NBBY);
@@ -92,6 +93,16 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
92 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; 93 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
93 94
94 if (unlikely(!valid_change[new_state * 4 + cur_state])) { 95 if (unlikely(!valid_change[new_state * 4 + cur_state])) {
96 printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, "
97 "new_state=%d\n",
98 (unsigned long long)block, cur_state, new_state);
99 printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n",
100 (unsigned long long)rgd->rd_addr,
101 (unsigned long)bi->bi_start);
102 printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n",
103 (unsigned long)bi->bi_offset,
104 (unsigned long)bi->bi_len);
105 dump_stack();
95 gfs2_consist_rgrpd(rgd); 106 gfs2_consist_rgrpd(rgd);
96 return; 107 return;
97 } 108 }
@@ -381,6 +392,7 @@ static void clear_rgrpdi(struct gfs2_sbd *sdp)
381 392
382 if (gl) { 393 if (gl) {
383 gl->gl_object = NULL; 394 gl->gl_object = NULL;
395 gfs2_glock_add_to_lru(gl);
384 gfs2_glock_put(gl); 396 gfs2_glock_put(gl);
385 } 397 }
386 398
@@ -1365,7 +1377,7 @@ skip:
1365 1377
1366 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1378 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1367 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, 1379 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
1368 bi->bi_len, blk, new_state); 1380 bi, blk, new_state);
1369 goal = blk; 1381 goal = blk;
1370 while (*n < elen) { 1382 while (*n < elen) {
1371 goal++; 1383 goal++;
@@ -1375,7 +1387,7 @@ skip:
1375 GFS2_BLKST_FREE) 1387 GFS2_BLKST_FREE)
1376 break; 1388 break;
1377 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, 1389 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
1378 bi->bi_len, goal, new_state); 1390 bi, goal, new_state);
1379 (*n)++; 1391 (*n)++;
1380 } 1392 }
1381out: 1393out:
@@ -1432,7 +1444,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1432 } 1444 }
1433 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1445 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1434 gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset, 1446 gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset,
1435 bi->bi_len, buf_blk, new_state); 1447 bi, buf_blk, new_state);
1436 } 1448 }
1437 1449
1438 return rgd; 1450 return rgd;
@@ -1617,6 +1629,10 @@ void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1617 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1629 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1618 1630
1619 gfs2_trans_add_rg(rgd); 1631 gfs2_trans_add_rg(rgd);
1632
1633 /* Directories keep their data in the metadata address space */
1634 if (ip->i_depth)
1635 gfs2_meta_wipe(ip, bstart, blen);
1620} 1636}
1621 1637
1622/** 1638/**
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index b9f28e66dad1..ed540e7018be 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -23,6 +23,7 @@
23#include <linux/time.h> 23#include <linux/time.h>
24#include <linux/wait.h> 24#include <linux/wait.h>
25#include <linux/writeback.h> 25#include <linux/writeback.h>
26#include <linux/backing-dev.h>
26 27
27#include "gfs2.h" 28#include "gfs2.h"
28#include "incore.h" 29#include "incore.h"
@@ -700,11 +701,47 @@ void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
700 mutex_unlock(&sdp->sd_freeze_lock); 701 mutex_unlock(&sdp->sd_freeze_lock);
701} 702}
702 703
704void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
705{
706 struct gfs2_dinode *str = buf;
707
708 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
709 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
710 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
711 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
712 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
713 str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
714 str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
715 str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
716 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
717 str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
718 str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
719 str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
720 str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
721 str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
722
723 str->di_goal_meta = cpu_to_be64(ip->i_goal);
724 str->di_goal_data = cpu_to_be64(ip->i_goal);
725 str->di_generation = cpu_to_be64(ip->i_generation);
726
727 str->di_flags = cpu_to_be32(ip->i_diskflags);
728 str->di_height = cpu_to_be16(ip->i_height);
729 str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
730 !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
731 GFS2_FORMAT_DE : 0);
732 str->di_depth = cpu_to_be16(ip->i_depth);
733 str->di_entries = cpu_to_be32(ip->i_entries);
734
735 str->di_eattr = cpu_to_be64(ip->i_eattr);
736 str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
737 str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
738 str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
739}
703 740
704/** 741/**
705 * gfs2_write_inode - Make sure the inode is stable on the disk 742 * gfs2_write_inode - Make sure the inode is stable on the disk
706 * @inode: The inode 743 * @inode: The inode
707 * @sync: synchronous write flag 744 * @wbc: The writeback control structure
708 * 745 *
709 * Returns: errno 746 * Returns: errno
710 */ 747 */
@@ -713,15 +750,17 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
713{ 750{
714 struct gfs2_inode *ip = GFS2_I(inode); 751 struct gfs2_inode *ip = GFS2_I(inode);
715 struct gfs2_sbd *sdp = GFS2_SB(inode); 752 struct gfs2_sbd *sdp = GFS2_SB(inode);
753 struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
754 struct backing_dev_info *bdi = metamapping->backing_dev_info;
716 struct gfs2_holder gh; 755 struct gfs2_holder gh;
717 struct buffer_head *bh; 756 struct buffer_head *bh;
718 struct timespec atime; 757 struct timespec atime;
719 struct gfs2_dinode *di; 758 struct gfs2_dinode *di;
720 int ret = 0; 759 int ret = -EAGAIN;
721 760
722 /* Check this is a "normal" inode, etc */ 761 /* Skip timestamp update, if this is from a memalloc */
723 if (current->flags & PF_MEMALLOC) 762 if (current->flags & PF_MEMALLOC)
724 return 0; 763 goto do_flush;
725 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 764 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
726 if (ret) 765 if (ret)
727 goto do_flush; 766 goto do_flush;
@@ -745,6 +784,13 @@ do_unlock:
745do_flush: 784do_flush:
746 if (wbc->sync_mode == WB_SYNC_ALL) 785 if (wbc->sync_mode == WB_SYNC_ALL)
747 gfs2_log_flush(GFS2_SB(inode), ip->i_gl); 786 gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
787 filemap_fdatawrite(metamapping);
788 if (bdi->dirty_exceeded)
789 gfs2_ail1_flush(sdp, wbc);
790 if (!ret && (wbc->sync_mode == WB_SYNC_ALL))
791 ret = filemap_fdatawait(metamapping);
792 if (ret)
793 mark_inode_dirty_sync(inode);
748 return ret; 794 return ret;
749} 795}
750 796
@@ -874,8 +920,9 @@ restart:
874 920
875static int gfs2_sync_fs(struct super_block *sb, int wait) 921static int gfs2_sync_fs(struct super_block *sb, int wait)
876{ 922{
877 if (wait && sb->s_fs_info) 923 struct gfs2_sbd *sdp = sb->s_fs_info;
878 gfs2_log_flush(sb->s_fs_info, NULL); 924 if (wait && sdp)
925 gfs2_log_flush(sdp, NULL);
879 return 0; 926 return 0;
880} 927}
881 928
@@ -1308,6 +1355,78 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1308 return 0; 1355 return 0;
1309} 1356}
1310 1357
1358static void gfs2_final_release_pages(struct gfs2_inode *ip)
1359{
1360 struct inode *inode = &ip->i_inode;
1361 struct gfs2_glock *gl = ip->i_gl;
1362
1363 truncate_inode_pages(gfs2_glock2aspace(ip->i_gl), 0);
1364 truncate_inode_pages(&inode->i_data, 0);
1365
1366 if (atomic_read(&gl->gl_revokes) == 0) {
1367 clear_bit(GLF_LFLUSH, &gl->gl_flags);
1368 clear_bit(GLF_DIRTY, &gl->gl_flags);
1369 }
1370}
1371
1372static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1373{
1374 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1375 struct gfs2_alloc *al;
1376 struct gfs2_rgrpd *rgd;
1377 int error;
1378
1379 if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
1380 gfs2_consist_inode(ip);
1381 return -EIO;
1382 }
1383
1384 al = gfs2_alloc_get(ip);
1385 if (!al)
1386 return -ENOMEM;
1387
1388 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1389 if (error)
1390 goto out;
1391
1392 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
1393 if (error)
1394 goto out_qs;
1395
1396 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
1397 if (!rgd) {
1398 gfs2_consist_inode(ip);
1399 error = -EIO;
1400 goto out_rindex_relse;
1401 }
1402
1403 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
1404 &al->al_rgd_gh);
1405 if (error)
1406 goto out_rindex_relse;
1407
1408 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
1409 sdp->sd_jdesc->jd_blocks);
1410 if (error)
1411 goto out_rg_gunlock;
1412
1413 gfs2_free_di(rgd, ip);
1414
1415 gfs2_final_release_pages(ip);
1416
1417 gfs2_trans_end(sdp);
1418
1419out_rg_gunlock:
1420 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1421out_rindex_relse:
1422 gfs2_glock_dq_uninit(&al->al_ri_gh);
1423out_qs:
1424 gfs2_quota_unhold(ip);
1425out:
1426 gfs2_alloc_put(ip);
1427 return error;
1428}
1429
1311/* 1430/*
1312 * We have to (at the moment) hold the inodes main lock to cover 1431 * We have to (at the moment) hold the inodes main lock to cover
1313 * the gap between unlocking the shared lock on the iopen lock and 1432 * the gap between unlocking the shared lock on the iopen lock and
@@ -1371,15 +1490,13 @@ static void gfs2_evict_inode(struct inode *inode)
1371 } 1490 }
1372 1491
1373 error = gfs2_dinode_dealloc(ip); 1492 error = gfs2_dinode_dealloc(ip);
1374 if (error) 1493 goto out_unlock;
1375 goto out_unlock;
1376 1494
1377out_truncate: 1495out_truncate:
1378 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); 1496 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
1379 if (error) 1497 if (error)
1380 goto out_unlock; 1498 goto out_unlock;
1381 /* Needs to be done before glock release & also in a transaction */ 1499 gfs2_final_release_pages(ip);
1382 truncate_inode_pages(&inode->i_data, 0);
1383 gfs2_trans_end(sdp); 1500 gfs2_trans_end(sdp);
1384 1501
1385out_unlock: 1502out_unlock:
@@ -1394,6 +1511,7 @@ out:
1394 end_writeback(inode); 1511 end_writeback(inode);
1395 1512
1396 ip->i_gl->gl_object = NULL; 1513 ip->i_gl->gl_object = NULL;
1514 gfs2_glock_add_to_lru(ip->i_gl);
1397 gfs2_glock_put(ip->i_gl); 1515 gfs2_glock_put(ip->i_gl);
1398 ip->i_gl = NULL; 1516 ip->i_gl = NULL;
1399 if (ip->i_iopen_gh.gh_gl) { 1517 if (ip->i_iopen_gh.gh_gl) {
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 748ccb557c18..e20eab37bc80 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -81,7 +81,8 @@ static int gfs2_uuid_valid(const u8 *uuid)
81 81
82static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf) 82static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
83{ 83{
84 const u8 *uuid = sdp->sd_sb.sb_uuid; 84 struct super_block *s = sdp->sd_vfs;
85 const u8 *uuid = s->s_uuid;
85 buf[0] = '\0'; 86 buf[0] = '\0';
86 if (!gfs2_uuid_valid(uuid)) 87 if (!gfs2_uuid_valid(uuid))
87 return 0; 88 return 0;
@@ -616,7 +617,8 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
616 struct kobj_uevent_env *env) 617 struct kobj_uevent_env *env)
617{ 618{
618 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); 619 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
619 const u8 *uuid = sdp->sd_sb.sb_uuid; 620 struct super_block *s = sdp->sd_vfs;
621 const u8 *uuid = s->s_uuid;
620 622
621 add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); 623 add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
622 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); 624 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index cedb0bb96d96..5d07609ec57d 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -10,6 +10,7 @@
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11#include <linux/dlmconstants.h> 11#include <linux/dlmconstants.h>
12#include <linux/gfs2_ondisk.h> 12#include <linux/gfs2_ondisk.h>
13#include <linux/writeback.h>
13#include "incore.h" 14#include "incore.h"
14#include "glock.h" 15#include "glock.h"
15 16
@@ -40,7 +41,9 @@
40 {(1UL << GLF_REPLY_PENDING), "r" }, \ 41 {(1UL << GLF_REPLY_PENDING), "r" }, \
41 {(1UL << GLF_INITIAL), "I" }, \ 42 {(1UL << GLF_INITIAL), "I" }, \
42 {(1UL << GLF_FROZEN), "F" }, \ 43 {(1UL << GLF_FROZEN), "F" }, \
43 {(1UL << GLF_QUEUED), "q" }) 44 {(1UL << GLF_QUEUED), "q" }, \
45 {(1UL << GLF_LRU), "L" }, \
46 {(1UL << GLF_OBJECT), "o" })
44 47
45#ifndef NUMPTY 48#ifndef NUMPTY
46#define NUMPTY 49#define NUMPTY
@@ -94,7 +97,7 @@ TRACE_EVENT(gfs2_glock_state_change,
94 __entry->new_state = glock_trace_state(new_state); 97 __entry->new_state = glock_trace_state(new_state);
95 __entry->tgt_state = glock_trace_state(gl->gl_target); 98 __entry->tgt_state = glock_trace_state(gl->gl_target);
96 __entry->dmt_state = glock_trace_state(gl->gl_demote_state); 99 __entry->dmt_state = glock_trace_state(gl->gl_demote_state);
97 __entry->flags = gl->gl_flags; 100 __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
98 ), 101 ),
99 102
100 TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s", 103 TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s",
@@ -127,7 +130,7 @@ TRACE_EVENT(gfs2_glock_put,
127 __entry->gltype = gl->gl_name.ln_type; 130 __entry->gltype = gl->gl_name.ln_type;
128 __entry->glnum = gl->gl_name.ln_number; 131 __entry->glnum = gl->gl_name.ln_number;
129 __entry->cur_state = glock_trace_state(gl->gl_state); 132 __entry->cur_state = glock_trace_state(gl->gl_state);
130 __entry->flags = gl->gl_flags; 133 __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
131 ), 134 ),
132 135
133 TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s", 136 TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s",
@@ -161,7 +164,7 @@ TRACE_EVENT(gfs2_demote_rq,
161 __entry->glnum = gl->gl_name.ln_number; 164 __entry->glnum = gl->gl_name.ln_number;
162 __entry->cur_state = glock_trace_state(gl->gl_state); 165 __entry->cur_state = glock_trace_state(gl->gl_state);
163 __entry->dmt_state = glock_trace_state(gl->gl_demote_state); 166 __entry->dmt_state = glock_trace_state(gl->gl_demote_state);
164 __entry->flags = gl->gl_flags; 167 __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
165 ), 168 ),
166 169
167 TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s", 170 TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s",
@@ -318,6 +321,33 @@ TRACE_EVENT(gfs2_log_blocks,
318 MINOR(__entry->dev), __entry->blocks) 321 MINOR(__entry->dev), __entry->blocks)
319); 322);
320 323
324/* Writing back the AIL */
325TRACE_EVENT(gfs2_ail_flush,
326
327 TP_PROTO(const struct gfs2_sbd *sdp, const struct writeback_control *wbc, int start),
328
329 TP_ARGS(sdp, wbc, start),
330
331 TP_STRUCT__entry(
332 __field( dev_t, dev )
333 __field( int, start )
334 __field( int, sync_mode )
335 __field( long, nr_to_write )
336 ),
337
338 TP_fast_assign(
339 __entry->dev = sdp->sd_vfs->s_dev;
340 __entry->start = start;
341 __entry->sync_mode = wbc->sync_mode;
342 __entry->nr_to_write = wbc->nr_to_write;
343 ),
344
345 TP_printk("%u,%u ail flush %s %s %ld", MAJOR(__entry->dev),
346 MINOR(__entry->dev), __entry->start ? "start" : "end",
347 __entry->sync_mode == WB_SYNC_ALL ? "all" : "none",
348 __entry->nr_to_write)
349);
350
321/* Section 3 - bmap 351/* Section 3 - bmap
322 * 352 *
323 * Objectives: 353 * Objectives:
diff --git a/fs/hpfs/Kconfig b/fs/hpfs/Kconfig
index 0c39dc3ef7d7..56bd15c5bf6c 100644
--- a/fs/hpfs/Kconfig
+++ b/fs/hpfs/Kconfig
@@ -1,7 +1,6 @@
1config HPFS_FS 1config HPFS_FS
2 tristate "OS/2 HPFS file system support" 2 tristate "OS/2 HPFS file system support"
3 depends on BLOCK 3 depends on BLOCK
4 depends on BROKEN || !PREEMPT
5 help 4 help
6 OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS 5 OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS
7 is the file system used for organizing files on OS/2 hard disk 6 is the file system used for organizing files on OS/2 hard disk
diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c
index 5503e2c28910..7a5eb2c718c8 100644
--- a/fs/hpfs/alloc.c
+++ b/fs/hpfs/alloc.c
@@ -8,8 +8,6 @@
8 8
9#include "hpfs_fn.h" 9#include "hpfs_fn.h"
10 10
11static int hpfs_alloc_if_possible_nolock(struct super_block *s, secno sec);
12
13/* 11/*
14 * Check if a sector is allocated in bitmap 12 * Check if a sector is allocated in bitmap
15 * This is really slow. Turned on only if chk==2 13 * This is really slow. Turned on only if chk==2
@@ -18,9 +16,9 @@ static int hpfs_alloc_if_possible_nolock(struct super_block *s, secno sec);
18static int chk_if_allocated(struct super_block *s, secno sec, char *msg) 16static int chk_if_allocated(struct super_block *s, secno sec, char *msg)
19{ 17{
20 struct quad_buffer_head qbh; 18 struct quad_buffer_head qbh;
21 unsigned *bmp; 19 u32 *bmp;
22 if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "chk"))) goto fail; 20 if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "chk"))) goto fail;
23 if ((bmp[(sec & 0x3fff) >> 5] >> (sec & 0x1f)) & 1) { 21 if ((cpu_to_le32(bmp[(sec & 0x3fff) >> 5]) >> (sec & 0x1f)) & 1) {
24 hpfs_error(s, "sector '%s' - %08x not allocated in bitmap", msg, sec); 22 hpfs_error(s, "sector '%s' - %08x not allocated in bitmap", msg, sec);
25 goto fail1; 23 goto fail1;
26 } 24 }
@@ -28,7 +26,7 @@ static int chk_if_allocated(struct super_block *s, secno sec, char *msg)
28 if (sec >= hpfs_sb(s)->sb_dirband_start && sec < hpfs_sb(s)->sb_dirband_start + hpfs_sb(s)->sb_dirband_size) { 26 if (sec >= hpfs_sb(s)->sb_dirband_start && sec < hpfs_sb(s)->sb_dirband_start + hpfs_sb(s)->sb_dirband_size) {
29 unsigned ssec = (sec - hpfs_sb(s)->sb_dirband_start) / 4; 27 unsigned ssec = (sec - hpfs_sb(s)->sb_dirband_start) / 4;
30 if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) goto fail; 28 if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) goto fail;
31 if ((bmp[ssec >> 5] >> (ssec & 0x1f)) & 1) { 29 if ((le32_to_cpu(bmp[ssec >> 5]) >> (ssec & 0x1f)) & 1) {
32 hpfs_error(s, "sector '%s' - %08x not allocated in directory bitmap", msg, sec); 30 hpfs_error(s, "sector '%s' - %08x not allocated in directory bitmap", msg, sec);
33 goto fail1; 31 goto fail1;
34 } 32 }
@@ -75,7 +73,6 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
75 hpfs_error(s, "Bad allocation size: %d", n); 73 hpfs_error(s, "Bad allocation size: %d", n);
76 return 0; 74 return 0;
77 } 75 }
78 lock_super(s);
79 if (bs != ~0x3fff) { 76 if (bs != ~0x3fff) {
80 if (!(bmp = hpfs_map_bitmap(s, near >> 14, &qbh, "aib"))) goto uls; 77 if (!(bmp = hpfs_map_bitmap(s, near >> 14, &qbh, "aib"))) goto uls;
81 } else { 78 } else {
@@ -85,10 +82,6 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
85 ret = bs + nr; 82 ret = bs + nr;
86 goto rt; 83 goto rt;
87 } 84 }
88 /*if (!tstbits(bmp, nr + n, n + forward)) {
89 ret = bs + nr + n;
90 goto rt;
91 }*/
92 q = nr + n; b = 0; 85 q = nr + n; b = 0;
93 while ((a = tstbits(bmp, q, n + forward)) != 0) { 86 while ((a = tstbits(bmp, q, n + forward)) != 0) {
94 q += a; 87 q += a;
@@ -105,14 +98,14 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
105 goto rt; 98 goto rt;
106 } 99 }
107 nr >>= 5; 100 nr >>= 5;
108 /*for (i = nr + 1; i != nr; i++, i &= 0x1ff) {*/ 101 /*for (i = nr + 1; i != nr; i++, i &= 0x1ff) */
109 i = nr; 102 i = nr;
110 do { 103 do {
111 if (!bmp[i]) goto cont; 104 if (!le32_to_cpu(bmp[i])) goto cont;
112 if (n + forward >= 0x3f && bmp[i] != -1) goto cont; 105 if (n + forward >= 0x3f && le32_to_cpu(bmp[i]) != 0xffffffff) goto cont;
113 q = i<<5; 106 q = i<<5;
114 if (i > 0) { 107 if (i > 0) {
115 unsigned k = bmp[i-1]; 108 unsigned k = le32_to_cpu(bmp[i-1]);
116 while (k & 0x80000000) { 109 while (k & 0x80000000) {
117 q--; k <<= 1; 110 q--; k <<= 1;
118 } 111 }
@@ -132,18 +125,17 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
132 } while (i != nr); 125 } while (i != nr);
133 rt: 126 rt:
134 if (ret) { 127 if (ret) {
135 if (hpfs_sb(s)->sb_chk && ((ret >> 14) != (bs >> 14) || (bmp[(ret & 0x3fff) >> 5] | ~(((1 << n) - 1) << (ret & 0x1f))) != 0xffffffff)) { 128 if (hpfs_sb(s)->sb_chk && ((ret >> 14) != (bs >> 14) || (le32_to_cpu(bmp[(ret & 0x3fff) >> 5]) | ~(((1 << n) - 1) << (ret & 0x1f))) != 0xffffffff)) {
136 hpfs_error(s, "Allocation doesn't work! Wanted %d, allocated at %08x", n, ret); 129 hpfs_error(s, "Allocation doesn't work! Wanted %d, allocated at %08x", n, ret);
137 ret = 0; 130 ret = 0;
138 goto b; 131 goto b;
139 } 132 }
140 bmp[(ret & 0x3fff) >> 5] &= ~(((1 << n) - 1) << (ret & 0x1f)); 133 bmp[(ret & 0x3fff) >> 5] &= cpu_to_le32(~(((1 << n) - 1) << (ret & 0x1f)));
141 hpfs_mark_4buffers_dirty(&qbh); 134 hpfs_mark_4buffers_dirty(&qbh);
142 } 135 }
143 b: 136 b:
144 hpfs_brelse4(&qbh); 137 hpfs_brelse4(&qbh);
145 uls: 138 uls:
146 unlock_super(s);
147 return ret; 139 return ret;
148} 140}
149 141
@@ -155,7 +147,7 @@ static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigne
155 * sectors 147 * sectors
156 */ 148 */
157 149
158secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forward, int lock) 150secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forward)
159{ 151{
160 secno sec; 152 secno sec;
161 int i; 153 int i;
@@ -167,7 +159,6 @@ secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forwa
167 forward = -forward; 159 forward = -forward;
168 f_p = 1; 160 f_p = 1;
169 } 161 }
170 if (lock) hpfs_lock_creation(s);
171 n_bmps = (sbi->sb_fs_size + 0x4000 - 1) >> 14; 162 n_bmps = (sbi->sb_fs_size + 0x4000 - 1) >> 14;
172 if (near && near < sbi->sb_fs_size) { 163 if (near && near < sbi->sb_fs_size) {
173 if ((sec = alloc_in_bmp(s, near, n, f_p ? forward : forward/4))) goto ret; 164 if ((sec = alloc_in_bmp(s, near, n, f_p ? forward : forward/4))) goto ret;
@@ -214,18 +205,17 @@ secno hpfs_alloc_sector(struct super_block *s, secno near, unsigned n, int forwa
214 ret: 205 ret:
215 if (sec && f_p) { 206 if (sec && f_p) {
216 for (i = 0; i < forward; i++) { 207 for (i = 0; i < forward; i++) {
217 if (!hpfs_alloc_if_possible_nolock(s, sec + i + 1)) { 208 if (!hpfs_alloc_if_possible(s, sec + i + 1)) {
218 hpfs_error(s, "Prealloc doesn't work! Wanted %d, allocated at %08x, can't allocate %d", forward, sec, i); 209 hpfs_error(s, "Prealloc doesn't work! Wanted %d, allocated at %08x, can't allocate %d", forward, sec, i);
219 sec = 0; 210 sec = 0;
220 break; 211 break;
221 } 212 }
222 } 213 }
223 } 214 }
224 if (lock) hpfs_unlock_creation(s);
225 return sec; 215 return sec;
226} 216}
227 217
228static secno alloc_in_dirband(struct super_block *s, secno near, int lock) 218static secno alloc_in_dirband(struct super_block *s, secno near)
229{ 219{
230 unsigned nr = near; 220 unsigned nr = near;
231 secno sec; 221 secno sec;
@@ -236,49 +226,35 @@ static secno alloc_in_dirband(struct super_block *s, secno near, int lock)
236 nr = sbi->sb_dirband_start + sbi->sb_dirband_size - 4; 226 nr = sbi->sb_dirband_start + sbi->sb_dirband_size - 4;
237 nr -= sbi->sb_dirband_start; 227 nr -= sbi->sb_dirband_start;
238 nr >>= 2; 228 nr >>= 2;
239 if (lock) hpfs_lock_creation(s);
240 sec = alloc_in_bmp(s, (~0x3fff) | nr, 1, 0); 229 sec = alloc_in_bmp(s, (~0x3fff) | nr, 1, 0);
241 if (lock) hpfs_unlock_creation(s);
242 if (!sec) return 0; 230 if (!sec) return 0;
243 return ((sec & 0x3fff) << 2) + sbi->sb_dirband_start; 231 return ((sec & 0x3fff) << 2) + sbi->sb_dirband_start;
244} 232}
245 233
246/* Alloc sector if it's free */ 234/* Alloc sector if it's free */
247 235
248static int hpfs_alloc_if_possible_nolock(struct super_block *s, secno sec) 236int hpfs_alloc_if_possible(struct super_block *s, secno sec)
249{ 237{
250 struct quad_buffer_head qbh; 238 struct quad_buffer_head qbh;
251 unsigned *bmp; 239 u32 *bmp;
252 lock_super(s);
253 if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "aip"))) goto end; 240 if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "aip"))) goto end;
254 if (bmp[(sec & 0x3fff) >> 5] & (1 << (sec & 0x1f))) { 241 if (le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) & (1 << (sec & 0x1f))) {
255 bmp[(sec & 0x3fff) >> 5] &= ~(1 << (sec & 0x1f)); 242 bmp[(sec & 0x3fff) >> 5] &= cpu_to_le32(~(1 << (sec & 0x1f)));
256 hpfs_mark_4buffers_dirty(&qbh); 243 hpfs_mark_4buffers_dirty(&qbh);
257 hpfs_brelse4(&qbh); 244 hpfs_brelse4(&qbh);
258 unlock_super(s);
259 return 1; 245 return 1;
260 } 246 }
261 hpfs_brelse4(&qbh); 247 hpfs_brelse4(&qbh);
262 end: 248 end:
263 unlock_super(s);
264 return 0; 249 return 0;
265} 250}
266 251
267int hpfs_alloc_if_possible(struct super_block *s, secno sec)
268{
269 int r;
270 hpfs_lock_creation(s);
271 r = hpfs_alloc_if_possible_nolock(s, sec);
272 hpfs_unlock_creation(s);
273 return r;
274}
275
276/* Free sectors in bitmaps */ 252/* Free sectors in bitmaps */
277 253
278void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n) 254void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n)
279{ 255{
280 struct quad_buffer_head qbh; 256 struct quad_buffer_head qbh;
281 unsigned *bmp; 257 u32 *bmp;
282 struct hpfs_sb_info *sbi = hpfs_sb(s); 258 struct hpfs_sb_info *sbi = hpfs_sb(s);
283 /*printk("2 - ");*/ 259 /*printk("2 - ");*/
284 if (!n) return; 260 if (!n) return;
@@ -286,26 +262,22 @@ void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n)
286 hpfs_error(s, "Trying to free reserved sector %08x", sec); 262 hpfs_error(s, "Trying to free reserved sector %08x", sec);
287 return; 263 return;
288 } 264 }
289 lock_super(s);
290 sbi->sb_max_fwd_alloc += n > 0xffff ? 0xffff : n; 265 sbi->sb_max_fwd_alloc += n > 0xffff ? 0xffff : n;
291 if (sbi->sb_max_fwd_alloc > 0xffffff) sbi->sb_max_fwd_alloc = 0xffffff; 266 if (sbi->sb_max_fwd_alloc > 0xffffff) sbi->sb_max_fwd_alloc = 0xffffff;
292 new_map: 267 new_map:
293 if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "free"))) { 268 if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "free"))) {
294 unlock_super(s);
295 return; 269 return;
296 } 270 }
297 new_tst: 271 new_tst:
298 if ((bmp[(sec & 0x3fff) >> 5] >> (sec & 0x1f) & 1)) { 272 if ((le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) >> (sec & 0x1f) & 1)) {
299 hpfs_error(s, "sector %08x not allocated", sec); 273 hpfs_error(s, "sector %08x not allocated", sec);
300 hpfs_brelse4(&qbh); 274 hpfs_brelse4(&qbh);
301 unlock_super(s);
302 return; 275 return;
303 } 276 }
304 bmp[(sec & 0x3fff) >> 5] |= 1 << (sec & 0x1f); 277 bmp[(sec & 0x3fff) >> 5] |= cpu_to_le32(1 << (sec & 0x1f));
305 if (!--n) { 278 if (!--n) {
306 hpfs_mark_4buffers_dirty(&qbh); 279 hpfs_mark_4buffers_dirty(&qbh);
307 hpfs_brelse4(&qbh); 280 hpfs_brelse4(&qbh);
308 unlock_super(s);
309 return; 281 return;
310 } 282 }
311 if (!(++sec & 0x3fff)) { 283 if (!(++sec & 0x3fff)) {
@@ -327,13 +299,13 @@ int hpfs_check_free_dnodes(struct super_block *s, int n)
327 int n_bmps = (hpfs_sb(s)->sb_fs_size + 0x4000 - 1) >> 14; 299 int n_bmps = (hpfs_sb(s)->sb_fs_size + 0x4000 - 1) >> 14;
328 int b = hpfs_sb(s)->sb_c_bitmap & 0x0fffffff; 300 int b = hpfs_sb(s)->sb_c_bitmap & 0x0fffffff;
329 int i, j; 301 int i, j;
330 unsigned *bmp; 302 u32 *bmp;
331 struct quad_buffer_head qbh; 303 struct quad_buffer_head qbh;
332 if ((bmp = hpfs_map_dnode_bitmap(s, &qbh))) { 304 if ((bmp = hpfs_map_dnode_bitmap(s, &qbh))) {
333 for (j = 0; j < 512; j++) { 305 for (j = 0; j < 512; j++) {
334 unsigned k; 306 unsigned k;
335 if (!bmp[j]) continue; 307 if (!le32_to_cpu(bmp[j])) continue;
336 for (k = bmp[j]; k; k >>= 1) if (k & 1) if (!--n) { 308 for (k = le32_to_cpu(bmp[j]); k; k >>= 1) if (k & 1) if (!--n) {
337 hpfs_brelse4(&qbh); 309 hpfs_brelse4(&qbh);
338 return 0; 310 return 0;
339 } 311 }
@@ -352,10 +324,10 @@ int hpfs_check_free_dnodes(struct super_block *s, int n)
352 chk_bmp: 324 chk_bmp:
353 if (bmp) { 325 if (bmp) {
354 for (j = 0; j < 512; j++) { 326 for (j = 0; j < 512; j++) {
355 unsigned k; 327 u32 k;
356 if (!bmp[j]) continue; 328 if (!le32_to_cpu(bmp[j])) continue;
357 for (k = 0xf; k; k <<= 4) 329 for (k = 0xf; k; k <<= 4)
358 if ((bmp[j] & k) == k) { 330 if ((le32_to_cpu(bmp[j]) & k) == k) {
359 if (!--n) { 331 if (!--n) {
360 hpfs_brelse4(&qbh); 332 hpfs_brelse4(&qbh);
361 return 0; 333 return 0;
@@ -379,44 +351,40 @@ void hpfs_free_dnode(struct super_block *s, dnode_secno dno)
379 hpfs_free_sectors(s, dno, 4); 351 hpfs_free_sectors(s, dno, 4);
380 } else { 352 } else {
381 struct quad_buffer_head qbh; 353 struct quad_buffer_head qbh;
382 unsigned *bmp; 354 u32 *bmp;
383 unsigned ssec = (dno - hpfs_sb(s)->sb_dirband_start) / 4; 355 unsigned ssec = (dno - hpfs_sb(s)->sb_dirband_start) / 4;
384 lock_super(s);
385 if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) { 356 if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) {
386 unlock_super(s);
387 return; 357 return;
388 } 358 }
389 bmp[ssec >> 5] |= 1 << (ssec & 0x1f); 359 bmp[ssec >> 5] |= cpu_to_le32(1 << (ssec & 0x1f));
390 hpfs_mark_4buffers_dirty(&qbh); 360 hpfs_mark_4buffers_dirty(&qbh);
391 hpfs_brelse4(&qbh); 361 hpfs_brelse4(&qbh);
392 unlock_super(s);
393 } 362 }
394} 363}
395 364
396struct dnode *hpfs_alloc_dnode(struct super_block *s, secno near, 365struct dnode *hpfs_alloc_dnode(struct super_block *s, secno near,
397 dnode_secno *dno, struct quad_buffer_head *qbh, 366 dnode_secno *dno, struct quad_buffer_head *qbh)
398 int lock)
399{ 367{
400 struct dnode *d; 368 struct dnode *d;
401 if (hpfs_count_one_bitmap(s, hpfs_sb(s)->sb_dmap) > FREE_DNODES_ADD) { 369 if (hpfs_count_one_bitmap(s, hpfs_sb(s)->sb_dmap) > FREE_DNODES_ADD) {
402 if (!(*dno = alloc_in_dirband(s, near, lock))) 370 if (!(*dno = alloc_in_dirband(s, near)))
403 if (!(*dno = hpfs_alloc_sector(s, near, 4, 0, lock))) return NULL; 371 if (!(*dno = hpfs_alloc_sector(s, near, 4, 0))) return NULL;
404 } else { 372 } else {
405 if (!(*dno = hpfs_alloc_sector(s, near, 4, 0, lock))) 373 if (!(*dno = hpfs_alloc_sector(s, near, 4, 0)))
406 if (!(*dno = alloc_in_dirband(s, near, lock))) return NULL; 374 if (!(*dno = alloc_in_dirband(s, near))) return NULL;
407 } 375 }
408 if (!(d = hpfs_get_4sectors(s, *dno, qbh))) { 376 if (!(d = hpfs_get_4sectors(s, *dno, qbh))) {
409 hpfs_free_dnode(s, *dno); 377 hpfs_free_dnode(s, *dno);
410 return NULL; 378 return NULL;
411 } 379 }
412 memset(d, 0, 2048); 380 memset(d, 0, 2048);
413 d->magic = DNODE_MAGIC; 381 d->magic = cpu_to_le32(DNODE_MAGIC);
414 d->first_free = 52; 382 d->first_free = cpu_to_le32(52);
415 d->dirent[0] = 32; 383 d->dirent[0] = 32;
416 d->dirent[2] = 8; 384 d->dirent[2] = 8;
417 d->dirent[30] = 1; 385 d->dirent[30] = 1;
418 d->dirent[31] = 255; 386 d->dirent[31] = 255;
419 d->self = *dno; 387 d->self = cpu_to_le32(*dno);
420 return d; 388 return d;
421} 389}
422 390
@@ -424,16 +392,16 @@ struct fnode *hpfs_alloc_fnode(struct super_block *s, secno near, fnode_secno *f
424 struct buffer_head **bh) 392 struct buffer_head **bh)
425{ 393{
426 struct fnode *f; 394 struct fnode *f;
427 if (!(*fno = hpfs_alloc_sector(s, near, 1, FNODE_ALLOC_FWD, 1))) return NULL; 395 if (!(*fno = hpfs_alloc_sector(s, near, 1, FNODE_ALLOC_FWD))) return NULL;
428 if (!(f = hpfs_get_sector(s, *fno, bh))) { 396 if (!(f = hpfs_get_sector(s, *fno, bh))) {
429 hpfs_free_sectors(s, *fno, 1); 397 hpfs_free_sectors(s, *fno, 1);
430 return NULL; 398 return NULL;
431 } 399 }
432 memset(f, 0, 512); 400 memset(f, 0, 512);
433 f->magic = FNODE_MAGIC; 401 f->magic = cpu_to_le32(FNODE_MAGIC);
434 f->ea_offs = 0xc4; 402 f->ea_offs = cpu_to_le16(0xc4);
435 f->btree.n_free_nodes = 8; 403 f->btree.n_free_nodes = 8;
436 f->btree.first_free = 8; 404 f->btree.first_free = cpu_to_le16(8);
437 return f; 405 return f;
438} 406}
439 407
@@ -441,16 +409,16 @@ struct anode *hpfs_alloc_anode(struct super_block *s, secno near, anode_secno *a
441 struct buffer_head **bh) 409 struct buffer_head **bh)
442{ 410{
443 struct anode *a; 411 struct anode *a;
444 if (!(*ano = hpfs_alloc_sector(s, near, 1, ANODE_ALLOC_FWD, 1))) return NULL; 412 if (!(*ano = hpfs_alloc_sector(s, near, 1, ANODE_ALLOC_FWD))) return NULL;
445 if (!(a = hpfs_get_sector(s, *ano, bh))) { 413 if (!(a = hpfs_get_sector(s, *ano, bh))) {
446 hpfs_free_sectors(s, *ano, 1); 414 hpfs_free_sectors(s, *ano, 1);
447 return NULL; 415 return NULL;
448 } 416 }
449 memset(a, 0, 512); 417 memset(a, 0, 512);
450 a->magic = ANODE_MAGIC; 418 a->magic = cpu_to_le32(ANODE_MAGIC);
451 a->self = *ano; 419 a->self = cpu_to_le32(*ano);
452 a->btree.n_free_nodes = 40; 420 a->btree.n_free_nodes = 40;
453 a->btree.n_used_nodes = 0; 421 a->btree.n_used_nodes = 0;
454 a->btree.first_free = 8; 422 a->btree.first_free = cpu_to_le16(8);
455 return a; 423 return a;
456} 424}
diff --git a/fs/hpfs/anode.c b/fs/hpfs/anode.c
index 6a2f04bf3df0..08b503e8ed29 100644
--- a/fs/hpfs/anode.c
+++ b/fs/hpfs/anode.c
@@ -22,8 +22,8 @@ secno hpfs_bplus_lookup(struct super_block *s, struct inode *inode,
22 if (hpfs_sb(s)->sb_chk) if (hpfs_stop_cycles(s, a, &c1, &c2, "hpfs_bplus_lookup")) return -1; 22 if (hpfs_sb(s)->sb_chk) if (hpfs_stop_cycles(s, a, &c1, &c2, "hpfs_bplus_lookup")) return -1;
23 if (btree->internal) { 23 if (btree->internal) {
24 for (i = 0; i < btree->n_used_nodes; i++) 24 for (i = 0; i < btree->n_used_nodes; i++)
25 if (btree->u.internal[i].file_secno > sec) { 25 if (le32_to_cpu(btree->u.internal[i].file_secno) > sec) {
26 a = btree->u.internal[i].down; 26 a = le32_to_cpu(btree->u.internal[i].down);
27 brelse(bh); 27 brelse(bh);
28 if (!(anode = hpfs_map_anode(s, a, &bh))) return -1; 28 if (!(anode = hpfs_map_anode(s, a, &bh))) return -1;
29 btree = &anode->btree; 29 btree = &anode->btree;
@@ -34,18 +34,18 @@ secno hpfs_bplus_lookup(struct super_block *s, struct inode *inode,
34 return -1; 34 return -1;
35 } 35 }
36 for (i = 0; i < btree->n_used_nodes; i++) 36 for (i = 0; i < btree->n_used_nodes; i++)
37 if (btree->u.external[i].file_secno <= sec && 37 if (le32_to_cpu(btree->u.external[i].file_secno) <= sec &&
38 btree->u.external[i].file_secno + btree->u.external[i].length > sec) { 38 le32_to_cpu(btree->u.external[i].file_secno) + le32_to_cpu(btree->u.external[i].length) > sec) {
39 a = btree->u.external[i].disk_secno + sec - btree->u.external[i].file_secno; 39 a = le32_to_cpu(btree->u.external[i].disk_secno) + sec - le32_to_cpu(btree->u.external[i].file_secno);
40 if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, a, 1, "data")) { 40 if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, a, 1, "data")) {
41 brelse(bh); 41 brelse(bh);
42 return -1; 42 return -1;
43 } 43 }
44 if (inode) { 44 if (inode) {
45 struct hpfs_inode_info *hpfs_inode = hpfs_i(inode); 45 struct hpfs_inode_info *hpfs_inode = hpfs_i(inode);
46 hpfs_inode->i_file_sec = btree->u.external[i].file_secno; 46 hpfs_inode->i_file_sec = le32_to_cpu(btree->u.external[i].file_secno);
47 hpfs_inode->i_disk_sec = btree->u.external[i].disk_secno; 47 hpfs_inode->i_disk_sec = le32_to_cpu(btree->u.external[i].disk_secno);
48 hpfs_inode->i_n_secs = btree->u.external[i].length; 48 hpfs_inode->i_n_secs = le32_to_cpu(btree->u.external[i].length);
49 } 49 }
50 brelse(bh); 50 brelse(bh);
51 return a; 51 return a;
@@ -83,8 +83,8 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
83 return -1; 83 return -1;
84 } 84 }
85 if (btree->internal) { 85 if (btree->internal) {
86 a = btree->u.internal[n].down; 86 a = le32_to_cpu(btree->u.internal[n].down);
87 btree->u.internal[n].file_secno = -1; 87 btree->u.internal[n].file_secno = cpu_to_le32(-1);
88 mark_buffer_dirty(bh); 88 mark_buffer_dirty(bh);
89 brelse(bh); 89 brelse(bh);
90 if (hpfs_sb(s)->sb_chk) 90 if (hpfs_sb(s)->sb_chk)
@@ -94,15 +94,15 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
94 goto go_down; 94 goto go_down;
95 } 95 }
96 if (n >= 0) { 96 if (n >= 0) {
97 if (btree->u.external[n].file_secno + btree->u.external[n].length != fsecno) { 97 if (le32_to_cpu(btree->u.external[n].file_secno) + le32_to_cpu(btree->u.external[n].length) != fsecno) {
98 hpfs_error(s, "allocated size %08x, trying to add sector %08x, %cnode %08x", 98 hpfs_error(s, "allocated size %08x, trying to add sector %08x, %cnode %08x",
99 btree->u.external[n].file_secno + btree->u.external[n].length, fsecno, 99 le32_to_cpu(btree->u.external[n].file_secno) + le32_to_cpu(btree->u.external[n].length), fsecno,
100 fnod?'f':'a', node); 100 fnod?'f':'a', node);
101 brelse(bh); 101 brelse(bh);
102 return -1; 102 return -1;
103 } 103 }
104 if (hpfs_alloc_if_possible(s, se = btree->u.external[n].disk_secno + btree->u.external[n].length)) { 104 if (hpfs_alloc_if_possible(s, se = le32_to_cpu(btree->u.external[n].disk_secno) + le32_to_cpu(btree->u.external[n].length))) {
105 btree->u.external[n].length++; 105 btree->u.external[n].length = cpu_to_le32(le32_to_cpu(btree->u.external[n].length) + 1);
106 mark_buffer_dirty(bh); 106 mark_buffer_dirty(bh);
107 brelse(bh); 107 brelse(bh);
108 return se; 108 return se;
@@ -115,20 +115,20 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
115 } 115 }
116 se = !fnod ? node : (node + 16384) & ~16383; 116 se = !fnod ? node : (node + 16384) & ~16383;
117 } 117 }
118 if (!(se = hpfs_alloc_sector(s, se, 1, fsecno*ALLOC_M>ALLOC_FWD_MAX ? ALLOC_FWD_MAX : fsecno*ALLOC_M<ALLOC_FWD_MIN ? ALLOC_FWD_MIN : fsecno*ALLOC_M, 1))) { 118 if (!(se = hpfs_alloc_sector(s, se, 1, fsecno*ALLOC_M>ALLOC_FWD_MAX ? ALLOC_FWD_MAX : fsecno*ALLOC_M<ALLOC_FWD_MIN ? ALLOC_FWD_MIN : fsecno*ALLOC_M))) {
119 brelse(bh); 119 brelse(bh);
120 return -1; 120 return -1;
121 } 121 }
122 fs = n < 0 ? 0 : btree->u.external[n].file_secno + btree->u.external[n].length; 122 fs = n < 0 ? 0 : le32_to_cpu(btree->u.external[n].file_secno) + le32_to_cpu(btree->u.external[n].length);
123 if (!btree->n_free_nodes) { 123 if (!btree->n_free_nodes) {
124 up = a != node ? anode->up : -1; 124 up = a != node ? le32_to_cpu(anode->up) : -1;
125 if (!(anode = hpfs_alloc_anode(s, a, &na, &bh1))) { 125 if (!(anode = hpfs_alloc_anode(s, a, &na, &bh1))) {
126 brelse(bh); 126 brelse(bh);
127 hpfs_free_sectors(s, se, 1); 127 hpfs_free_sectors(s, se, 1);
128 return -1; 128 return -1;
129 } 129 }
130 if (a == node && fnod) { 130 if (a == node && fnod) {
131 anode->up = node; 131 anode->up = cpu_to_le32(node);
132 anode->btree.fnode_parent = 1; 132 anode->btree.fnode_parent = 1;
133 anode->btree.n_used_nodes = btree->n_used_nodes; 133 anode->btree.n_used_nodes = btree->n_used_nodes;
134 anode->btree.first_free = btree->first_free; 134 anode->btree.first_free = btree->first_free;
@@ -137,9 +137,9 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
137 btree->internal = 1; 137 btree->internal = 1;
138 btree->n_free_nodes = 11; 138 btree->n_free_nodes = 11;
139 btree->n_used_nodes = 1; 139 btree->n_used_nodes = 1;
140 btree->first_free = (char *)&(btree->u.internal[1]) - (char *)btree; 140 btree->first_free = cpu_to_le16((char *)&(btree->u.internal[1]) - (char *)btree);
141 btree->u.internal[0].file_secno = -1; 141 btree->u.internal[0].file_secno = cpu_to_le32(-1);
142 btree->u.internal[0].down = na; 142 btree->u.internal[0].down = cpu_to_le32(na);
143 mark_buffer_dirty(bh); 143 mark_buffer_dirty(bh);
144 } else if (!(ranode = hpfs_alloc_anode(s, /*a*/0, &ra, &bh2))) { 144 } else if (!(ranode = hpfs_alloc_anode(s, /*a*/0, &ra, &bh2))) {
145 brelse(bh); 145 brelse(bh);
@@ -153,15 +153,15 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
153 btree = &anode->btree; 153 btree = &anode->btree;
154 } 154 }
155 btree->n_free_nodes--; n = btree->n_used_nodes++; 155 btree->n_free_nodes--; n = btree->n_used_nodes++;
156 btree->first_free += 12; 156 btree->first_free = cpu_to_le16(le16_to_cpu(btree->first_free) + 12);
157 btree->u.external[n].disk_secno = se; 157 btree->u.external[n].disk_secno = cpu_to_le32(se);
158 btree->u.external[n].file_secno = fs; 158 btree->u.external[n].file_secno = cpu_to_le32(fs);
159 btree->u.external[n].length = 1; 159 btree->u.external[n].length = cpu_to_le32(1);
160 mark_buffer_dirty(bh); 160 mark_buffer_dirty(bh);
161 brelse(bh); 161 brelse(bh);
162 if ((a == node && fnod) || na == -1) return se; 162 if ((a == node && fnod) || na == -1) return se;
163 c2 = 0; 163 c2 = 0;
164 while (up != -1) { 164 while (up != (anode_secno)-1) {
165 struct anode *new_anode; 165 struct anode *new_anode;
166 if (hpfs_sb(s)->sb_chk) 166 if (hpfs_sb(s)->sb_chk)
167 if (hpfs_stop_cycles(s, up, &c1, &c2, "hpfs_add_sector_to_btree #2")) return -1; 167 if (hpfs_stop_cycles(s, up, &c1, &c2, "hpfs_add_sector_to_btree #2")) return -1;
@@ -174,47 +174,47 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
174 } 174 }
175 if (btree->n_free_nodes) { 175 if (btree->n_free_nodes) {
176 btree->n_free_nodes--; n = btree->n_used_nodes++; 176 btree->n_free_nodes--; n = btree->n_used_nodes++;
177 btree->first_free += 8; 177 btree->first_free = cpu_to_le16(le16_to_cpu(btree->first_free) + 8);
178 btree->u.internal[n].file_secno = -1; 178 btree->u.internal[n].file_secno = cpu_to_le32(-1);
179 btree->u.internal[n].down = na; 179 btree->u.internal[n].down = cpu_to_le32(na);
180 btree->u.internal[n-1].file_secno = fs; 180 btree->u.internal[n-1].file_secno = cpu_to_le32(fs);
181 mark_buffer_dirty(bh); 181 mark_buffer_dirty(bh);
182 brelse(bh); 182 brelse(bh);
183 brelse(bh2); 183 brelse(bh2);
184 hpfs_free_sectors(s, ra, 1); 184 hpfs_free_sectors(s, ra, 1);
185 if ((anode = hpfs_map_anode(s, na, &bh))) { 185 if ((anode = hpfs_map_anode(s, na, &bh))) {
186 anode->up = up; 186 anode->up = cpu_to_le32(up);
187 anode->btree.fnode_parent = up == node && fnod; 187 anode->btree.fnode_parent = up == node && fnod;
188 mark_buffer_dirty(bh); 188 mark_buffer_dirty(bh);
189 brelse(bh); 189 brelse(bh);
190 } 190 }
191 return se; 191 return se;
192 } 192 }
193 up = up != node ? anode->up : -1; 193 up = up != node ? le32_to_cpu(anode->up) : -1;
194 btree->u.internal[btree->n_used_nodes - 1].file_secno = /*fs*/-1; 194 btree->u.internal[btree->n_used_nodes - 1].file_secno = cpu_to_le32(/*fs*/-1);
195 mark_buffer_dirty(bh); 195 mark_buffer_dirty(bh);
196 brelse(bh); 196 brelse(bh);
197 a = na; 197 a = na;
198 if ((new_anode = hpfs_alloc_anode(s, a, &na, &bh))) { 198 if ((new_anode = hpfs_alloc_anode(s, a, &na, &bh))) {
199 anode = new_anode; 199 anode = new_anode;
200 /*anode->up = up != -1 ? up : ra;*/ 200 /*anode->up = cpu_to_le32(up != -1 ? up : ra);*/
201 anode->btree.internal = 1; 201 anode->btree.internal = 1;
202 anode->btree.n_used_nodes = 1; 202 anode->btree.n_used_nodes = 1;
203 anode->btree.n_free_nodes = 59; 203 anode->btree.n_free_nodes = 59;
204 anode->btree.first_free = 16; 204 anode->btree.first_free = cpu_to_le16(16);
205 anode->btree.u.internal[0].down = a; 205 anode->btree.u.internal[0].down = cpu_to_le32(a);
206 anode->btree.u.internal[0].file_secno = -1; 206 anode->btree.u.internal[0].file_secno = cpu_to_le32(-1);
207 mark_buffer_dirty(bh); 207 mark_buffer_dirty(bh);
208 brelse(bh); 208 brelse(bh);
209 if ((anode = hpfs_map_anode(s, a, &bh))) { 209 if ((anode = hpfs_map_anode(s, a, &bh))) {
210 anode->up = na; 210 anode->up = cpu_to_le32(na);
211 mark_buffer_dirty(bh); 211 mark_buffer_dirty(bh);
212 brelse(bh); 212 brelse(bh);
213 } 213 }
214 } else na = a; 214 } else na = a;
215 } 215 }
216 if ((anode = hpfs_map_anode(s, na, &bh))) { 216 if ((anode = hpfs_map_anode(s, na, &bh))) {
217 anode->up = node; 217 anode->up = cpu_to_le32(node);
218 if (fnod) anode->btree.fnode_parent = 1; 218 if (fnod) anode->btree.fnode_parent = 1;
219 mark_buffer_dirty(bh); 219 mark_buffer_dirty(bh);
220 brelse(bh); 220 brelse(bh);
@@ -232,14 +232,14 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
232 } 232 }
233 btree = &fnode->btree; 233 btree = &fnode->btree;
234 } 234 }
235 ranode->up = node; 235 ranode->up = cpu_to_le32(node);
236 memcpy(&ranode->btree, btree, btree->first_free); 236 memcpy(&ranode->btree, btree, le16_to_cpu(btree->first_free));
237 if (fnod) ranode->btree.fnode_parent = 1; 237 if (fnod) ranode->btree.fnode_parent = 1;
238 ranode->btree.n_free_nodes = (ranode->btree.internal ? 60 : 40) - ranode->btree.n_used_nodes; 238 ranode->btree.n_free_nodes = (ranode->btree.internal ? 60 : 40) - ranode->btree.n_used_nodes;
239 if (ranode->btree.internal) for (n = 0; n < ranode->btree.n_used_nodes; n++) { 239 if (ranode->btree.internal) for (n = 0; n < ranode->btree.n_used_nodes; n++) {
240 struct anode *unode; 240 struct anode *unode;
241 if ((unode = hpfs_map_anode(s, ranode->u.internal[n].down, &bh1))) { 241 if ((unode = hpfs_map_anode(s, le32_to_cpu(ranode->u.internal[n].down), &bh1))) {
242 unode->up = ra; 242 unode->up = cpu_to_le32(ra);
243 unode->btree.fnode_parent = 0; 243 unode->btree.fnode_parent = 0;
244 mark_buffer_dirty(bh1); 244 mark_buffer_dirty(bh1);
245 brelse(bh1); 245 brelse(bh1);
@@ -248,11 +248,11 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi
248 btree->internal = 1; 248 btree->internal = 1;
249 btree->n_free_nodes = fnod ? 10 : 58; 249 btree->n_free_nodes = fnod ? 10 : 58;
250 btree->n_used_nodes = 2; 250 btree->n_used_nodes = 2;
251 btree->first_free = (char *)&btree->u.internal[2] - (char *)btree; 251 btree->first_free = cpu_to_le16((char *)&btree->u.internal[2] - (char *)btree);
252 btree->u.internal[0].file_secno = fs; 252 btree->u.internal[0].file_secno = cpu_to_le32(fs);
253 btree->u.internal[0].down = ra; 253 btree->u.internal[0].down = cpu_to_le32(ra);
254 btree->u.internal[1].file_secno = -1; 254 btree->u.internal[1].file_secno = cpu_to_le32(-1);
255 btree->u.internal[1].down = na; 255 btree->u.internal[1].down = cpu_to_le32(na);
256 mark_buffer_dirty(bh); 256 mark_buffer_dirty(bh);
257 brelse(bh); 257 brelse(bh);
258 mark_buffer_dirty(bh2); 258 mark_buffer_dirty(bh2);
@@ -279,7 +279,7 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree)
279 go_down: 279 go_down:
280 d2 = 0; 280 d2 = 0;
281 while (btree1->internal) { 281 while (btree1->internal) {
282 ano = btree1->u.internal[pos].down; 282 ano = le32_to_cpu(btree1->u.internal[pos].down);
283 if (level) brelse(bh); 283 if (level) brelse(bh);
284 if (hpfs_sb(s)->sb_chk) 284 if (hpfs_sb(s)->sb_chk)
285 if (hpfs_stop_cycles(s, ano, &d1, &d2, "hpfs_remove_btree #1")) 285 if (hpfs_stop_cycles(s, ano, &d1, &d2, "hpfs_remove_btree #1"))
@@ -290,7 +290,7 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree)
290 pos = 0; 290 pos = 0;
291 } 291 }
292 for (i = 0; i < btree1->n_used_nodes; i++) 292 for (i = 0; i < btree1->n_used_nodes; i++)
293 hpfs_free_sectors(s, btree1->u.external[i].disk_secno, btree1->u.external[i].length); 293 hpfs_free_sectors(s, le32_to_cpu(btree1->u.external[i].disk_secno), le32_to_cpu(btree1->u.external[i].length));
294 go_up: 294 go_up:
295 if (!level) return; 295 if (!level) return;
296 brelse(bh); 296 brelse(bh);
@@ -298,13 +298,13 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree)
298 if (hpfs_stop_cycles(s, ano, &c1, &c2, "hpfs_remove_btree #2")) return; 298 if (hpfs_stop_cycles(s, ano, &c1, &c2, "hpfs_remove_btree #2")) return;
299 hpfs_free_sectors(s, ano, 1); 299 hpfs_free_sectors(s, ano, 1);
300 oano = ano; 300 oano = ano;
301 ano = anode->up; 301 ano = le32_to_cpu(anode->up);
302 if (--level) { 302 if (--level) {
303 if (!(anode = hpfs_map_anode(s, ano, &bh))) return; 303 if (!(anode = hpfs_map_anode(s, ano, &bh))) return;
304 btree1 = &anode->btree; 304 btree1 = &anode->btree;
305 } else btree1 = btree; 305 } else btree1 = btree;
306 for (i = 0; i < btree1->n_used_nodes; i++) { 306 for (i = 0; i < btree1->n_used_nodes; i++) {
307 if (btree1->u.internal[i].down == oano) { 307 if (le32_to_cpu(btree1->u.internal[i].down) == oano) {
308 if ((pos = i + 1) < btree1->n_used_nodes) 308 if ((pos = i + 1) < btree1->n_used_nodes)
309 goto go_down; 309 goto go_down;
310 else 310 else
@@ -411,7 +411,7 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs)
411 if (fno) { 411 if (fno) {
412 btree->n_free_nodes = 8; 412 btree->n_free_nodes = 8;
413 btree->n_used_nodes = 0; 413 btree->n_used_nodes = 0;
414 btree->first_free = 8; 414 btree->first_free = cpu_to_le16(8);
415 btree->internal = 0; 415 btree->internal = 0;
416 mark_buffer_dirty(bh); 416 mark_buffer_dirty(bh);
417 } else hpfs_free_sectors(s, f, 1); 417 } else hpfs_free_sectors(s, f, 1);
@@ -421,22 +421,22 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs)
421 while (btree->internal) { 421 while (btree->internal) {
422 nodes = btree->n_used_nodes + btree->n_free_nodes; 422 nodes = btree->n_used_nodes + btree->n_free_nodes;
423 for (i = 0; i < btree->n_used_nodes; i++) 423 for (i = 0; i < btree->n_used_nodes; i++)
424 if (btree->u.internal[i].file_secno >= secs) goto f; 424 if (le32_to_cpu(btree->u.internal[i].file_secno) >= secs) goto f;
425 brelse(bh); 425 brelse(bh);
426 hpfs_error(s, "internal btree %08x doesn't end with -1", node); 426 hpfs_error(s, "internal btree %08x doesn't end with -1", node);
427 return; 427 return;
428 f: 428 f:
429 for (j = i + 1; j < btree->n_used_nodes; j++) 429 for (j = i + 1; j < btree->n_used_nodes; j++)
430 hpfs_ea_remove(s, btree->u.internal[j].down, 1, 0); 430 hpfs_ea_remove(s, le32_to_cpu(btree->u.internal[j].down), 1, 0);
431 btree->n_used_nodes = i + 1; 431 btree->n_used_nodes = i + 1;
432 btree->n_free_nodes = nodes - btree->n_used_nodes; 432 btree->n_free_nodes = nodes - btree->n_used_nodes;
433 btree->first_free = 8 + 8 * btree->n_used_nodes; 433 btree->first_free = cpu_to_le16(8 + 8 * btree->n_used_nodes);
434 mark_buffer_dirty(bh); 434 mark_buffer_dirty(bh);
435 if (btree->u.internal[i].file_secno == secs) { 435 if (btree->u.internal[i].file_secno == cpu_to_le32(secs)) {
436 brelse(bh); 436 brelse(bh);
437 return; 437 return;
438 } 438 }
439 node = btree->u.internal[i].down; 439 node = le32_to_cpu(btree->u.internal[i].down);
440 brelse(bh); 440 brelse(bh);
441 if (hpfs_sb(s)->sb_chk) 441 if (hpfs_sb(s)->sb_chk)
442 if (hpfs_stop_cycles(s, node, &c1, &c2, "hpfs_truncate_btree")) 442 if (hpfs_stop_cycles(s, node, &c1, &c2, "hpfs_truncate_btree"))
@@ -446,25 +446,25 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs)
446 } 446 }
447 nodes = btree->n_used_nodes + btree->n_free_nodes; 447 nodes = btree->n_used_nodes + btree->n_free_nodes;
448 for (i = 0; i < btree->n_used_nodes; i++) 448 for (i = 0; i < btree->n_used_nodes; i++)
449 if (btree->u.external[i].file_secno + btree->u.external[i].length >= secs) goto ff; 449 if (le32_to_cpu(btree->u.external[i].file_secno) + le32_to_cpu(btree->u.external[i].length) >= secs) goto ff;
450 brelse(bh); 450 brelse(bh);
451 return; 451 return;
452 ff: 452 ff:
453 if (secs <= btree->u.external[i].file_secno) { 453 if (secs <= le32_to_cpu(btree->u.external[i].file_secno)) {
454 hpfs_error(s, "there is an allocation error in file %08x, sector %08x", f, secs); 454 hpfs_error(s, "there is an allocation error in file %08x, sector %08x", f, secs);
455 if (i) i--; 455 if (i) i--;
456 } 456 }
457 else if (btree->u.external[i].file_secno + btree->u.external[i].length > secs) { 457 else if (le32_to_cpu(btree->u.external[i].file_secno) + le32_to_cpu(btree->u.external[i].length) > secs) {
458 hpfs_free_sectors(s, btree->u.external[i].disk_secno + secs - 458 hpfs_free_sectors(s, le32_to_cpu(btree->u.external[i].disk_secno) + secs -
459 btree->u.external[i].file_secno, btree->u.external[i].length 459 le32_to_cpu(btree->u.external[i].file_secno), le32_to_cpu(btree->u.external[i].length)
460 - secs + btree->u.external[i].file_secno); /* I hope gcc optimizes this :-) */ 460 - secs + le32_to_cpu(btree->u.external[i].file_secno)); /* I hope gcc optimizes this :-) */
461 btree->u.external[i].length = secs - btree->u.external[i].file_secno; 461 btree->u.external[i].length = cpu_to_le32(secs - le32_to_cpu(btree->u.external[i].file_secno));
462 } 462 }
463 for (j = i + 1; j < btree->n_used_nodes; j++) 463 for (j = i + 1; j < btree->n_used_nodes; j++)
464 hpfs_free_sectors(s, btree->u.external[j].disk_secno, btree->u.external[j].length); 464 hpfs_free_sectors(s, le32_to_cpu(btree->u.external[j].disk_secno), le32_to_cpu(btree->u.external[j].length));
465 btree->n_used_nodes = i + 1; 465 btree->n_used_nodes = i + 1;
466 btree->n_free_nodes = nodes - btree->n_used_nodes; 466 btree->n_free_nodes = nodes - btree->n_used_nodes;
467 btree->first_free = 8 + 12 * btree->n_used_nodes; 467 btree->first_free = cpu_to_le16(8 + 12 * btree->n_used_nodes);
468 mark_buffer_dirty(bh); 468 mark_buffer_dirty(bh);
469 brelse(bh); 469 brelse(bh);
470} 470}
@@ -480,12 +480,12 @@ void hpfs_remove_fnode(struct super_block *s, fnode_secno fno)
480 struct extended_attribute *ea_end; 480 struct extended_attribute *ea_end;
481 if (!(fnode = hpfs_map_fnode(s, fno, &bh))) return; 481 if (!(fnode = hpfs_map_fnode(s, fno, &bh))) return;
482 if (!fnode->dirflag) hpfs_remove_btree(s, &fnode->btree); 482 if (!fnode->dirflag) hpfs_remove_btree(s, &fnode->btree);
483 else hpfs_remove_dtree(s, fnode->u.external[0].disk_secno); 483 else hpfs_remove_dtree(s, le32_to_cpu(fnode->u.external[0].disk_secno));
484 ea_end = fnode_end_ea(fnode); 484 ea_end = fnode_end_ea(fnode);
485 for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) 485 for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea))
486 if (ea->indirect) 486 if (ea->indirect)
487 hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea)); 487 hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea));
488 hpfs_ea_ext_remove(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l); 488 hpfs_ea_ext_remove(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l));
489 brelse(bh); 489 brelse(bh);
490 hpfs_free_sectors(s, fno, 1); 490 hpfs_free_sectors(s, fno, 1);
491} 491}
diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c
index 793cb9d943d2..9ecde27d1e29 100644
--- a/fs/hpfs/buffer.c
+++ b/fs/hpfs/buffer.c
@@ -9,22 +9,6 @@
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include "hpfs_fn.h" 10#include "hpfs_fn.h"
11 11
12void hpfs_lock_creation(struct super_block *s)
13{
14#ifdef DEBUG_LOCKS
15 printk("lock creation\n");
16#endif
17 mutex_lock(&hpfs_sb(s)->hpfs_creation_de);
18}
19
20void hpfs_unlock_creation(struct super_block *s)
21{
22#ifdef DEBUG_LOCKS
23 printk("unlock creation\n");
24#endif
25 mutex_unlock(&hpfs_sb(s)->hpfs_creation_de);
26}
27
28/* Map a sector into a buffer and return pointers to it and to the buffer. */ 12/* Map a sector into a buffer and return pointers to it and to the buffer. */
29 13
30void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head **bhp, 14void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head **bhp,
@@ -32,6 +16,8 @@ void *hpfs_map_sector(struct super_block *s, unsigned secno, struct buffer_head
32{ 16{
33 struct buffer_head *bh; 17 struct buffer_head *bh;
34 18
19 hpfs_lock_assert(s);
20
35 cond_resched(); 21 cond_resched();
36 22
37 *bhp = bh = sb_bread(s, secno); 23 *bhp = bh = sb_bread(s, secno);
@@ -50,6 +36,8 @@ void *hpfs_get_sector(struct super_block *s, unsigned secno, struct buffer_head
50 struct buffer_head *bh; 36 struct buffer_head *bh;
51 /*return hpfs_map_sector(s, secno, bhp, 0);*/ 37 /*return hpfs_map_sector(s, secno, bhp, 0);*/
52 38
39 hpfs_lock_assert(s);
40
53 cond_resched(); 41 cond_resched();
54 42
55 if ((*bhp = bh = sb_getblk(s, secno)) != NULL) { 43 if ((*bhp = bh = sb_getblk(s, secno)) != NULL) {
@@ -70,6 +58,8 @@ void *hpfs_map_4sectors(struct super_block *s, unsigned secno, struct quad_buffe
70 struct buffer_head *bh; 58 struct buffer_head *bh;
71 char *data; 59 char *data;
72 60
61 hpfs_lock_assert(s);
62
73 cond_resched(); 63 cond_resched();
74 64
75 if (secno & 3) { 65 if (secno & 3) {
@@ -125,6 +115,8 @@ void *hpfs_get_4sectors(struct super_block *s, unsigned secno,
125{ 115{
126 cond_resched(); 116 cond_resched();
127 117
118 hpfs_lock_assert(s);
119
128 if (secno & 3) { 120 if (secno & 3) {
129 printk("HPFS: hpfs_get_4sectors: unaligned read\n"); 121 printk("HPFS: hpfs_get_4sectors: unaligned read\n");
130 return NULL; 122 return NULL;
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index b3d7c0ddb609..f46ae025bfb5 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -88,9 +88,9 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
88 hpfs_error(inode->i_sb, "not a directory, fnode %08lx", 88 hpfs_error(inode->i_sb, "not a directory, fnode %08lx",
89 (unsigned long)inode->i_ino); 89 (unsigned long)inode->i_ino);
90 } 90 }
91 if (hpfs_inode->i_dno != fno->u.external[0].disk_secno) { 91 if (hpfs_inode->i_dno != le32_to_cpu(fno->u.external[0].disk_secno)) {
92 e = 1; 92 e = 1;
93 hpfs_error(inode->i_sb, "corrupted inode: i_dno == %08x, fnode -> dnode == %08x", hpfs_inode->i_dno, fno->u.external[0].disk_secno); 93 hpfs_error(inode->i_sb, "corrupted inode: i_dno == %08x, fnode -> dnode == %08x", hpfs_inode->i_dno, le32_to_cpu(fno->u.external[0].disk_secno));
94 } 94 }
95 brelse(bh); 95 brelse(bh);
96 if (e) { 96 if (e) {
@@ -156,7 +156,7 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
156 goto again; 156 goto again;
157 } 157 }
158 tempname = hpfs_translate_name(inode->i_sb, de->name, de->namelen, lc, de->not_8x3); 158 tempname = hpfs_translate_name(inode->i_sb, de->name, de->namelen, lc, de->not_8x3);
159 if (filldir(dirent, tempname, de->namelen, old_pos, de->fnode, DT_UNKNOWN) < 0) { 159 if (filldir(dirent, tempname, de->namelen, old_pos, le32_to_cpu(de->fnode), DT_UNKNOWN) < 0) {
160 filp->f_pos = old_pos; 160 filp->f_pos = old_pos;
161 if (tempname != de->name) kfree(tempname); 161 if (tempname != de->name) kfree(tempname);
162 hpfs_brelse4(&qbh); 162 hpfs_brelse4(&qbh);
@@ -221,7 +221,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
221 * Get inode number, what we're after. 221 * Get inode number, what we're after.
222 */ 222 */
223 223
224 ino = de->fnode; 224 ino = le32_to_cpu(de->fnode);
225 225
226 /* 226 /*
227 * Go find or make an inode. 227 * Go find or make an inode.
@@ -236,7 +236,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
236 hpfs_init_inode(result); 236 hpfs_init_inode(result);
237 if (de->directory) 237 if (de->directory)
238 hpfs_read_inode(result); 238 hpfs_read_inode(result);
239 else if (de->ea_size && hpfs_sb(dir->i_sb)->sb_eas) 239 else if (le32_to_cpu(de->ea_size) && hpfs_sb(dir->i_sb)->sb_eas)
240 hpfs_read_inode(result); 240 hpfs_read_inode(result);
241 else { 241 else {
242 result->i_mode |= S_IFREG; 242 result->i_mode |= S_IFREG;
@@ -250,8 +250,6 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
250 hpfs_result = hpfs_i(result); 250 hpfs_result = hpfs_i(result);
251 if (!de->directory) hpfs_result->i_parent_dir = dir->i_ino; 251 if (!de->directory) hpfs_result->i_parent_dir = dir->i_ino;
252 252
253 hpfs_decide_conv(result, name, len);
254
255 if (de->has_acl || de->has_xtd_perm) if (!(dir->i_sb->s_flags & MS_RDONLY)) { 253 if (de->has_acl || de->has_xtd_perm) if (!(dir->i_sb->s_flags & MS_RDONLY)) {
256 hpfs_error(result->i_sb, "ACLs or XPERM found. This is probably HPFS386. This driver doesn't support it now. Send me some info on these structures"); 254 hpfs_error(result->i_sb, "ACLs or XPERM found. This is probably HPFS386. This driver doesn't support it now. Send me some info on these structures");
257 goto bail1; 255 goto bail1;
@@ -263,19 +261,19 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
263 */ 261 */
264 262
265 if (!result->i_ctime.tv_sec) { 263 if (!result->i_ctime.tv_sec) {
266 if (!(result->i_ctime.tv_sec = local_to_gmt(dir->i_sb, de->creation_date))) 264 if (!(result->i_ctime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->creation_date))))
267 result->i_ctime.tv_sec = 1; 265 result->i_ctime.tv_sec = 1;
268 result->i_ctime.tv_nsec = 0; 266 result->i_ctime.tv_nsec = 0;
269 result->i_mtime.tv_sec = local_to_gmt(dir->i_sb, de->write_date); 267 result->i_mtime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->write_date));
270 result->i_mtime.tv_nsec = 0; 268 result->i_mtime.tv_nsec = 0;
271 result->i_atime.tv_sec = local_to_gmt(dir->i_sb, de->read_date); 269 result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->read_date));
272 result->i_atime.tv_nsec = 0; 270 result->i_atime.tv_nsec = 0;
273 hpfs_result->i_ea_size = de->ea_size; 271 hpfs_result->i_ea_size = le32_to_cpu(de->ea_size);
274 if (!hpfs_result->i_ea_mode && de->read_only) 272 if (!hpfs_result->i_ea_mode && de->read_only)
275 result->i_mode &= ~0222; 273 result->i_mode &= ~0222;
276 if (!de->directory) { 274 if (!de->directory) {
277 if (result->i_size == -1) { 275 if (result->i_size == -1) {
278 result->i_size = de->file_size; 276 result->i_size = le32_to_cpu(de->file_size);
279 result->i_data.a_ops = &hpfs_aops; 277 result->i_data.a_ops = &hpfs_aops;
280 hpfs_i(result)->mmu_private = result->i_size; 278 hpfs_i(result)->mmu_private = result->i_size;
281 /* 279 /*
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c
index 9b2ffadfc8c4..1e0e2ac30fd3 100644
--- a/fs/hpfs/dnode.c
+++ b/fs/hpfs/dnode.c
@@ -14,11 +14,11 @@ static loff_t get_pos(struct dnode *d, struct hpfs_dirent *fde)
14 struct hpfs_dirent *de_end = dnode_end_de(d); 14 struct hpfs_dirent *de_end = dnode_end_de(d);
15 int i = 1; 15 int i = 1;
16 for (de = dnode_first_de(d); de < de_end; de = de_next_de(de)) { 16 for (de = dnode_first_de(d); de < de_end; de = de_next_de(de)) {
17 if (de == fde) return ((loff_t) d->self << 4) | (loff_t)i; 17 if (de == fde) return ((loff_t) le32_to_cpu(d->self) << 4) | (loff_t)i;
18 i++; 18 i++;
19 } 19 }
20 printk("HPFS: get_pos: not_found\n"); 20 printk("HPFS: get_pos: not_found\n");
21 return ((loff_t)d->self << 4) | (loff_t)1; 21 return ((loff_t)le32_to_cpu(d->self) << 4) | (loff_t)1;
22} 22}
23 23
24void hpfs_add_pos(struct inode *inode, loff_t *pos) 24void hpfs_add_pos(struct inode *inode, loff_t *pos)
@@ -130,29 +130,30 @@ static void set_last_pointer(struct super_block *s, struct dnode *d, dnode_secno
130{ 130{
131 struct hpfs_dirent *de; 131 struct hpfs_dirent *de;
132 if (!(de = dnode_last_de(d))) { 132 if (!(de = dnode_last_de(d))) {
133 hpfs_error(s, "set_last_pointer: empty dnode %08x", d->self); 133 hpfs_error(s, "set_last_pointer: empty dnode %08x", le32_to_cpu(d->self));
134 return; 134 return;
135 } 135 }
136 if (hpfs_sb(s)->sb_chk) { 136 if (hpfs_sb(s)->sb_chk) {
137 if (de->down) { 137 if (de->down) {
138 hpfs_error(s, "set_last_pointer: dnode %08x has already last pointer %08x", 138 hpfs_error(s, "set_last_pointer: dnode %08x has already last pointer %08x",
139 d->self, de_down_pointer(de)); 139 le32_to_cpu(d->self), de_down_pointer(de));
140 return; 140 return;
141 } 141 }
142 if (de->length != 32) { 142 if (le16_to_cpu(de->length) != 32) {
143 hpfs_error(s, "set_last_pointer: bad last dirent in dnode %08x", d->self); 143 hpfs_error(s, "set_last_pointer: bad last dirent in dnode %08x", le32_to_cpu(d->self));
144 return; 144 return;
145 } 145 }
146 } 146 }
147 if (ptr) { 147 if (ptr) {
148 if ((d->first_free += 4) > 2048) { 148 d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) + 4);
149 hpfs_error(s,"set_last_pointer: too long dnode %08x", d->self); 149 if (le32_to_cpu(d->first_free) > 2048) {
150 d->first_free -= 4; 150 hpfs_error(s, "set_last_pointer: too long dnode %08x", le32_to_cpu(d->self));
151 d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) - 4);
151 return; 152 return;
152 } 153 }
153 de->length = 36; 154 de->length = cpu_to_le16(36);
154 de->down = 1; 155 de->down = 1;
155 *(dnode_secno *)((char *)de + 32) = ptr; 156 *(dnode_secno *)((char *)de + 32) = cpu_to_le32(ptr);
156 } 157 }
157} 158}
158 159
@@ -168,7 +169,7 @@ struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d,
168 for (de = dnode_first_de(d); de < de_end; de = de_next_de(de)) { 169 for (de = dnode_first_de(d); de < de_end; de = de_next_de(de)) {
169 int c = hpfs_compare_names(s, name, namelen, de->name, de->namelen, de->last); 170 int c = hpfs_compare_names(s, name, namelen, de->name, de->namelen, de->last);
170 if (!c) { 171 if (!c) {
171 hpfs_error(s, "name (%c,%d) already exists in dnode %08x", *name, namelen, d->self); 172 hpfs_error(s, "name (%c,%d) already exists in dnode %08x", *name, namelen, le32_to_cpu(d->self));
172 return NULL; 173 return NULL;
173 } 174 }
174 if (c < 0) break; 175 if (c < 0) break;
@@ -176,15 +177,14 @@ struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d,
176 memmove((char *)de + d_size, de, (char *)de_end - (char *)de); 177 memmove((char *)de + d_size, de, (char *)de_end - (char *)de);
177 memset(de, 0, d_size); 178 memset(de, 0, d_size);
178 if (down_ptr) { 179 if (down_ptr) {
179 *(int *)((char *)de + d_size - 4) = down_ptr; 180 *(dnode_secno *)((char *)de + d_size - 4) = cpu_to_le32(down_ptr);
180 de->down = 1; 181 de->down = 1;
181 } 182 }
182 de->length = d_size; 183 de->length = cpu_to_le16(d_size);
183 if (down_ptr) de->down = 1;
184 de->not_8x3 = hpfs_is_name_long(name, namelen); 184 de->not_8x3 = hpfs_is_name_long(name, namelen);
185 de->namelen = namelen; 185 de->namelen = namelen;
186 memcpy(de->name, name, namelen); 186 memcpy(de->name, name, namelen);
187 d->first_free += d_size; 187 d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) + d_size);
188 return de; 188 return de;
189} 189}
190 190
@@ -194,25 +194,25 @@ static void hpfs_delete_de(struct super_block *s, struct dnode *d,
194 struct hpfs_dirent *de) 194 struct hpfs_dirent *de)
195{ 195{
196 if (de->last) { 196 if (de->last) {
197 hpfs_error(s, "attempt to delete last dirent in dnode %08x", d->self); 197 hpfs_error(s, "attempt to delete last dirent in dnode %08x", le32_to_cpu(d->self));
198 return; 198 return;
199 } 199 }
200 d->first_free -= de->length; 200 d->first_free = cpu_to_le32(le32_to_cpu(d->first_free) - le16_to_cpu(de->length));
201 memmove(de, de_next_de(de), d->first_free + (char *)d - (char *)de); 201 memmove(de, de_next_de(de), le32_to_cpu(d->first_free) + (char *)d - (char *)de);
202} 202}
203 203
204static void fix_up_ptrs(struct super_block *s, struct dnode *d) 204static void fix_up_ptrs(struct super_block *s, struct dnode *d)
205{ 205{
206 struct hpfs_dirent *de; 206 struct hpfs_dirent *de;
207 struct hpfs_dirent *de_end = dnode_end_de(d); 207 struct hpfs_dirent *de_end = dnode_end_de(d);
208 dnode_secno dno = d->self; 208 dnode_secno dno = le32_to_cpu(d->self);
209 for (de = dnode_first_de(d); de < de_end; de = de_next_de(de)) 209 for (de = dnode_first_de(d); de < de_end; de = de_next_de(de))
210 if (de->down) { 210 if (de->down) {
211 struct quad_buffer_head qbh; 211 struct quad_buffer_head qbh;
212 struct dnode *dd; 212 struct dnode *dd;
213 if ((dd = hpfs_map_dnode(s, de_down_pointer(de), &qbh))) { 213 if ((dd = hpfs_map_dnode(s, de_down_pointer(de), &qbh))) {
214 if (dd->up != dno || dd->root_dnode) { 214 if (le32_to_cpu(dd->up) != dno || dd->root_dnode) {
215 dd->up = dno; 215 dd->up = cpu_to_le32(dno);
216 dd->root_dnode = 0; 216 dd->root_dnode = 0;
217 hpfs_mark_4buffers_dirty(&qbh); 217 hpfs_mark_4buffers_dirty(&qbh);
218 } 218 }
@@ -262,7 +262,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
262 kfree(nname); 262 kfree(nname);
263 return 1; 263 return 1;
264 } 264 }
265 if (d->first_free + de_size(namelen, down_ptr) <= 2048) { 265 if (le32_to_cpu(d->first_free) + de_size(namelen, down_ptr) <= 2048) {
266 loff_t t; 266 loff_t t;
267 copy_de(de=hpfs_add_de(i->i_sb, d, name, namelen, down_ptr), new_de); 267 copy_de(de=hpfs_add_de(i->i_sb, d, name, namelen, down_ptr), new_de);
268 t = get_pos(d, de); 268 t = get_pos(d, de);
@@ -286,11 +286,11 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
286 kfree(nname); 286 kfree(nname);
287 return 1; 287 return 1;
288 } 288 }
289 memcpy(nd, d, d->first_free); 289 memcpy(nd, d, le32_to_cpu(d->first_free));
290 copy_de(de = hpfs_add_de(i->i_sb, nd, name, namelen, down_ptr), new_de); 290 copy_de(de = hpfs_add_de(i->i_sb, nd, name, namelen, down_ptr), new_de);
291 for_all_poss(i, hpfs_pos_ins, get_pos(nd, de), 1); 291 for_all_poss(i, hpfs_pos_ins, get_pos(nd, de), 1);
292 h = ((char *)dnode_last_de(nd) - (char *)nd) / 2 + 10; 292 h = ((char *)dnode_last_de(nd) - (char *)nd) / 2 + 10;
293 if (!(ad = hpfs_alloc_dnode(i->i_sb, d->up, &adno, &qbh1, 0))) { 293 if (!(ad = hpfs_alloc_dnode(i->i_sb, le32_to_cpu(d->up), &adno, &qbh1))) {
294 hpfs_error(i->i_sb, "unable to alloc dnode - dnode tree will be corrupted"); 294 hpfs_error(i->i_sb, "unable to alloc dnode - dnode tree will be corrupted");
295 hpfs_brelse4(&qbh); 295 hpfs_brelse4(&qbh);
296 kfree(nd); 296 kfree(nd);
@@ -313,20 +313,21 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
313 down_ptr = adno; 313 down_ptr = adno;
314 set_last_pointer(i->i_sb, ad, de->down ? de_down_pointer(de) : 0); 314 set_last_pointer(i->i_sb, ad, de->down ? de_down_pointer(de) : 0);
315 de = de_next_de(de); 315 de = de_next_de(de);
316 memmove((char *)nd + 20, de, nd->first_free + (char *)nd - (char *)de); 316 memmove((char *)nd + 20, de, le32_to_cpu(nd->first_free) + (char *)nd - (char *)de);
317 nd->first_free -= (char *)de - (char *)nd - 20; 317 nd->first_free = cpu_to_le32(le32_to_cpu(nd->first_free) - ((char *)de - (char *)nd - 20));
318 memcpy(d, nd, nd->first_free); 318 memcpy(d, nd, le32_to_cpu(nd->first_free));
319 for_all_poss(i, hpfs_pos_del, (loff_t)dno << 4, pos); 319 for_all_poss(i, hpfs_pos_del, (loff_t)dno << 4, pos);
320 fix_up_ptrs(i->i_sb, ad); 320 fix_up_ptrs(i->i_sb, ad);
321 if (!d->root_dnode) { 321 if (!d->root_dnode) {
322 dno = ad->up = d->up; 322 ad->up = d->up;
323 dno = le32_to_cpu(ad->up);
323 hpfs_mark_4buffers_dirty(&qbh); 324 hpfs_mark_4buffers_dirty(&qbh);
324 hpfs_brelse4(&qbh); 325 hpfs_brelse4(&qbh);
325 hpfs_mark_4buffers_dirty(&qbh1); 326 hpfs_mark_4buffers_dirty(&qbh1);
326 hpfs_brelse4(&qbh1); 327 hpfs_brelse4(&qbh1);
327 goto go_up; 328 goto go_up;
328 } 329 }
329 if (!(rd = hpfs_alloc_dnode(i->i_sb, d->up, &rdno, &qbh2, 0))) { 330 if (!(rd = hpfs_alloc_dnode(i->i_sb, le32_to_cpu(d->up), &rdno, &qbh2))) {
330 hpfs_error(i->i_sb, "unable to alloc dnode - dnode tree will be corrupted"); 331 hpfs_error(i->i_sb, "unable to alloc dnode - dnode tree will be corrupted");
331 hpfs_brelse4(&qbh); 332 hpfs_brelse4(&qbh);
332 hpfs_brelse4(&qbh1); 333 hpfs_brelse4(&qbh1);
@@ -338,7 +339,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
338 i->i_blocks += 4; 339 i->i_blocks += 4;
339 rd->root_dnode = 1; 340 rd->root_dnode = 1;
340 rd->up = d->up; 341 rd->up = d->up;
341 if (!(fnode = hpfs_map_fnode(i->i_sb, d->up, &bh))) { 342 if (!(fnode = hpfs_map_fnode(i->i_sb, le32_to_cpu(d->up), &bh))) {
342 hpfs_free_dnode(i->i_sb, rdno); 343 hpfs_free_dnode(i->i_sb, rdno);
343 hpfs_brelse4(&qbh); 344 hpfs_brelse4(&qbh);
344 hpfs_brelse4(&qbh1); 345 hpfs_brelse4(&qbh1);
@@ -347,10 +348,11 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
347 kfree(nname); 348 kfree(nname);
348 return 1; 349 return 1;
349 } 350 }
350 fnode->u.external[0].disk_secno = rdno; 351 fnode->u.external[0].disk_secno = cpu_to_le32(rdno);
351 mark_buffer_dirty(bh); 352 mark_buffer_dirty(bh);
352 brelse(bh); 353 brelse(bh);
353 d->up = ad->up = hpfs_i(i)->i_dno = rdno; 354 hpfs_i(i)->i_dno = rdno;
355 d->up = ad->up = cpu_to_le32(rdno);
354 d->root_dnode = ad->root_dnode = 0; 356 d->root_dnode = ad->root_dnode = 0;
355 hpfs_mark_4buffers_dirty(&qbh); 357 hpfs_mark_4buffers_dirty(&qbh);
356 hpfs_brelse4(&qbh); 358 hpfs_brelse4(&qbh);
@@ -373,7 +375,7 @@ static int hpfs_add_to_dnode(struct inode *i, dnode_secno dno,
373 375
374int hpfs_add_dirent(struct inode *i, 376int hpfs_add_dirent(struct inode *i,
375 const unsigned char *name, unsigned namelen, 377 const unsigned char *name, unsigned namelen,
376 struct hpfs_dirent *new_de, int cdepth) 378 struct hpfs_dirent *new_de)
377{ 379{
378 struct hpfs_inode_info *hpfs_inode = hpfs_i(i); 380 struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
379 struct dnode *d; 381 struct dnode *d;
@@ -403,7 +405,6 @@ int hpfs_add_dirent(struct inode *i,
403 } 405 }
404 } 406 }
405 hpfs_brelse4(&qbh); 407 hpfs_brelse4(&qbh);
406 if (!cdepth) hpfs_lock_creation(i->i_sb);
407 if (hpfs_check_free_dnodes(i->i_sb, FREE_DNODES_ADD)) { 408 if (hpfs_check_free_dnodes(i->i_sb, FREE_DNODES_ADD)) {
408 c = 1; 409 c = 1;
409 goto ret; 410 goto ret;
@@ -411,7 +412,6 @@ int hpfs_add_dirent(struct inode *i,
411 i->i_version++; 412 i->i_version++;
412 c = hpfs_add_to_dnode(i, dno, name, namelen, new_de, 0); 413 c = hpfs_add_to_dnode(i, dno, name, namelen, new_de, 0);
413 ret: 414 ret:
414 if (!cdepth) hpfs_unlock_creation(i->i_sb);
415 return c; 415 return c;
416} 416}
417 417
@@ -437,9 +437,9 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to)
437 return 0; 437 return 0;
438 if (!(dnode = hpfs_map_dnode(i->i_sb, dno, &qbh))) return 0; 438 if (!(dnode = hpfs_map_dnode(i->i_sb, dno, &qbh))) return 0;
439 if (hpfs_sb(i->i_sb)->sb_chk) { 439 if (hpfs_sb(i->i_sb)->sb_chk) {
440 if (dnode->up != chk_up) { 440 if (le32_to_cpu(dnode->up) != chk_up) {
441 hpfs_error(i->i_sb, "move_to_top: up pointer from %08x should be %08x, is %08x", 441 hpfs_error(i->i_sb, "move_to_top: up pointer from %08x should be %08x, is %08x",
442 dno, chk_up, dnode->up); 442 dno, chk_up, le32_to_cpu(dnode->up));
443 hpfs_brelse4(&qbh); 443 hpfs_brelse4(&qbh);
444 return 0; 444 return 0;
445 } 445 }
@@ -455,7 +455,7 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to)
455 hpfs_brelse4(&qbh); 455 hpfs_brelse4(&qbh);
456 } 456 }
457 while (!(de = dnode_pre_last_de(dnode))) { 457 while (!(de = dnode_pre_last_de(dnode))) {
458 dnode_secno up = dnode->up; 458 dnode_secno up = le32_to_cpu(dnode->up);
459 hpfs_brelse4(&qbh); 459 hpfs_brelse4(&qbh);
460 hpfs_free_dnode(i->i_sb, dno); 460 hpfs_free_dnode(i->i_sb, dno);
461 i->i_size -= 2048; 461 i->i_size -= 2048;
@@ -474,8 +474,8 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to)
474 hpfs_brelse4(&qbh); 474 hpfs_brelse4(&qbh);
475 return 0; 475 return 0;
476 } 476 }
477 dnode->first_free -= 4; 477 dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) - 4);
478 de->length -= 4; 478 de->length = cpu_to_le16(le16_to_cpu(de->length) - 4);
479 de->down = 0; 479 de->down = 0;
480 hpfs_mark_4buffers_dirty(&qbh); 480 hpfs_mark_4buffers_dirty(&qbh);
481 dno = up; 481 dno = up;
@@ -483,12 +483,12 @@ static secno move_to_top(struct inode *i, dnode_secno from, dnode_secno to)
483 t = get_pos(dnode, de); 483 t = get_pos(dnode, de);
484 for_all_poss(i, hpfs_pos_subst, t, 4); 484 for_all_poss(i, hpfs_pos_subst, t, 4);
485 for_all_poss(i, hpfs_pos_subst, t + 1, 5); 485 for_all_poss(i, hpfs_pos_subst, t + 1, 5);
486 if (!(nde = kmalloc(de->length, GFP_NOFS))) { 486 if (!(nde = kmalloc(le16_to_cpu(de->length), GFP_NOFS))) {
487 hpfs_error(i->i_sb, "out of memory for dirent - directory will be corrupted"); 487 hpfs_error(i->i_sb, "out of memory for dirent - directory will be corrupted");
488 hpfs_brelse4(&qbh); 488 hpfs_brelse4(&qbh);
489 return 0; 489 return 0;
490 } 490 }
491 memcpy(nde, de, de->length); 491 memcpy(nde, de, le16_to_cpu(de->length));
492 ddno = de->down ? de_down_pointer(de) : 0; 492 ddno = de->down ? de_down_pointer(de) : 0;
493 hpfs_delete_de(i->i_sb, dnode, de); 493 hpfs_delete_de(i->i_sb, dnode, de);
494 set_last_pointer(i->i_sb, dnode, ddno); 494 set_last_pointer(i->i_sb, dnode, ddno);
@@ -517,11 +517,11 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
517 try_it_again: 517 try_it_again:
518 if (hpfs_stop_cycles(i->i_sb, dno, &c1, &c2, "delete_empty_dnode")) return; 518 if (hpfs_stop_cycles(i->i_sb, dno, &c1, &c2, "delete_empty_dnode")) return;
519 if (!(dnode = hpfs_map_dnode(i->i_sb, dno, &qbh))) return; 519 if (!(dnode = hpfs_map_dnode(i->i_sb, dno, &qbh))) return;
520 if (dnode->first_free > 56) goto end; 520 if (le32_to_cpu(dnode->first_free) > 56) goto end;
521 if (dnode->first_free == 52 || dnode->first_free == 56) { 521 if (le32_to_cpu(dnode->first_free) == 52 || le32_to_cpu(dnode->first_free) == 56) {
522 struct hpfs_dirent *de_end; 522 struct hpfs_dirent *de_end;
523 int root = dnode->root_dnode; 523 int root = dnode->root_dnode;
524 up = dnode->up; 524 up = le32_to_cpu(dnode->up);
525 de = dnode_first_de(dnode); 525 de = dnode_first_de(dnode);
526 down = de->down ? de_down_pointer(de) : 0; 526 down = de->down ? de_down_pointer(de) : 0;
527 if (hpfs_sb(i->i_sb)->sb_chk) if (root && !down) { 527 if (hpfs_sb(i->i_sb)->sb_chk) if (root && !down) {
@@ -545,13 +545,13 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
545 return; 545 return;
546 } 546 }
547 if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) { 547 if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) {
548 d1->up = up; 548 d1->up = cpu_to_le32(up);
549 d1->root_dnode = 1; 549 d1->root_dnode = 1;
550 hpfs_mark_4buffers_dirty(&qbh1); 550 hpfs_mark_4buffers_dirty(&qbh1);
551 hpfs_brelse4(&qbh1); 551 hpfs_brelse4(&qbh1);
552 } 552 }
553 if ((fnode = hpfs_map_fnode(i->i_sb, up, &bh))) { 553 if ((fnode = hpfs_map_fnode(i->i_sb, up, &bh))) {
554 fnode->u.external[0].disk_secno = down; 554 fnode->u.external[0].disk_secno = cpu_to_le32(down);
555 mark_buffer_dirty(bh); 555 mark_buffer_dirty(bh);
556 brelse(bh); 556 brelse(bh);
557 } 557 }
@@ -570,22 +570,22 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
570 for_all_poss(i, hpfs_pos_subst, ((loff_t)dno << 4) | 1, ((loff_t)up << 4) | p); 570 for_all_poss(i, hpfs_pos_subst, ((loff_t)dno << 4) | 1, ((loff_t)up << 4) | p);
571 if (!down) { 571 if (!down) {
572 de->down = 0; 572 de->down = 0;
573 de->length -= 4; 573 de->length = cpu_to_le16(le16_to_cpu(de->length) - 4);
574 dnode->first_free -= 4; 574 dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) - 4);
575 memmove(de_next_de(de), (char *)de_next_de(de) + 4, 575 memmove(de_next_de(de), (char *)de_next_de(de) + 4,
576 (char *)dnode + dnode->first_free - (char *)de_next_de(de)); 576 (char *)dnode + le32_to_cpu(dnode->first_free) - (char *)de_next_de(de));
577 } else { 577 } else {
578 struct dnode *d1; 578 struct dnode *d1;
579 struct quad_buffer_head qbh1; 579 struct quad_buffer_head qbh1;
580 *(dnode_secno *) ((void *) de + de->length - 4) = down; 580 *(dnode_secno *) ((void *) de + le16_to_cpu(de->length) - 4) = down;
581 if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) { 581 if ((d1 = hpfs_map_dnode(i->i_sb, down, &qbh1))) {
582 d1->up = up; 582 d1->up = cpu_to_le32(up);
583 hpfs_mark_4buffers_dirty(&qbh1); 583 hpfs_mark_4buffers_dirty(&qbh1);
584 hpfs_brelse4(&qbh1); 584 hpfs_brelse4(&qbh1);
585 } 585 }
586 } 586 }
587 } else { 587 } else {
588 hpfs_error(i->i_sb, "delete_empty_dnode: dnode %08x, first_free == %03x", dno, dnode->first_free); 588 hpfs_error(i->i_sb, "delete_empty_dnode: dnode %08x, first_free == %03x", dno, le32_to_cpu(dnode->first_free));
589 goto end; 589 goto end;
590 } 590 }
591 591
@@ -596,18 +596,18 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
596 struct quad_buffer_head qbh1; 596 struct quad_buffer_head qbh1;
597 if (!de_next->down) goto endm; 597 if (!de_next->down) goto endm;
598 ndown = de_down_pointer(de_next); 598 ndown = de_down_pointer(de_next);
599 if (!(de_cp = kmalloc(de->length, GFP_NOFS))) { 599 if (!(de_cp = kmalloc(le16_to_cpu(de->length), GFP_NOFS))) {
600 printk("HPFS: out of memory for dtree balancing\n"); 600 printk("HPFS: out of memory for dtree balancing\n");
601 goto endm; 601 goto endm;
602 } 602 }
603 memcpy(de_cp, de, de->length); 603 memcpy(de_cp, de, le16_to_cpu(de->length));
604 hpfs_delete_de(i->i_sb, dnode, de); 604 hpfs_delete_de(i->i_sb, dnode, de);
605 hpfs_mark_4buffers_dirty(&qbh); 605 hpfs_mark_4buffers_dirty(&qbh);
606 hpfs_brelse4(&qbh); 606 hpfs_brelse4(&qbh);
607 for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | p, 4); 607 for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | p, 4);
608 for_all_poss(i, hpfs_pos_del, ((loff_t)up << 4) | p, 1); 608 for_all_poss(i, hpfs_pos_del, ((loff_t)up << 4) | p, 1);
609 if (de_cp->down) if ((d1 = hpfs_map_dnode(i->i_sb, de_down_pointer(de_cp), &qbh1))) { 609 if (de_cp->down) if ((d1 = hpfs_map_dnode(i->i_sb, de_down_pointer(de_cp), &qbh1))) {
610 d1->up = ndown; 610 d1->up = cpu_to_le32(ndown);
611 hpfs_mark_4buffers_dirty(&qbh1); 611 hpfs_mark_4buffers_dirty(&qbh1);
612 hpfs_brelse4(&qbh1); 612 hpfs_brelse4(&qbh1);
613 } 613 }
@@ -635,7 +635,7 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
635 struct hpfs_dirent *del = dnode_last_de(d1); 635 struct hpfs_dirent *del = dnode_last_de(d1);
636 dlp = del->down ? de_down_pointer(del) : 0; 636 dlp = del->down ? de_down_pointer(del) : 0;
637 if (!dlp && down) { 637 if (!dlp && down) {
638 if (d1->first_free > 2044) { 638 if (le32_to_cpu(d1->first_free) > 2044) {
639 if (hpfs_sb(i->i_sb)->sb_chk >= 2) { 639 if (hpfs_sb(i->i_sb)->sb_chk >= 2) {
640 printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n"); 640 printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n");
641 printk("HPFS: warning: terminating balancing operation\n"); 641 printk("HPFS: warning: terminating balancing operation\n");
@@ -647,38 +647,38 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno)
647 printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n"); 647 printk("HPFS: warning: unbalanced dnode tree, see hpfs.txt 4 more info\n");
648 printk("HPFS: warning: goin'on\n"); 648 printk("HPFS: warning: goin'on\n");
649 } 649 }
650 del->length += 4; 650 del->length = cpu_to_le16(le16_to_cpu(del->length) + 4);
651 del->down = 1; 651 del->down = 1;
652 d1->first_free += 4; 652 d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) + 4);
653 } 653 }
654 if (dlp && !down) { 654 if (dlp && !down) {
655 del->length -= 4; 655 del->length = cpu_to_le16(le16_to_cpu(del->length) - 4);
656 del->down = 0; 656 del->down = 0;
657 d1->first_free -= 4; 657 d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) - 4);
658 } else if (down) 658 } else if (down)
659 *(dnode_secno *) ((void *) del + del->length - 4) = down; 659 *(dnode_secno *) ((void *) del + le16_to_cpu(del->length) - 4) = cpu_to_le32(down);
660 } else goto endm; 660 } else goto endm;
661 if (!(de_cp = kmalloc(de_prev->length, GFP_NOFS))) { 661 if (!(de_cp = kmalloc(le16_to_cpu(de_prev->length), GFP_NOFS))) {
662 printk("HPFS: out of memory for dtree balancing\n"); 662 printk("HPFS: out of memory for dtree balancing\n");
663 hpfs_brelse4(&qbh1); 663 hpfs_brelse4(&qbh1);
664 goto endm; 664 goto endm;
665 } 665 }
666 hpfs_mark_4buffers_dirty(&qbh1); 666 hpfs_mark_4buffers_dirty(&qbh1);
667 hpfs_brelse4(&qbh1); 667 hpfs_brelse4(&qbh1);
668 memcpy(de_cp, de_prev, de_prev->length); 668 memcpy(de_cp, de_prev, le16_to_cpu(de_prev->length));
669 hpfs_delete_de(i->i_sb, dnode, de_prev); 669 hpfs_delete_de(i->i_sb, dnode, de_prev);
670 if (!de_prev->down) { 670 if (!de_prev->down) {
671 de_prev->length += 4; 671 de_prev->length = cpu_to_le16(le16_to_cpu(de_prev->length) + 4);
672 de_prev->down = 1; 672 de_prev->down = 1;
673 dnode->first_free += 4; 673 dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) + 4);
674 } 674 }
675 *(dnode_secno *) ((void *) de_prev + de_prev->length - 4) = ndown; 675 *(dnode_secno *) ((void *) de_prev + le16_to_cpu(de_prev->length) - 4) = cpu_to_le32(ndown);
676 hpfs_mark_4buffers_dirty(&qbh); 676 hpfs_mark_4buffers_dirty(&qbh);
677 hpfs_brelse4(&qbh); 677 hpfs_brelse4(&qbh);
678 for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | (p - 1), 4); 678 for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | (p - 1), 4);
679 for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | p, ((loff_t)up << 4) | (p - 1)); 679 for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | p, ((loff_t)up << 4) | (p - 1));
680 if (down) if ((d1 = hpfs_map_dnode(i->i_sb, de_down_pointer(de), &qbh1))) { 680 if (down) if ((d1 = hpfs_map_dnode(i->i_sb, de_down_pointer(de), &qbh1))) {
681 d1->up = ndown; 681 d1->up = cpu_to_le32(ndown);
682 hpfs_mark_4buffers_dirty(&qbh1); 682 hpfs_mark_4buffers_dirty(&qbh1);
683 hpfs_brelse4(&qbh1); 683 hpfs_brelse4(&qbh1);
684 } 684 }
@@ -701,7 +701,6 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de,
701{ 701{
702 struct dnode *dnode = qbh->data; 702 struct dnode *dnode = qbh->data;
703 dnode_secno down = 0; 703 dnode_secno down = 0;
704 int lock = 0;
705 loff_t t; 704 loff_t t;
706 if (de->first || de->last) { 705 if (de->first || de->last) {
707 hpfs_error(i->i_sb, "hpfs_remove_dirent: attempt to delete first or last dirent in dnode %08x", dno); 706 hpfs_error(i->i_sb, "hpfs_remove_dirent: attempt to delete first or last dirent in dnode %08x", dno);
@@ -710,11 +709,8 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de,
710 } 709 }
711 if (de->down) down = de_down_pointer(de); 710 if (de->down) down = de_down_pointer(de);
712 if (depth && (de->down || (de == dnode_first_de(dnode) && de_next_de(de)->last))) { 711 if (depth && (de->down || (de == dnode_first_de(dnode) && de_next_de(de)->last))) {
713 lock = 1;
714 hpfs_lock_creation(i->i_sb);
715 if (hpfs_check_free_dnodes(i->i_sb, FREE_DNODES_DEL)) { 712 if (hpfs_check_free_dnodes(i->i_sb, FREE_DNODES_DEL)) {
716 hpfs_brelse4(qbh); 713 hpfs_brelse4(qbh);
717 hpfs_unlock_creation(i->i_sb);
718 return 2; 714 return 2;
719 } 715 }
720 } 716 }
@@ -727,11 +723,9 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de,
727 dnode_secno a = move_to_top(i, down, dno); 723 dnode_secno a = move_to_top(i, down, dno);
728 for_all_poss(i, hpfs_pos_subst, 5, t); 724 for_all_poss(i, hpfs_pos_subst, 5, t);
729 if (a) delete_empty_dnode(i, a); 725 if (a) delete_empty_dnode(i, a);
730 if (lock) hpfs_unlock_creation(i->i_sb);
731 return !a; 726 return !a;
732 } 727 }
733 delete_empty_dnode(i, dno); 728 delete_empty_dnode(i, dno);
734 if (lock) hpfs_unlock_creation(i->i_sb);
735 return 0; 729 return 0;
736} 730}
737 731
@@ -751,8 +745,8 @@ void hpfs_count_dnodes(struct super_block *s, dnode_secno dno, int *n_dnodes,
751 ptr = 0; 745 ptr = 0;
752 go_up: 746 go_up:
753 if (!(dnode = hpfs_map_dnode(s, dno, &qbh))) return; 747 if (!(dnode = hpfs_map_dnode(s, dno, &qbh))) return;
754 if (hpfs_sb(s)->sb_chk) if (odno && odno != -1 && dnode->up != odno) 748 if (hpfs_sb(s)->sb_chk) if (odno && odno != -1 && le32_to_cpu(dnode->up) != odno)
755 hpfs_error(s, "hpfs_count_dnodes: bad up pointer; dnode %08x, down %08x points to %08x", odno, dno, dnode->up); 749 hpfs_error(s, "hpfs_count_dnodes: bad up pointer; dnode %08x, down %08x points to %08x", odno, dno, le32_to_cpu(dnode->up));
756 de = dnode_first_de(dnode); 750 de = dnode_first_de(dnode);
757 if (ptr) while(1) { 751 if (ptr) while(1) {
758 if (de->down) if (de_down_pointer(de) == ptr) goto process_de; 752 if (de->down) if (de_down_pointer(de) == ptr) goto process_de;
@@ -776,7 +770,7 @@ void hpfs_count_dnodes(struct super_block *s, dnode_secno dno, int *n_dnodes,
776 if (!de->first && !de->last && n_items) (*n_items)++; 770 if (!de->first && !de->last && n_items) (*n_items)++;
777 if ((de = de_next_de(de)) < dnode_end_de(dnode)) goto next_de; 771 if ((de = de_next_de(de)) < dnode_end_de(dnode)) goto next_de;
778 ptr = dno; 772 ptr = dno;
779 dno = dnode->up; 773 dno = le32_to_cpu(dnode->up);
780 if (dnode->root_dnode) { 774 if (dnode->root_dnode) {
781 hpfs_brelse4(&qbh); 775 hpfs_brelse4(&qbh);
782 return; 776 return;
@@ -824,8 +818,8 @@ dnode_secno hpfs_de_as_down_as_possible(struct super_block *s, dnode_secno dno)
824 return d; 818 return d;
825 if (!(de = map_nth_dirent(s, d, 1, &qbh, NULL))) return dno; 819 if (!(de = map_nth_dirent(s, d, 1, &qbh, NULL))) return dno;
826 if (hpfs_sb(s)->sb_chk) 820 if (hpfs_sb(s)->sb_chk)
827 if (up && ((struct dnode *)qbh.data)->up != up) 821 if (up && le32_to_cpu(((struct dnode *)qbh.data)->up) != up)
828 hpfs_error(s, "hpfs_de_as_down_as_possible: bad up pointer; dnode %08x, down %08x points to %08x", up, d, ((struct dnode *)qbh.data)->up); 822 hpfs_error(s, "hpfs_de_as_down_as_possible: bad up pointer; dnode %08x, down %08x points to %08x", up, d, le32_to_cpu(((struct dnode *)qbh.data)->up));
829 if (!de->down) { 823 if (!de->down) {
830 hpfs_brelse4(&qbh); 824 hpfs_brelse4(&qbh);
831 return d; 825 return d;
@@ -874,7 +868,7 @@ struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp,
874 /* Going up */ 868 /* Going up */
875 if (dnode->root_dnode) goto bail; 869 if (dnode->root_dnode) goto bail;
876 870
877 if (!(up_dnode = hpfs_map_dnode(inode->i_sb, dnode->up, &qbh0))) 871 if (!(up_dnode = hpfs_map_dnode(inode->i_sb, le32_to_cpu(dnode->up), &qbh0)))
878 goto bail; 872 goto bail;
879 873
880 end_up_de = dnode_end_de(up_dnode); 874 end_up_de = dnode_end_de(up_dnode);
@@ -882,16 +876,16 @@ struct hpfs_dirent *map_pos_dirent(struct inode *inode, loff_t *posp,
882 for (up_de = dnode_first_de(up_dnode); up_de < end_up_de; 876 for (up_de = dnode_first_de(up_dnode); up_de < end_up_de;
883 up_de = de_next_de(up_de)) { 877 up_de = de_next_de(up_de)) {
884 if (!(++c & 077)) hpfs_error(inode->i_sb, 878 if (!(++c & 077)) hpfs_error(inode->i_sb,
885 "map_pos_dirent: pos crossed dnode boundary; dnode = %08x", dnode->up); 879 "map_pos_dirent: pos crossed dnode boundary; dnode = %08x", le32_to_cpu(dnode->up));
886 if (up_de->down && de_down_pointer(up_de) == dno) { 880 if (up_de->down && de_down_pointer(up_de) == dno) {
887 *posp = ((loff_t) dnode->up << 4) + c; 881 *posp = ((loff_t) le32_to_cpu(dnode->up) << 4) + c;
888 hpfs_brelse4(&qbh0); 882 hpfs_brelse4(&qbh0);
889 return de; 883 return de;
890 } 884 }
891 } 885 }
892 886
893 hpfs_error(inode->i_sb, "map_pos_dirent: pointer to dnode %08x not found in parent dnode %08x", 887 hpfs_error(inode->i_sb, "map_pos_dirent: pointer to dnode %08x not found in parent dnode %08x",
894 dno, dnode->up); 888 dno, le32_to_cpu(dnode->up));
895 hpfs_brelse4(&qbh0); 889 hpfs_brelse4(&qbh0);
896 890
897 bail: 891 bail:
@@ -1017,17 +1011,17 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
1017 /*name2[15] = 0xff;*/ 1011 /*name2[15] = 0xff;*/
1018 name1len = 15; name2len = 256; 1012 name1len = 15; name2len = 256;
1019 } 1013 }
1020 if (!(upf = hpfs_map_fnode(s, f->up, &bh))) { 1014 if (!(upf = hpfs_map_fnode(s, le32_to_cpu(f->up), &bh))) {
1021 kfree(name2); 1015 kfree(name2);
1022 return NULL; 1016 return NULL;
1023 } 1017 }
1024 if (!upf->dirflag) { 1018 if (!upf->dirflag) {
1025 brelse(bh); 1019 brelse(bh);
1026 hpfs_error(s, "fnode %08x has non-directory parent %08x", fno, f->up); 1020 hpfs_error(s, "fnode %08x has non-directory parent %08x", fno, le32_to_cpu(f->up));
1027 kfree(name2); 1021 kfree(name2);
1028 return NULL; 1022 return NULL;
1029 } 1023 }
1030 dno = upf->u.external[0].disk_secno; 1024 dno = le32_to_cpu(upf->u.external[0].disk_secno);
1031 brelse(bh); 1025 brelse(bh);
1032 go_down: 1026 go_down:
1033 downd = 0; 1027 downd = 0;
@@ -1049,7 +1043,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
1049 return NULL; 1043 return NULL;
1050 } 1044 }
1051 next_de: 1045 next_de:
1052 if (de->fnode == fno) { 1046 if (le32_to_cpu(de->fnode) == fno) {
1053 kfree(name2); 1047 kfree(name2);
1054 return de; 1048 return de;
1055 } 1049 }
@@ -1065,7 +1059,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
1065 goto go_down; 1059 goto go_down;
1066 } 1060 }
1067 f: 1061 f:
1068 if (de->fnode == fno) { 1062 if (le32_to_cpu(de->fnode) == fno) {
1069 kfree(name2); 1063 kfree(name2);
1070 return de; 1064 return de;
1071 } 1065 }
@@ -1074,7 +1068,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno,
1074 if ((de = de_next_de(de)) < de_end) goto next_de; 1068 if ((de = de_next_de(de)) < de_end) goto next_de;
1075 if (d->root_dnode) goto not_found; 1069 if (d->root_dnode) goto not_found;
1076 downd = dno; 1070 downd = dno;
1077 dno = d->up; 1071 dno = le32_to_cpu(d->up);
1078 hpfs_brelse4(qbh); 1072 hpfs_brelse4(qbh);
1079 if (hpfs_sb(s)->sb_chk) 1073 if (hpfs_sb(s)->sb_chk)
1080 if (hpfs_stop_cycles(s, downd, &d1, &d2, "map_fnode_dirent #2")) { 1074 if (hpfs_stop_cycles(s, downd, &d1, &d2, "map_fnode_dirent #2")) {
diff --git a/fs/hpfs/ea.c b/fs/hpfs/ea.c
index 45e53d972b42..d8b84d113c89 100644
--- a/fs/hpfs/ea.c
+++ b/fs/hpfs/ea.c
@@ -24,7 +24,7 @@ void hpfs_ea_ext_remove(struct super_block *s, secno a, int ano, unsigned len)
24 } 24 }
25 if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return; 25 if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return;
26 if (ea->indirect) { 26 if (ea->indirect) {
27 if (ea->valuelen != 8) { 27 if (ea_valuelen(ea) != 8) {
28 hpfs_error(s, "ea->indirect set while ea->valuelen!=8, %s %08x, pos %08x", 28 hpfs_error(s, "ea->indirect set while ea->valuelen!=8, %s %08x, pos %08x",
29 ano ? "anode" : "sectors", a, pos); 29 ano ? "anode" : "sectors", a, pos);
30 return; 30 return;
@@ -33,7 +33,7 @@ void hpfs_ea_ext_remove(struct super_block *s, secno a, int ano, unsigned len)
33 return; 33 return;
34 hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea)); 34 hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea));
35 } 35 }
36 pos += ea->namelen + ea->valuelen + 5; 36 pos += ea->namelen + ea_valuelen(ea) + 5;
37 } 37 }
38 if (!ano) hpfs_free_sectors(s, a, (len+511) >> 9); 38 if (!ano) hpfs_free_sectors(s, a, (len+511) >> 9);
39 else { 39 else {
@@ -76,24 +76,24 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key,
76 unsigned pos; 76 unsigned pos;
77 int ano, len; 77 int ano, len;
78 secno a; 78 secno a;
79 char ex[4 + 255 + 1 + 8];
79 struct extended_attribute *ea; 80 struct extended_attribute *ea;
80 struct extended_attribute *ea_end = fnode_end_ea(fnode); 81 struct extended_attribute *ea_end = fnode_end_ea(fnode);
81 for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) 82 for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea))
82 if (!strcmp(ea->name, key)) { 83 if (!strcmp(ea->name, key)) {
83 if (ea->indirect) 84 if (ea->indirect)
84 goto indirect; 85 goto indirect;
85 if (ea->valuelen >= size) 86 if (ea_valuelen(ea) >= size)
86 return -EINVAL; 87 return -EINVAL;
87 memcpy(buf, ea_data(ea), ea->valuelen); 88 memcpy(buf, ea_data(ea), ea_valuelen(ea));
88 buf[ea->valuelen] = 0; 89 buf[ea_valuelen(ea)] = 0;
89 return 0; 90 return 0;
90 } 91 }
91 a = fnode->ea_secno; 92 a = le32_to_cpu(fnode->ea_secno);
92 len = fnode->ea_size_l; 93 len = le32_to_cpu(fnode->ea_size_l);
93 ano = fnode->ea_anode; 94 ano = fnode->ea_anode;
94 pos = 0; 95 pos = 0;
95 while (pos < len) { 96 while (pos < len) {
96 char ex[4 + 255 + 1 + 8];
97 ea = (struct extended_attribute *)ex; 97 ea = (struct extended_attribute *)ex;
98 if (pos + 4 > len) { 98 if (pos + 4 > len) {
99 hpfs_error(s, "EAs don't end correctly, %s %08x, len %08x", 99 hpfs_error(s, "EAs don't end correctly, %s %08x, len %08x",
@@ -106,14 +106,14 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key,
106 if (!strcmp(ea->name, key)) { 106 if (!strcmp(ea->name, key)) {
107 if (ea->indirect) 107 if (ea->indirect)
108 goto indirect; 108 goto indirect;
109 if (ea->valuelen >= size) 109 if (ea_valuelen(ea) >= size)
110 return -EINVAL; 110 return -EINVAL;
111 if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea->valuelen, buf)) 111 if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea_valuelen(ea), buf))
112 return -EIO; 112 return -EIO;
113 buf[ea->valuelen] = 0; 113 buf[ea_valuelen(ea)] = 0;
114 return 0; 114 return 0;
115 } 115 }
116 pos += ea->namelen + ea->valuelen + 5; 116 pos += ea->namelen + ea_valuelen(ea) + 5;
117 } 117 }
118 return -ENOENT; 118 return -ENOENT;
119indirect: 119indirect:
@@ -138,16 +138,16 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si
138 if (!strcmp(ea->name, key)) { 138 if (!strcmp(ea->name, key)) {
139 if (ea->indirect) 139 if (ea->indirect)
140 return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea)); 140 return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea));
141 if (!(ret = kmalloc((*size = ea->valuelen) + 1, GFP_NOFS))) { 141 if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) {
142 printk("HPFS: out of memory for EA\n"); 142 printk("HPFS: out of memory for EA\n");
143 return NULL; 143 return NULL;
144 } 144 }
145 memcpy(ret, ea_data(ea), ea->valuelen); 145 memcpy(ret, ea_data(ea), ea_valuelen(ea));
146 ret[ea->valuelen] = 0; 146 ret[ea_valuelen(ea)] = 0;
147 return ret; 147 return ret;
148 } 148 }
149 a = fnode->ea_secno; 149 a = le32_to_cpu(fnode->ea_secno);
150 len = fnode->ea_size_l; 150 len = le32_to_cpu(fnode->ea_size_l);
151 ano = fnode->ea_anode; 151 ano = fnode->ea_anode;
152 pos = 0; 152 pos = 0;
153 while (pos < len) { 153 while (pos < len) {
@@ -164,18 +164,18 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si
164 if (!strcmp(ea->name, key)) { 164 if (!strcmp(ea->name, key)) {
165 if (ea->indirect) 165 if (ea->indirect)
166 return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea)); 166 return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea));
167 if (!(ret = kmalloc((*size = ea->valuelen) + 1, GFP_NOFS))) { 167 if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) {
168 printk("HPFS: out of memory for EA\n"); 168 printk("HPFS: out of memory for EA\n");
169 return NULL; 169 return NULL;
170 } 170 }
171 if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea->valuelen, ret)) { 171 if (hpfs_ea_read(s, a, ano, pos + 4 + ea->namelen + 1, ea_valuelen(ea), ret)) {
172 kfree(ret); 172 kfree(ret);
173 return NULL; 173 return NULL;
174 } 174 }
175 ret[ea->valuelen] = 0; 175 ret[ea_valuelen(ea)] = 0;
176 return ret; 176 return ret;
177 } 177 }
178 pos += ea->namelen + ea->valuelen + 5; 178 pos += ea->namelen + ea_valuelen(ea) + 5;
179 } 179 }
180 return NULL; 180 return NULL;
181} 181}
@@ -202,13 +202,13 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
202 if (ea->indirect) { 202 if (ea->indirect) {
203 if (ea_len(ea) == size) 203 if (ea_len(ea) == size)
204 set_indirect_ea(s, ea->anode, ea_sec(ea), data, size); 204 set_indirect_ea(s, ea->anode, ea_sec(ea), data, size);
205 } else if (ea->valuelen == size) { 205 } else if (ea_valuelen(ea) == size) {
206 memcpy(ea_data(ea), data, size); 206 memcpy(ea_data(ea), data, size);
207 } 207 }
208 return; 208 return;
209 } 209 }
210 a = fnode->ea_secno; 210 a = le32_to_cpu(fnode->ea_secno);
211 len = fnode->ea_size_l; 211 len = le32_to_cpu(fnode->ea_size_l);
212 ano = fnode->ea_anode; 212 ano = fnode->ea_anode;
213 pos = 0; 213 pos = 0;
214 while (pos < len) { 214 while (pos < len) {
@@ -228,68 +228,70 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
228 set_indirect_ea(s, ea->anode, ea_sec(ea), data, size); 228 set_indirect_ea(s, ea->anode, ea_sec(ea), data, size);
229 } 229 }
230 else { 230 else {
231 if (ea->valuelen == size) 231 if (ea_valuelen(ea) == size)
232 hpfs_ea_write(s, a, ano, pos + 4 + ea->namelen + 1, size, data); 232 hpfs_ea_write(s, a, ano, pos + 4 + ea->namelen + 1, size, data);
233 } 233 }
234 return; 234 return;
235 } 235 }
236 pos += ea->namelen + ea->valuelen + 5; 236 pos += ea->namelen + ea_valuelen(ea) + 5;
237 } 237 }
238 if (!fnode->ea_offs) { 238 if (!le16_to_cpu(fnode->ea_offs)) {
239 /*if (fnode->ea_size_s) { 239 /*if (le16_to_cpu(fnode->ea_size_s)) {
240 hpfs_error(s, "fnode %08x: ea_size_s == %03x, ea_offs == 0", 240 hpfs_error(s, "fnode %08x: ea_size_s == %03x, ea_offs == 0",
241 inode->i_ino, fnode->ea_size_s); 241 inode->i_ino, le16_to_cpu(fnode->ea_size_s));
242 return; 242 return;
243 }*/ 243 }*/
244 fnode->ea_offs = 0xc4; 244 fnode->ea_offs = cpu_to_le16(0xc4);
245 } 245 }
246 if (fnode->ea_offs < 0xc4 || fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s > 0x200) { 246 if (le16_to_cpu(fnode->ea_offs) < 0xc4 || le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) > 0x200) {
247 hpfs_error(s, "fnode %08lx: ea_offs == %03x, ea_size_s == %03x", 247 hpfs_error(s, "fnode %08lx: ea_offs == %03x, ea_size_s == %03x",
248 (unsigned long)inode->i_ino, 248 (unsigned long)inode->i_ino,
249 fnode->ea_offs, fnode->ea_size_s); 249 le32_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s));
250 return; 250 return;
251 } 251 }
252 if ((fnode->ea_size_s || !fnode->ea_size_l) && 252 if ((le16_to_cpu(fnode->ea_size_s) || !le32_to_cpu(fnode->ea_size_l)) &&
253 fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s + strlen(key) + size + 5 <= 0x200) { 253 le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) + strlen(key) + size + 5 <= 0x200) {
254 ea = fnode_end_ea(fnode); 254 ea = fnode_end_ea(fnode);
255 *(char *)ea = 0; 255 *(char *)ea = 0;
256 ea->namelen = strlen(key); 256 ea->namelen = strlen(key);
257 ea->valuelen = size; 257 ea->valuelen_lo = size;
258 ea->valuelen_hi = size >> 8;
258 strcpy(ea->name, key); 259 strcpy(ea->name, key);
259 memcpy(ea_data(ea), data, size); 260 memcpy(ea_data(ea), data, size);
260 fnode->ea_size_s += strlen(key) + size + 5; 261 fnode->ea_size_s = cpu_to_le16(le16_to_cpu(fnode->ea_size_s) + strlen(key) + size + 5);
261 goto ret; 262 goto ret;
262 } 263 }
263 /* Most the code here is 99.9993422% unused. I hope there are no bugs. 264 /* Most the code here is 99.9993422% unused. I hope there are no bugs.
264 But what .. HPFS.IFS has also bugs in ea management. */ 265 But what .. HPFS.IFS has also bugs in ea management. */
265 if (fnode->ea_size_s && !fnode->ea_size_l) { 266 if (le16_to_cpu(fnode->ea_size_s) && !le32_to_cpu(fnode->ea_size_l)) {
266 secno n; 267 secno n;
267 struct buffer_head *bh; 268 struct buffer_head *bh;
268 char *data; 269 char *data;
269 if (!(n = hpfs_alloc_sector(s, fno, 1, 0, 1))) return; 270 if (!(n = hpfs_alloc_sector(s, fno, 1, 0))) return;
270 if (!(data = hpfs_get_sector(s, n, &bh))) { 271 if (!(data = hpfs_get_sector(s, n, &bh))) {
271 hpfs_free_sectors(s, n, 1); 272 hpfs_free_sectors(s, n, 1);
272 return; 273 return;
273 } 274 }
274 memcpy(data, fnode_ea(fnode), fnode->ea_size_s); 275 memcpy(data, fnode_ea(fnode), le16_to_cpu(fnode->ea_size_s));
275 fnode->ea_size_l = fnode->ea_size_s; 276 fnode->ea_size_l = cpu_to_le32(le16_to_cpu(fnode->ea_size_s));
276 fnode->ea_size_s = 0; 277 fnode->ea_size_s = cpu_to_le16(0);
277 fnode->ea_secno = n; 278 fnode->ea_secno = cpu_to_le32(n);
278 fnode->ea_anode = 0; 279 fnode->ea_anode = cpu_to_le32(0);
279 mark_buffer_dirty(bh); 280 mark_buffer_dirty(bh);
280 brelse(bh); 281 brelse(bh);
281 } 282 }
282 pos = fnode->ea_size_l + 5 + strlen(key) + size; 283 pos = le32_to_cpu(fnode->ea_size_l) + 5 + strlen(key) + size;
283 len = (fnode->ea_size_l + 511) >> 9; 284 len = (le32_to_cpu(fnode->ea_size_l) + 511) >> 9;
284 if (pos >= 30000) goto bail; 285 if (pos >= 30000) goto bail;
285 while (((pos + 511) >> 9) > len) { 286 while (((pos + 511) >> 9) > len) {
286 if (!len) { 287 if (!len) {
287 if (!(fnode->ea_secno = hpfs_alloc_sector(s, fno, 1, 0, 1))) 288 secno q = hpfs_alloc_sector(s, fno, 1, 0);
288 goto bail; 289 if (!q) goto bail;
290 fnode->ea_secno = cpu_to_le32(q);
289 fnode->ea_anode = 0; 291 fnode->ea_anode = 0;
290 len++; 292 len++;
291 } else if (!fnode->ea_anode) { 293 } else if (!fnode->ea_anode) {
292 if (hpfs_alloc_if_possible(s, fnode->ea_secno + len)) { 294 if (hpfs_alloc_if_possible(s, le32_to_cpu(fnode->ea_secno) + len)) {
293 len++; 295 len++;
294 } else { 296 } else {
295 /* Aargh... don't know how to create ea anodes :-( */ 297 /* Aargh... don't know how to create ea anodes :-( */
@@ -298,26 +300,26 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
298 anode_secno a_s; 300 anode_secno a_s;
299 if (!(anode = hpfs_alloc_anode(s, fno, &a_s, &bh))) 301 if (!(anode = hpfs_alloc_anode(s, fno, &a_s, &bh)))
300 goto bail; 302 goto bail;
301 anode->up = fno; 303 anode->up = cpu_to_le32(fno);
302 anode->btree.fnode_parent = 1; 304 anode->btree.fnode_parent = 1;
303 anode->btree.n_free_nodes--; 305 anode->btree.n_free_nodes--;
304 anode->btree.n_used_nodes++; 306 anode->btree.n_used_nodes++;
305 anode->btree.first_free += 12; 307 anode->btree.first_free = cpu_to_le16(le16_to_cpu(anode->btree.first_free) + 12);
306 anode->u.external[0].disk_secno = fnode->ea_secno; 308 anode->u.external[0].disk_secno = cpu_to_le32(le32_to_cpu(fnode->ea_secno));
307 anode->u.external[0].file_secno = 0; 309 anode->u.external[0].file_secno = cpu_to_le32(0);
308 anode->u.external[0].length = len; 310 anode->u.external[0].length = cpu_to_le32(len);
309 mark_buffer_dirty(bh); 311 mark_buffer_dirty(bh);
310 brelse(bh); 312 brelse(bh);
311 fnode->ea_anode = 1; 313 fnode->ea_anode = 1;
312 fnode->ea_secno = a_s;*/ 314 fnode->ea_secno = cpu_to_le32(a_s);*/
313 secno new_sec; 315 secno new_sec;
314 int i; 316 int i;
315 if (!(new_sec = hpfs_alloc_sector(s, fno, 1, 1 - ((pos + 511) >> 9), 1))) 317 if (!(new_sec = hpfs_alloc_sector(s, fno, 1, 1 - ((pos + 511) >> 9))))
316 goto bail; 318 goto bail;
317 for (i = 0; i < len; i++) { 319 for (i = 0; i < len; i++) {
318 struct buffer_head *bh1, *bh2; 320 struct buffer_head *bh1, *bh2;
319 void *b1, *b2; 321 void *b1, *b2;
320 if (!(b1 = hpfs_map_sector(s, fnode->ea_secno + i, &bh1, len - i - 1))) { 322 if (!(b1 = hpfs_map_sector(s, le32_to_cpu(fnode->ea_secno) + i, &bh1, len - i - 1))) {
321 hpfs_free_sectors(s, new_sec, (pos + 511) >> 9); 323 hpfs_free_sectors(s, new_sec, (pos + 511) >> 9);
322 goto bail; 324 goto bail;
323 } 325 }
@@ -331,13 +333,13 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
331 mark_buffer_dirty(bh2); 333 mark_buffer_dirty(bh2);
332 brelse(bh2); 334 brelse(bh2);
333 } 335 }
334 hpfs_free_sectors(s, fnode->ea_secno, len); 336 hpfs_free_sectors(s, le32_to_cpu(fnode->ea_secno), len);
335 fnode->ea_secno = new_sec; 337 fnode->ea_secno = cpu_to_le32(new_sec);
336 len = (pos + 511) >> 9; 338 len = (pos + 511) >> 9;
337 } 339 }
338 } 340 }
339 if (fnode->ea_anode) { 341 if (fnode->ea_anode) {
340 if (hpfs_add_sector_to_btree(s, fnode->ea_secno, 342 if (hpfs_add_sector_to_btree(s, le32_to_cpu(fnode->ea_secno),
341 0, len) != -1) { 343 0, len) != -1) {
342 len++; 344 len++;
343 } else { 345 } else {
@@ -349,17 +351,17 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key,
349 h[1] = strlen(key); 351 h[1] = strlen(key);
350 h[2] = size & 0xff; 352 h[2] = size & 0xff;
351 h[3] = size >> 8; 353 h[3] = size >> 8;
352 if (hpfs_ea_write(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l, 4, h)) goto bail; 354 if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l), 4, h)) goto bail;
353 if (hpfs_ea_write(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l + 4, h[1] + 1, key)) goto bail; 355 if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l) + 4, h[1] + 1, key)) goto bail;
354 if (hpfs_ea_write(s, fnode->ea_secno, fnode->ea_anode, fnode->ea_size_l + 5 + h[1], size, data)) goto bail; 356 if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l) + 5 + h[1], size, data)) goto bail;
355 fnode->ea_size_l = pos; 357 fnode->ea_size_l = cpu_to_le32(pos);
356 ret: 358 ret:
357 hpfs_i(inode)->i_ea_size += 5 + strlen(key) + size; 359 hpfs_i(inode)->i_ea_size += 5 + strlen(key) + size;
358 return; 360 return;
359 bail: 361 bail:
360 if (fnode->ea_secno) 362 if (le32_to_cpu(fnode->ea_secno))
361 if (fnode->ea_anode) hpfs_truncate_btree(s, fnode->ea_secno, 1, (fnode->ea_size_l + 511) >> 9); 363 if (fnode->ea_anode) hpfs_truncate_btree(s, le32_to_cpu(fnode->ea_secno), 1, (le32_to_cpu(fnode->ea_size_l) + 511) >> 9);
362 else hpfs_free_sectors(s, fnode->ea_secno + ((fnode->ea_size_l + 511) >> 9), len - ((fnode->ea_size_l + 511) >> 9)); 364 else hpfs_free_sectors(s, le32_to_cpu(fnode->ea_secno) + ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9), len - ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9));
363 else fnode->ea_secno = fnode->ea_size_l = 0; 365 else fnode->ea_secno = fnode->ea_size_l = cpu_to_le32(0);
364} 366}
365 367
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 9b9eb6933e43..89c500ee5213 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -20,8 +20,8 @@ static int hpfs_file_release(struct inode *inode, struct file *file)
20 20
21int hpfs_file_fsync(struct file *file, int datasync) 21int hpfs_file_fsync(struct file *file, int datasync)
22{ 22{
23 /*return file_fsync(file, datasync);*/ 23 struct inode *inode = file->f_mapping->host;
24 return 0; /* Don't fsync :-) */ 24 return sync_blockdev(inode->i_sb->s_bdev);
25} 25}
26 26
27/* 27/*
@@ -48,38 +48,46 @@ static secno hpfs_bmap(struct inode *inode, unsigned file_secno)
48static void hpfs_truncate(struct inode *i) 48static void hpfs_truncate(struct inode *i)
49{ 49{
50 if (IS_IMMUTABLE(i)) return /*-EPERM*/; 50 if (IS_IMMUTABLE(i)) return /*-EPERM*/;
51 hpfs_lock(i->i_sb); 51 hpfs_lock_assert(i->i_sb);
52
52 hpfs_i(i)->i_n_secs = 0; 53 hpfs_i(i)->i_n_secs = 0;
53 i->i_blocks = 1 + ((i->i_size + 511) >> 9); 54 i->i_blocks = 1 + ((i->i_size + 511) >> 9);
54 hpfs_i(i)->mmu_private = i->i_size; 55 hpfs_i(i)->mmu_private = i->i_size;
55 hpfs_truncate_btree(i->i_sb, i->i_ino, 1, ((i->i_size + 511) >> 9)); 56 hpfs_truncate_btree(i->i_sb, i->i_ino, 1, ((i->i_size + 511) >> 9));
56 hpfs_write_inode(i); 57 hpfs_write_inode(i);
57 hpfs_i(i)->i_n_secs = 0; 58 hpfs_i(i)->i_n_secs = 0;
58 hpfs_unlock(i->i_sb);
59} 59}
60 60
61static int hpfs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) 61static int hpfs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
62{ 62{
63 int r;
63 secno s; 64 secno s;
65 hpfs_lock(inode->i_sb);
64 s = hpfs_bmap(inode, iblock); 66 s = hpfs_bmap(inode, iblock);
65 if (s) { 67 if (s) {
66 map_bh(bh_result, inode->i_sb, s); 68 map_bh(bh_result, inode->i_sb, s);
67 return 0; 69 goto ret_0;
68 } 70 }
69 if (!create) return 0; 71 if (!create) goto ret_0;
70 if (iblock<<9 != hpfs_i(inode)->mmu_private) { 72 if (iblock<<9 != hpfs_i(inode)->mmu_private) {
71 BUG(); 73 BUG();
72 return -EIO; 74 r = -EIO;
75 goto ret_r;
73 } 76 }
74 if ((s = hpfs_add_sector_to_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1)) == -1) { 77 if ((s = hpfs_add_sector_to_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1)) == -1) {
75 hpfs_truncate_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1); 78 hpfs_truncate_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1);
76 return -ENOSPC; 79 r = -ENOSPC;
80 goto ret_r;
77 } 81 }
78 inode->i_blocks++; 82 inode->i_blocks++;
79 hpfs_i(inode)->mmu_private += 512; 83 hpfs_i(inode)->mmu_private += 512;
80 set_buffer_new(bh_result); 84 set_buffer_new(bh_result);
81 map_bh(bh_result, inode->i_sb, s); 85 map_bh(bh_result, inode->i_sb, s);
82 return 0; 86 ret_0:
87 r = 0;
88 ret_r:
89 hpfs_unlock(inode->i_sb);
90 return r;
83} 91}
84 92
85static int hpfs_writepage(struct page *page, struct writeback_control *wbc) 93static int hpfs_writepage(struct page *page, struct writeback_control *wbc)
@@ -130,8 +138,11 @@ static ssize_t hpfs_file_write(struct file *file, const char __user *buf,
130 ssize_t retval; 138 ssize_t retval;
131 139
132 retval = do_sync_write(file, buf, count, ppos); 140 retval = do_sync_write(file, buf, count, ppos);
133 if (retval > 0) 141 if (retval > 0) {
142 hpfs_lock(file->f_path.dentry->d_sb);
134 hpfs_i(file->f_path.dentry->d_inode)->i_dirty = 1; 143 hpfs_i(file->f_path.dentry->d_inode)->i_dirty = 1;
144 hpfs_unlock(file->f_path.dentry->d_sb);
145 }
135 return retval; 146 return retval;
136} 147}
137 148
diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h
index 0e84c73cd9c4..8b0650aae328 100644
--- a/fs/hpfs/hpfs.h
+++ b/fs/hpfs/hpfs.h
@@ -19,9 +19,13 @@
19 For definitive information on HPFS, ask somebody else -- this is guesswork. 19 For definitive information on HPFS, ask somebody else -- this is guesswork.
20 There are certain to be many mistakes. */ 20 There are certain to be many mistakes. */
21 21
22#if !defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN)
23#error unknown endian
24#endif
25
22/* Notation */ 26/* Notation */
23 27
24typedef unsigned secno; /* sector number, partition relative */ 28typedef u32 secno; /* sector number, partition relative */
25 29
26typedef secno dnode_secno; /* sector number of a dnode */ 30typedef secno dnode_secno; /* sector number of a dnode */
27typedef secno fnode_secno; /* sector number of an fnode */ 31typedef secno fnode_secno; /* sector number of an fnode */
@@ -38,28 +42,28 @@ typedef u32 time32_t; /* 32-bit time_t type */
38 42
39struct hpfs_boot_block 43struct hpfs_boot_block
40{ 44{
41 unsigned char jmp[3]; 45 u8 jmp[3];
42 unsigned char oem_id[8]; 46 u8 oem_id[8];
43 unsigned char bytes_per_sector[2]; /* 512 */ 47 u8 bytes_per_sector[2]; /* 512 */
44 unsigned char sectors_per_cluster; 48 u8 sectors_per_cluster;
45 unsigned char n_reserved_sectors[2]; 49 u8 n_reserved_sectors[2];
46 unsigned char n_fats; 50 u8 n_fats;
47 unsigned char n_rootdir_entries[2]; 51 u8 n_rootdir_entries[2];
48 unsigned char n_sectors_s[2]; 52 u8 n_sectors_s[2];
49 unsigned char media_byte; 53 u8 media_byte;
50 unsigned short sectors_per_fat; 54 u16 sectors_per_fat;
51 unsigned short sectors_per_track; 55 u16 sectors_per_track;
52 unsigned short heads_per_cyl; 56 u16 heads_per_cyl;
53 unsigned int n_hidden_sectors; 57 u32 n_hidden_sectors;
54 unsigned int n_sectors_l; /* size of partition */ 58 u32 n_sectors_l; /* size of partition */
55 unsigned char drive_number; 59 u8 drive_number;
56 unsigned char mbz; 60 u8 mbz;
57 unsigned char sig_28h; /* 28h */ 61 u8 sig_28h; /* 28h */
58 unsigned char vol_serno[4]; 62 u8 vol_serno[4];
59 unsigned char vol_label[11]; 63 u8 vol_label[11];
60 unsigned char sig_hpfs[8]; /* "HPFS " */ 64 u8 sig_hpfs[8]; /* "HPFS " */
61 unsigned char pad[448]; 65 u8 pad[448];
62 unsigned short magic; /* aa55 */ 66 u16 magic; /* aa55 */
63}; 67};
64 68
65 69
@@ -71,31 +75,29 @@ struct hpfs_boot_block
71 75
72struct hpfs_super_block 76struct hpfs_super_block
73{ 77{
74 unsigned magic; /* f995 e849 */ 78 u32 magic; /* f995 e849 */
75 unsigned magic1; /* fa53 e9c5, more magic? */ 79 u32 magic1; /* fa53 e9c5, more magic? */
76 /*unsigned huh202;*/ /* ?? 202 = N. of B. in 1.00390625 S.*/ 80 u8 version; /* version of a filesystem usually 2 */
77 char version; /* version of a filesystem usually 2 */ 81 u8 funcversion; /* functional version - oldest version
78 char funcversion; /* functional version - oldest version
79 of filesystem that can understand 82 of filesystem that can understand
80 this disk */ 83 this disk */
81 unsigned short int zero; /* 0 */ 84 u16 zero; /* 0 */
82 fnode_secno root; /* fnode of root directory */ 85 fnode_secno root; /* fnode of root directory */
83 secno n_sectors; /* size of filesystem */ 86 secno n_sectors; /* size of filesystem */
84 unsigned n_badblocks; /* number of bad blocks */ 87 u32 n_badblocks; /* number of bad blocks */
85 secno bitmaps; /* pointers to free space bit maps */ 88 secno bitmaps; /* pointers to free space bit maps */
86 unsigned zero1; /* 0 */ 89 u32 zero1; /* 0 */
87 secno badblocks; /* bad block list */ 90 secno badblocks; /* bad block list */
88 unsigned zero3; /* 0 */ 91 u32 zero3; /* 0 */
89 time32_t last_chkdsk; /* date last checked, 0 if never */ 92 time32_t last_chkdsk; /* date last checked, 0 if never */
90 /*unsigned zero4;*/ /* 0 */ 93 time32_t last_optimize; /* date last optimized, 0 if never */
91 time32_t last_optimize; /* date last optimized, 0 if never */
92 secno n_dir_band; /* number of sectors in dir band */ 94 secno n_dir_band; /* number of sectors in dir band */
93 secno dir_band_start; /* first sector in dir band */ 95 secno dir_band_start; /* first sector in dir band */
94 secno dir_band_end; /* last sector in dir band */ 96 secno dir_band_end; /* last sector in dir band */
95 secno dir_band_bitmap; /* free space map, 1 dnode per bit */ 97 secno dir_band_bitmap; /* free space map, 1 dnode per bit */
96 char volume_name[32]; /* not used */ 98 u8 volume_name[32]; /* not used */
97 secno user_id_table; /* 8 preallocated sectors - user id */ 99 secno user_id_table; /* 8 preallocated sectors - user id */
98 unsigned zero6[103]; /* 0 */ 100 u32 zero6[103]; /* 0 */
99}; 101};
100 102
101 103
@@ -107,44 +109,65 @@ struct hpfs_super_block
107 109
108struct hpfs_spare_block 110struct hpfs_spare_block
109{ 111{
110 unsigned magic; /* f991 1849 */ 112 u32 magic; /* f991 1849 */
111 unsigned magic1; /* fa52 29c5, more magic? */ 113 u32 magic1; /* fa52 29c5, more magic? */
112 114
113 unsigned dirty: 1; /* 0 clean, 1 "improperly stopped" */ 115#ifdef __LITTLE_ENDIAN
114 /*unsigned flag1234: 4;*/ /* unknown flags */ 116 u8 dirty: 1; /* 0 clean, 1 "improperly stopped" */
115 unsigned sparedir_used: 1; /* spare dirblks used */ 117 u8 sparedir_used: 1; /* spare dirblks used */
116 unsigned hotfixes_used: 1; /* hotfixes used */ 118 u8 hotfixes_used: 1; /* hotfixes used */
117 unsigned bad_sector: 1; /* bad sector, corrupted disk (???) */ 119 u8 bad_sector: 1; /* bad sector, corrupted disk (???) */
118 unsigned bad_bitmap: 1; /* bad bitmap */ 120 u8 bad_bitmap: 1; /* bad bitmap */
119 unsigned fast: 1; /* partition was fast formatted */ 121 u8 fast: 1; /* partition was fast formatted */
120 unsigned old_wrote: 1; /* old version wrote to partion */ 122 u8 old_wrote: 1; /* old version wrote to partion */
121 unsigned old_wrote_1: 1; /* old version wrote to partion (?) */ 123 u8 old_wrote_1: 1; /* old version wrote to partion (?) */
122 unsigned install_dasd_limits: 1; /* HPFS386 flags */ 124#else
123 unsigned resynch_dasd_limits: 1; 125 u8 old_wrote_1: 1; /* old version wrote to partion (?) */
124 unsigned dasd_limits_operational: 1; 126 u8 old_wrote: 1; /* old version wrote to partion */
125 unsigned multimedia_active: 1; 127 u8 fast: 1; /* partition was fast formatted */
126 unsigned dce_acls_active: 1; 128 u8 bad_bitmap: 1; /* bad bitmap */
127 unsigned dasd_limits_dirty: 1; 129 u8 bad_sector: 1; /* bad sector, corrupted disk (???) */
128 unsigned flag67: 2; 130 u8 hotfixes_used: 1; /* hotfixes used */
129 unsigned char mm_contlgulty; 131 u8 sparedir_used: 1; /* spare dirblks used */
130 unsigned char unused; 132 u8 dirty: 1; /* 0 clean, 1 "improperly stopped" */
133#endif
134
135#ifdef __LITTLE_ENDIAN
136 u8 install_dasd_limits: 1; /* HPFS386 flags */
137 u8 resynch_dasd_limits: 1;
138 u8 dasd_limits_operational: 1;
139 u8 multimedia_active: 1;
140 u8 dce_acls_active: 1;
141 u8 dasd_limits_dirty: 1;
142 u8 flag67: 2;
143#else
144 u8 flag67: 2;
145 u8 dasd_limits_dirty: 1;
146 u8 dce_acls_active: 1;
147 u8 multimedia_active: 1;
148 u8 dasd_limits_operational: 1;
149 u8 resynch_dasd_limits: 1;
150 u8 install_dasd_limits: 1; /* HPFS386 flags */
151#endif
152
153 u8 mm_contlgulty;
154 u8 unused;
131 155
132 secno hotfix_map; /* info about remapped bad sectors */ 156 secno hotfix_map; /* info about remapped bad sectors */
133 unsigned n_spares_used; /* number of hotfixes */ 157 u32 n_spares_used; /* number of hotfixes */
134 unsigned n_spares; /* number of spares in hotfix map */ 158 u32 n_spares; /* number of spares in hotfix map */
135 unsigned n_dnode_spares_free; /* spare dnodes unused */ 159 u32 n_dnode_spares_free; /* spare dnodes unused */
136 unsigned n_dnode_spares; /* length of spare_dnodes[] list, 160 u32 n_dnode_spares; /* length of spare_dnodes[] list,
137 follows in this block*/ 161 follows in this block*/
138 secno code_page_dir; /* code page directory block */ 162 secno code_page_dir; /* code page directory block */
139 unsigned n_code_pages; /* number of code pages */ 163 u32 n_code_pages; /* number of code pages */
140 /*unsigned large_numbers[2];*/ /* ?? */ 164 u32 super_crc; /* on HPFS386 and LAN Server this is
141 unsigned super_crc; /* on HPFS386 and LAN Server this is
142 checksum of superblock, on normal 165 checksum of superblock, on normal
143 OS/2 unused */ 166 OS/2 unused */
144 unsigned spare_crc; /* on HPFS386 checksum of spareblock */ 167 u32 spare_crc; /* on HPFS386 checksum of spareblock */
145 unsigned zero1[15]; /* unused */ 168 u32 zero1[15]; /* unused */
146 dnode_secno spare_dnodes[100]; /* emergency free dnode list */ 169 dnode_secno spare_dnodes[100]; /* emergency free dnode list */
147 unsigned zero2[1]; /* room for more? */ 170 u32 zero2[1]; /* room for more? */
148}; 171};
149 172
150/* The bad block list is 4 sectors long. The first word must be zero, 173/* The bad block list is 4 sectors long. The first word must be zero,
@@ -179,18 +202,18 @@ struct hpfs_spare_block
179 202
180struct code_page_directory 203struct code_page_directory
181{ 204{
182 unsigned magic; /* 4945 21f7 */ 205 u32 magic; /* 4945 21f7 */
183 unsigned n_code_pages; /* number of pointers following */ 206 u32 n_code_pages; /* number of pointers following */
184 unsigned zero1[2]; 207 u32 zero1[2];
185 struct { 208 struct {
186 unsigned short ix; /* index */ 209 u16 ix; /* index */
187 unsigned short code_page_number; /* code page number */ 210 u16 code_page_number; /* code page number */
188 unsigned bounds; /* matches corresponding word 211 u32 bounds; /* matches corresponding word
189 in data block */ 212 in data block */
190 secno code_page_data; /* sector number of a code_page_data 213 secno code_page_data; /* sector number of a code_page_data
191 containing c.p. array */ 214 containing c.p. array */
192 unsigned short index; /* index in c.p. array in that sector*/ 215 u16 index; /* index in c.p. array in that sector*/
193 unsigned short unknown; /* some unknown value; usually 0; 216 u16 unknown; /* some unknown value; usually 0;
194 2 in Japanese version */ 217 2 in Japanese version */
195 } array[31]; /* unknown length */ 218 } array[31]; /* unknown length */
196}; 219};
@@ -201,21 +224,21 @@ struct code_page_directory
201 224
202struct code_page_data 225struct code_page_data
203{ 226{
204 unsigned magic; /* 8945 21f7 */ 227 u32 magic; /* 8945 21f7 */
205 unsigned n_used; /* # elements used in c_p_data[] */ 228 u32 n_used; /* # elements used in c_p_data[] */
206 unsigned bounds[3]; /* looks a bit like 229 u32 bounds[3]; /* looks a bit like
207 (beg1,end1), (beg2,end2) 230 (beg1,end1), (beg2,end2)
208 one byte each */ 231 one byte each */
209 unsigned short offs[3]; /* offsets from start of sector 232 u16 offs[3]; /* offsets from start of sector
210 to start of c_p_data[ix] */ 233 to start of c_p_data[ix] */
211 struct { 234 struct {
212 unsigned short ix; /* index */ 235 u16 ix; /* index */
213 unsigned short code_page_number; /* code page number */ 236 u16 code_page_number; /* code page number */
214 unsigned short unknown; /* the same as in cp directory */ 237 u16 unknown; /* the same as in cp directory */
215 unsigned char map[128]; /* upcase table for chars 80..ff */ 238 u8 map[128]; /* upcase table for chars 80..ff */
216 unsigned short zero2; 239 u16 zero2;
217 } code_page[3]; 240 } code_page[3];
218 unsigned char incognita[78]; 241 u8 incognita[78];
219}; 242};
220 243
221 244
@@ -255,50 +278,84 @@ struct code_page_data
255#define DNODE_MAGIC 0x77e40aae 278#define DNODE_MAGIC 0x77e40aae
256 279
257struct dnode { 280struct dnode {
258 unsigned magic; /* 77e4 0aae */ 281 u32 magic; /* 77e4 0aae */
259 unsigned first_free; /* offset from start of dnode to 282 u32 first_free; /* offset from start of dnode to
260 first free dir entry */ 283 first free dir entry */
261 unsigned root_dnode:1; /* Is it root dnode? */ 284#ifdef __LITTLE_ENDIAN
262 unsigned increment_me:31; /* some kind of activity counter? 285 u8 root_dnode: 1; /* Is it root dnode? */
263 Neither HPFS.IFS nor CHKDSK cares 286 u8 increment_me: 7; /* some kind of activity counter? */
287 /* Neither HPFS.IFS nor CHKDSK cares
288 if you change this word */
289#else
290 u8 increment_me: 7; /* some kind of activity counter? */
291 /* Neither HPFS.IFS nor CHKDSK cares
264 if you change this word */ 292 if you change this word */
293 u8 root_dnode: 1; /* Is it root dnode? */
294#endif
295 u8 increment_me2[3];
265 secno up; /* (root dnode) directory's fnode 296 secno up; /* (root dnode) directory's fnode
266 (nonroot) parent dnode */ 297 (nonroot) parent dnode */
267 dnode_secno self; /* pointer to this dnode */ 298 dnode_secno self; /* pointer to this dnode */
268 unsigned char dirent[2028]; /* one or more dirents */ 299 u8 dirent[2028]; /* one or more dirents */
269}; 300};
270 301
271struct hpfs_dirent { 302struct hpfs_dirent {
272 unsigned short length; /* offset to next dirent */ 303 u16 length; /* offset to next dirent */
273 unsigned first: 1; /* set on phony ^A^A (".") entry */ 304
274 unsigned has_acl: 1; 305#ifdef __LITTLE_ENDIAN
275 unsigned down: 1; /* down pointer present (after name) */ 306 u8 first: 1; /* set on phony ^A^A (".") entry */
276 unsigned last: 1; /* set on phony \377 entry */ 307 u8 has_acl: 1;
277 unsigned has_ea: 1; /* entry has EA */ 308 u8 down: 1; /* down pointer present (after name) */
278 unsigned has_xtd_perm: 1; /* has extended perm list (???) */ 309 u8 last: 1; /* set on phony \377 entry */
279 unsigned has_explicit_acl: 1; 310 u8 has_ea: 1; /* entry has EA */
280 unsigned has_needea: 1; /* ?? some EA has NEEDEA set 311 u8 has_xtd_perm: 1; /* has extended perm list (???) */
312 u8 has_explicit_acl: 1;
313 u8 has_needea: 1; /* ?? some EA has NEEDEA set
314 I have no idea why this is
315 interesting in a dir entry */
316#else
317 u8 has_needea: 1; /* ?? some EA has NEEDEA set
281 I have no idea why this is 318 I have no idea why this is
282 interesting in a dir entry */ 319 interesting in a dir entry */
283 unsigned read_only: 1; /* dos attrib */ 320 u8 has_explicit_acl: 1;
284 unsigned hidden: 1; /* dos attrib */ 321 u8 has_xtd_perm: 1; /* has extended perm list (???) */
285 unsigned system: 1; /* dos attrib */ 322 u8 has_ea: 1; /* entry has EA */
286 unsigned flag11: 1; /* would be volume label dos attrib */ 323 u8 last: 1; /* set on phony \377 entry */
287 unsigned directory: 1; /* dos attrib */ 324 u8 down: 1; /* down pointer present (after name) */
288 unsigned archive: 1; /* dos attrib */ 325 u8 has_acl: 1;
289 unsigned not_8x3: 1; /* name is not 8.3 */ 326 u8 first: 1; /* set on phony ^A^A (".") entry */
290 unsigned flag15: 1; 327#endif
328
329#ifdef __LITTLE_ENDIAN
330 u8 read_only: 1; /* dos attrib */
331 u8 hidden: 1; /* dos attrib */
332 u8 system: 1; /* dos attrib */
333 u8 flag11: 1; /* would be volume label dos attrib */
334 u8 directory: 1; /* dos attrib */
335 u8 archive: 1; /* dos attrib */
336 u8 not_8x3: 1; /* name is not 8.3 */
337 u8 flag15: 1;
338#else
339 u8 flag15: 1;
340 u8 not_8x3: 1; /* name is not 8.3 */
341 u8 archive: 1; /* dos attrib */
342 u8 directory: 1; /* dos attrib */
343 u8 flag11: 1; /* would be volume label dos attrib */
344 u8 system: 1; /* dos attrib */
345 u8 hidden: 1; /* dos attrib */
346 u8 read_only: 1; /* dos attrib */
347#endif
348
291 fnode_secno fnode; /* fnode giving allocation info */ 349 fnode_secno fnode; /* fnode giving allocation info */
292 time32_t write_date; /* mtime */ 350 time32_t write_date; /* mtime */
293 unsigned file_size; /* file length, bytes */ 351 u32 file_size; /* file length, bytes */
294 time32_t read_date; /* atime */ 352 time32_t read_date; /* atime */
295 time32_t creation_date; /* ctime */ 353 time32_t creation_date; /* ctime */
296 unsigned ea_size; /* total EA length, bytes */ 354 u32 ea_size; /* total EA length, bytes */
297 unsigned char no_of_acls : 3; /* number of ACL's */ 355 u8 no_of_acls; /* number of ACL's (low 3 bits) */
298 unsigned char reserver : 5; 356 u8 ix; /* code page index (of filename), see
299 unsigned char ix; /* code page index (of filename), see
300 struct code_page_data */ 357 struct code_page_data */
301 unsigned char namelen, name[1]; /* file name */ 358 u8 namelen, name[1]; /* file name */
302 /* dnode_secno down; btree down pointer, if present, 359 /* dnode_secno down; btree down pointer, if present,
303 follows name on next word boundary, or maybe it 360 follows name on next word boundary, or maybe it
304 precedes next dirent, which is on a word boundary. */ 361 precedes next dirent, which is on a word boundary. */
@@ -318,38 +375,50 @@ struct hpfs_dirent {
318 375
319struct bplus_leaf_node 376struct bplus_leaf_node
320{ 377{
321 unsigned file_secno; /* first file sector in extent */ 378 u32 file_secno; /* first file sector in extent */
322 unsigned length; /* length, sectors */ 379 u32 length; /* length, sectors */
323 secno disk_secno; /* first corresponding disk sector */ 380 secno disk_secno; /* first corresponding disk sector */
324}; 381};
325 382
326struct bplus_internal_node 383struct bplus_internal_node
327{ 384{
328 unsigned file_secno; /* subtree maps sectors < this */ 385 u32 file_secno; /* subtree maps sectors < this */
329 anode_secno down; /* pointer to subtree */ 386 anode_secno down; /* pointer to subtree */
330}; 387};
331 388
332struct bplus_header 389struct bplus_header
333{ 390{
334 unsigned hbff: 1; /* high bit of first free entry offset */ 391#ifdef __LITTLE_ENDIAN
335 unsigned flag1: 1; 392 u8 hbff: 1; /* high bit of first free entry offset */
336 unsigned flag2: 1; 393 u8 flag1234: 4;
337 unsigned flag3: 1; 394 u8 fnode_parent: 1; /* ? we're pointed to by an fnode,
338 unsigned flag4: 1;
339 unsigned fnode_parent: 1; /* ? we're pointed to by an fnode,
340 the data btree or some ea or the 395 the data btree or some ea or the
341 main ea bootage pointer ea_secno */ 396 main ea bootage pointer ea_secno */
342 /* also can get set in fnodes, which 397 /* also can get set in fnodes, which
343 may be a chkdsk glitch or may mean 398 may be a chkdsk glitch or may mean
344 this bit is irrelevant in fnodes, 399 this bit is irrelevant in fnodes,
345 or this interpretation is all wet */ 400 or this interpretation is all wet */
346 unsigned binary_search: 1; /* suggest binary search (unused) */ 401 u8 binary_search: 1; /* suggest binary search (unused) */
347 unsigned internal: 1; /* 1 -> (internal) tree of anodes 402 u8 internal: 1; /* 1 -> (internal) tree of anodes
403 0 -> (leaf) list of extents */
404#else
405 u8 internal: 1; /* 1 -> (internal) tree of anodes
348 0 -> (leaf) list of extents */ 406 0 -> (leaf) list of extents */
349 unsigned char fill[3]; 407 u8 binary_search: 1; /* suggest binary search (unused) */
350 unsigned char n_free_nodes; /* free nodes in following array */ 408 u8 fnode_parent: 1; /* ? we're pointed to by an fnode,
351 unsigned char n_used_nodes; /* used nodes in following array */ 409 the data btree or some ea or the
352 unsigned short first_free; /* offset from start of header to 410 main ea bootage pointer ea_secno */
411 /* also can get set in fnodes, which
412 may be a chkdsk glitch or may mean
413 this bit is irrelevant in fnodes,
414 or this interpretation is all wet */
415 u8 flag1234: 4;
416 u8 hbff: 1; /* high bit of first free entry offset */
417#endif
418 u8 fill[3];
419 u8 n_free_nodes; /* free nodes in following array */
420 u8 n_used_nodes; /* used nodes in following array */
421 u16 first_free; /* offset from start of header to
353 first free node in array */ 422 first free node in array */
354 union { 423 union {
355 struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving 424 struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving
@@ -369,37 +438,38 @@ struct bplus_header
369 438
370struct fnode 439struct fnode
371{ 440{
372 unsigned magic; /* f7e4 0aae */ 441 u32 magic; /* f7e4 0aae */
373 unsigned zero1[2]; /* read history */ 442 u32 zero1[2]; /* read history */
374 unsigned char len, name[15]; /* true length, truncated name */ 443 u8 len, name[15]; /* true length, truncated name */
375 fnode_secno up; /* pointer to file's directory fnode */ 444 fnode_secno up; /* pointer to file's directory fnode */
376 /*unsigned zero2[3];*/
377 secno acl_size_l; 445 secno acl_size_l;
378 secno acl_secno; 446 secno acl_secno;
379 unsigned short acl_size_s; 447 u16 acl_size_s;
380 char acl_anode; 448 u8 acl_anode;
381 char zero2; /* history bit count */ 449 u8 zero2; /* history bit count */
382 unsigned ea_size_l; /* length of disk-resident ea's */ 450 u32 ea_size_l; /* length of disk-resident ea's */
383 secno ea_secno; /* first sector of disk-resident ea's*/ 451 secno ea_secno; /* first sector of disk-resident ea's*/
384 unsigned short ea_size_s; /* length of fnode-resident ea's */ 452 u16 ea_size_s; /* length of fnode-resident ea's */
385 453
386 unsigned flag0: 1; 454#ifdef __LITTLE_ENDIAN
387 unsigned ea_anode: 1; /* 1 -> ea_secno is an anode */ 455 u8 flag0: 1;
388 unsigned flag2: 1; 456 u8 ea_anode: 1; /* 1 -> ea_secno is an anode */
389 unsigned flag3: 1; 457 u8 flag234567: 6;
390 unsigned flag4: 1; 458#else
391 unsigned flag5: 1; 459 u8 flag234567: 6;
392 unsigned flag6: 1; 460 u8 ea_anode: 1; /* 1 -> ea_secno is an anode */
393 unsigned flag7: 1; 461 u8 flag0: 1;
394 unsigned dirflag: 1; /* 1 -> directory. first & only extent 462#endif
463
464#ifdef __LITTLE_ENDIAN
465 u8 dirflag: 1; /* 1 -> directory. first & only extent
395 points to dnode. */ 466 points to dnode. */
396 unsigned flag9: 1; 467 u8 flag9012345: 7;
397 unsigned flag10: 1; 468#else
398 unsigned flag11: 1; 469 u8 flag9012345: 7;
399 unsigned flag12: 1; 470 u8 dirflag: 1; /* 1 -> directory. first & only extent
400 unsigned flag13: 1; 471 points to dnode. */
401 unsigned flag14: 1; 472#endif
402 unsigned flag15: 1;
403 473
404 struct bplus_header btree; /* b+ tree, 8 extents or 12 subtrees */ 474 struct bplus_header btree; /* b+ tree, 8 extents or 12 subtrees */
405 union { 475 union {
@@ -407,17 +477,16 @@ struct fnode
407 struct bplus_internal_node internal[12]; 477 struct bplus_internal_node internal[12];
408 } u; 478 } u;
409 479
410 unsigned file_size; /* file length, bytes */ 480 u32 file_size; /* file length, bytes */
411 unsigned n_needea; /* number of EA's with NEEDEA set */ 481 u32 n_needea; /* number of EA's with NEEDEA set */
412 char user_id[16]; /* unused */ 482 u8 user_id[16]; /* unused */
413 unsigned short ea_offs; /* offset from start of fnode 483 u16 ea_offs; /* offset from start of fnode
414 to first fnode-resident ea */ 484 to first fnode-resident ea */
415 char dasd_limit_treshhold; 485 u8 dasd_limit_treshhold;
416 char dasd_limit_delta; 486 u8 dasd_limit_delta;
417 unsigned dasd_limit; 487 u32 dasd_limit;
418 unsigned dasd_usage; 488 u32 dasd_usage;
419 /*unsigned zero5[2];*/ 489 u8 ea[316]; /* zero or more EA's, packed together
420 unsigned char ea[316]; /* zero or more EA's, packed together
421 with no alignment padding. 490 with no alignment padding.
422 (Do not use this name, get here 491 (Do not use this name, get here
423 via fnode + ea_offs. I think.) */ 492 via fnode + ea_offs. I think.) */
@@ -430,7 +499,7 @@ struct fnode
430 499
431struct anode 500struct anode
432{ 501{
433 unsigned magic; /* 37e4 0aae */ 502 u32 magic; /* 37e4 0aae */
434 anode_secno self; /* pointer to this anode */ 503 anode_secno self; /* pointer to this anode */
435 secno up; /* parent anode or fnode */ 504 secno up; /* parent anode or fnode */
436 505
@@ -440,7 +509,7 @@ struct anode
440 struct bplus_internal_node internal[60]; 509 struct bplus_internal_node internal[60];
441 } u; 510 } u;
442 511
443 unsigned fill[3]; /* unused */ 512 u32 fill[3]; /* unused */
444}; 513};
445 514
446 515
@@ -461,25 +530,31 @@ struct anode
461 530
462struct extended_attribute 531struct extended_attribute
463{ 532{
464 unsigned indirect: 1; /* 1 -> value gives sector number 533#ifdef __LITTLE_ENDIAN
534 u8 indirect: 1; /* 1 -> value gives sector number
465 where real value starts */ 535 where real value starts */
466 unsigned anode: 1; /* 1 -> sector is an anode 536 u8 anode: 1; /* 1 -> sector is an anode
537 that points to fragmented value */
538 u8 flag23456: 5;
539 u8 needea: 1; /* required ea */
540#else
541 u8 needea: 1; /* required ea */
542 u8 flag23456: 5;
543 u8 anode: 1; /* 1 -> sector is an anode
467 that points to fragmented value */ 544 that points to fragmented value */
468 unsigned flag2: 1; 545 u8 indirect: 1; /* 1 -> value gives sector number
469 unsigned flag3: 1; 546 where real value starts */
470 unsigned flag4: 1; 547#endif
471 unsigned flag5: 1; 548 u8 namelen; /* length of name, bytes */
472 unsigned flag6: 1; 549 u8 valuelen_lo; /* length of value, bytes */
473 unsigned needea: 1; /* required ea */ 550 u8 valuelen_hi; /* length of value, bytes */
474 unsigned char namelen; /* length of name, bytes */ 551 u8 name[0];
475 unsigned short valuelen; /* length of value, bytes */
476 unsigned char name[0];
477 /* 552 /*
478 unsigned char name[namelen]; ascii attrib name 553 u8 name[namelen]; ascii attrib name
479 unsigned char nul; terminating '\0', not counted 554 u8 nul; terminating '\0', not counted
480 unsigned char value[valuelen]; value, arbitrary 555 u8 value[valuelen]; value, arbitrary
481 if this.indirect, valuelen is 8 and the value is 556 if this.indirect, valuelen is 8 and the value is
482 unsigned length; real length of value, bytes 557 u32 length; real length of value, bytes
483 secno secno; sector address where it starts 558 secno secno; sector address where it starts
484 if this.anode, the above sector number is the root of an anode tree 559 if this.anode, the above sector number is the root of an anode tree
485 which points to the value. 560 which points to the value.
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index c15adbca07ff..dd552f862c8f 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -13,6 +13,7 @@
13#include <linux/pagemap.h> 13#include <linux/pagemap.h>
14#include <linux/buffer_head.h> 14#include <linux/buffer_head.h>
15#include <linux/slab.h> 15#include <linux/slab.h>
16#include <asm/unaligned.h>
16 17
17#include "hpfs.h" 18#include "hpfs.h"
18 19
@@ -51,18 +52,16 @@ struct hpfs_inode_info {
51 unsigned i_disk_sec; /* (files) minimalist cache of alloc info */ 52 unsigned i_disk_sec; /* (files) minimalist cache of alloc info */
52 unsigned i_n_secs; /* (files) minimalist cache of alloc info */ 53 unsigned i_n_secs; /* (files) minimalist cache of alloc info */
53 unsigned i_ea_size; /* size of extended attributes */ 54 unsigned i_ea_size; /* size of extended attributes */
54 unsigned i_conv : 2; /* (files) crlf->newline hackery */
55 unsigned i_ea_mode : 1; /* file's permission is stored in ea */ 55 unsigned i_ea_mode : 1; /* file's permission is stored in ea */
56 unsigned i_ea_uid : 1; /* file's uid is stored in ea */ 56 unsigned i_ea_uid : 1; /* file's uid is stored in ea */
57 unsigned i_ea_gid : 1; /* file's gid is stored in ea */ 57 unsigned i_ea_gid : 1; /* file's gid is stored in ea */
58 unsigned i_dirty : 1; 58 unsigned i_dirty : 1;
59 struct mutex i_mutex;
60 struct mutex i_parent_mutex;
61 loff_t **i_rddir_off; 59 loff_t **i_rddir_off;
62 struct inode vfs_inode; 60 struct inode vfs_inode;
63}; 61};
64 62
65struct hpfs_sb_info { 63struct hpfs_sb_info {
64 struct mutex hpfs_mutex; /* global hpfs lock */
66 ino_t sb_root; /* inode number of root dir */ 65 ino_t sb_root; /* inode number of root dir */
67 unsigned sb_fs_size; /* file system size, sectors */ 66 unsigned sb_fs_size; /* file system size, sectors */
68 unsigned sb_bitmaps; /* sector number of bitmap list */ 67 unsigned sb_bitmaps; /* sector number of bitmap list */
@@ -74,7 +73,6 @@ struct hpfs_sb_info {
74 uid_t sb_uid; /* uid from mount options */ 73 uid_t sb_uid; /* uid from mount options */
75 gid_t sb_gid; /* gid from mount options */ 74 gid_t sb_gid; /* gid from mount options */
76 umode_t sb_mode; /* mode from mount options */ 75 umode_t sb_mode; /* mode from mount options */
77 unsigned sb_conv : 2; /* crlf->newline hackery */
78 unsigned sb_eas : 2; /* eas: 0-ignore, 1-ro, 2-rw */ 76 unsigned sb_eas : 2; /* eas: 0-ignore, 1-ro, 2-rw */
79 unsigned sb_err : 2; /* on errs: 0-cont, 1-ro, 2-panic */ 77 unsigned sb_err : 2; /* on errs: 0-cont, 1-ro, 2-panic */
80 unsigned sb_chk : 2; /* checks: 0-no, 1-normal, 2-strict */ 78 unsigned sb_chk : 2; /* checks: 0-no, 1-normal, 2-strict */
@@ -87,20 +85,9 @@ struct hpfs_sb_info {
87 unsigned *sb_bmp_dir; /* main bitmap directory */ 85 unsigned *sb_bmp_dir; /* main bitmap directory */
88 unsigned sb_c_bitmap; /* current bitmap */ 86 unsigned sb_c_bitmap; /* current bitmap */
89 unsigned sb_max_fwd_alloc; /* max forwad allocation */ 87 unsigned sb_max_fwd_alloc; /* max forwad allocation */
90 struct mutex hpfs_creation_de; /* when creating dirents, nobody else
91 can alloc blocks */
92 /*unsigned sb_mounting : 1;*/
93 int sb_timeshift; 88 int sb_timeshift;
94}; 89};
95 90
96/*
97 * conv= options
98 */
99
100#define CONV_BINARY 0 /* no conversion */
101#define CONV_TEXT 1 /* crlf->newline */
102#define CONV_AUTO 2 /* decide based on file contents */
103
104/* Four 512-byte buffers and the 2k block obtained by concatenating them */ 91/* Four 512-byte buffers and the 2k block obtained by concatenating them */
105 92
106struct quad_buffer_head { 93struct quad_buffer_head {
@@ -113,7 +100,7 @@ struct quad_buffer_head {
113static inline dnode_secno de_down_pointer (struct hpfs_dirent *de) 100static inline dnode_secno de_down_pointer (struct hpfs_dirent *de)
114{ 101{
115 CHKCOND(de->down,("HPFS: de_down_pointer: !de->down\n")); 102 CHKCOND(de->down,("HPFS: de_down_pointer: !de->down\n"));
116 return *(dnode_secno *) ((void *) de + de->length - 4); 103 return le32_to_cpu(*(dnode_secno *) ((void *) de + le16_to_cpu(de->length) - 4));
117} 104}
118 105
119/* The first dir entry in a dnode */ 106/* The first dir entry in a dnode */
@@ -127,41 +114,46 @@ static inline struct hpfs_dirent *dnode_first_de (struct dnode *dnode)
127 114
128static inline struct hpfs_dirent *dnode_end_de (struct dnode *dnode) 115static inline struct hpfs_dirent *dnode_end_de (struct dnode *dnode)
129{ 116{
130 CHKCOND(dnode->first_free>=0x14 && dnode->first_free<=0xa00,("HPFS: dnode_end_de: dnode->first_free = %d\n",(int)dnode->first_free)); 117 CHKCOND(le32_to_cpu(dnode->first_free)>=0x14 && le32_to_cpu(dnode->first_free)<=0xa00,("HPFS: dnode_end_de: dnode->first_free = %x\n",(unsigned)le32_to_cpu(dnode->first_free)));
131 return (void *) dnode + dnode->first_free; 118 return (void *) dnode + le32_to_cpu(dnode->first_free);
132} 119}
133 120
134/* The dir entry after dir entry de */ 121/* The dir entry after dir entry de */
135 122
136static inline struct hpfs_dirent *de_next_de (struct hpfs_dirent *de) 123static inline struct hpfs_dirent *de_next_de (struct hpfs_dirent *de)
137{ 124{
138 CHKCOND(de->length>=0x20 && de->length<0x800,("HPFS: de_next_de: de->length = %d\n",(int)de->length)); 125 CHKCOND(le16_to_cpu(de->length)>=0x20 && le16_to_cpu(de->length)<0x800,("HPFS: de_next_de: de->length = %x\n",(unsigned)le16_to_cpu(de->length)));
139 return (void *) de + de->length; 126 return (void *) de + le16_to_cpu(de->length);
140} 127}
141 128
142static inline struct extended_attribute *fnode_ea(struct fnode *fnode) 129static inline struct extended_attribute *fnode_ea(struct fnode *fnode)
143{ 130{
144 return (struct extended_attribute *)((char *)fnode + fnode->ea_offs + fnode->acl_size_s); 131 return (struct extended_attribute *)((char *)fnode + le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s));
145} 132}
146 133
147static inline struct extended_attribute *fnode_end_ea(struct fnode *fnode) 134static inline struct extended_attribute *fnode_end_ea(struct fnode *fnode)
148{ 135{
149 return (struct extended_attribute *)((char *)fnode + fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s); 136 return (struct extended_attribute *)((char *)fnode + le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s));
137}
138
139static unsigned ea_valuelen(struct extended_attribute *ea)
140{
141 return ea->valuelen_lo + 256 * ea->valuelen_hi;
150} 142}
151 143
152static inline struct extended_attribute *next_ea(struct extended_attribute *ea) 144static inline struct extended_attribute *next_ea(struct extended_attribute *ea)
153{ 145{
154 return (struct extended_attribute *)((char *)ea + 5 + ea->namelen + ea->valuelen); 146 return (struct extended_attribute *)((char *)ea + 5 + ea->namelen + ea_valuelen(ea));
155} 147}
156 148
157static inline secno ea_sec(struct extended_attribute *ea) 149static inline secno ea_sec(struct extended_attribute *ea)
158{ 150{
159 return *(secno *)((char *)ea + 9 + ea->namelen); 151 return le32_to_cpu(get_unaligned((secno *)((char *)ea + 9 + ea->namelen)));
160} 152}
161 153
162static inline secno ea_len(struct extended_attribute *ea) 154static inline secno ea_len(struct extended_attribute *ea)
163{ 155{
164 return *(secno *)((char *)ea + 5 + ea->namelen); 156 return le32_to_cpu(get_unaligned((secno *)((char *)ea + 5 + ea->namelen)));
165} 157}
166 158
167static inline char *ea_data(struct extended_attribute *ea) 159static inline char *ea_data(struct extended_attribute *ea)
@@ -186,13 +178,13 @@ static inline void copy_de(struct hpfs_dirent *dst, struct hpfs_dirent *src)
186 dst->not_8x3 = n; 178 dst->not_8x3 = n;
187} 179}
188 180
189static inline unsigned tstbits(unsigned *bmp, unsigned b, unsigned n) 181static inline unsigned tstbits(u32 *bmp, unsigned b, unsigned n)
190{ 182{
191 int i; 183 int i;
192 if ((b >= 0x4000) || (b + n - 1 >= 0x4000)) return n; 184 if ((b >= 0x4000) || (b + n - 1 >= 0x4000)) return n;
193 if (!((bmp[(b & 0x3fff) >> 5] >> (b & 0x1f)) & 1)) return 1; 185 if (!((le32_to_cpu(bmp[(b & 0x3fff) >> 5]) >> (b & 0x1f)) & 1)) return 1;
194 for (i = 1; i < n; i++) 186 for (i = 1; i < n; i++)
195 if (/*b+i < 0x4000 &&*/ !((bmp[((b+i) & 0x3fff) >> 5] >> ((b+i) & 0x1f)) & 1)) 187 if (!((le32_to_cpu(bmp[((b+i) & 0x3fff) >> 5]) >> ((b+i) & 0x1f)) & 1))
196 return i + 1; 188 return i + 1;
197 return 0; 189 return 0;
198} 190}
@@ -200,12 +192,12 @@ static inline unsigned tstbits(unsigned *bmp, unsigned b, unsigned n)
200/* alloc.c */ 192/* alloc.c */
201 193
202int hpfs_chk_sectors(struct super_block *, secno, int, char *); 194int hpfs_chk_sectors(struct super_block *, secno, int, char *);
203secno hpfs_alloc_sector(struct super_block *, secno, unsigned, int, int); 195secno hpfs_alloc_sector(struct super_block *, secno, unsigned, int);
204int hpfs_alloc_if_possible(struct super_block *, secno); 196int hpfs_alloc_if_possible(struct super_block *, secno);
205void hpfs_free_sectors(struct super_block *, secno, unsigned); 197void hpfs_free_sectors(struct super_block *, secno, unsigned);
206int hpfs_check_free_dnodes(struct super_block *, int); 198int hpfs_check_free_dnodes(struct super_block *, int);
207void hpfs_free_dnode(struct super_block *, secno); 199void hpfs_free_dnode(struct super_block *, secno);
208struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *, int); 200struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *);
209struct fnode *hpfs_alloc_fnode(struct super_block *, secno, fnode_secno *, struct buffer_head **); 201struct fnode *hpfs_alloc_fnode(struct super_block *, secno, fnode_secno *, struct buffer_head **);
210struct anode *hpfs_alloc_anode(struct super_block *, secno, anode_secno *, struct buffer_head **); 202struct anode *hpfs_alloc_anode(struct super_block *, secno, anode_secno *, struct buffer_head **);
211 203
@@ -222,8 +214,6 @@ void hpfs_remove_fnode(struct super_block *, fnode_secno fno);
222 214
223/* buffer.c */ 215/* buffer.c */
224 216
225void hpfs_lock_creation(struct super_block *);
226void hpfs_unlock_creation(struct super_block *);
227void *hpfs_map_sector(struct super_block *, unsigned, struct buffer_head **, int); 217void *hpfs_map_sector(struct super_block *, unsigned, struct buffer_head **, int);
228void *hpfs_get_sector(struct super_block *, unsigned, struct buffer_head **); 218void *hpfs_get_sector(struct super_block *, unsigned, struct buffer_head **);
229void *hpfs_map_4sectors(struct super_block *, unsigned, struct quad_buffer_head *, int); 219void *hpfs_map_4sectors(struct super_block *, unsigned, struct quad_buffer_head *, int);
@@ -247,7 +237,7 @@ void hpfs_del_pos(struct inode *, loff_t *);
247struct hpfs_dirent *hpfs_add_de(struct super_block *, struct dnode *, 237struct hpfs_dirent *hpfs_add_de(struct super_block *, struct dnode *,
248 const unsigned char *, unsigned, secno); 238 const unsigned char *, unsigned, secno);
249int hpfs_add_dirent(struct inode *, const unsigned char *, unsigned, 239int hpfs_add_dirent(struct inode *, const unsigned char *, unsigned,
250 struct hpfs_dirent *, int); 240 struct hpfs_dirent *);
251int hpfs_remove_dirent(struct inode *, dnode_secno, struct hpfs_dirent *, struct quad_buffer_head *, int); 241int hpfs_remove_dirent(struct inode *, dnode_secno, struct hpfs_dirent *, struct quad_buffer_head *, int);
252void hpfs_count_dnodes(struct super_block *, dnode_secno, int *, int *, int *); 242void hpfs_count_dnodes(struct super_block *, dnode_secno, int *, int *, int *);
253dnode_secno hpfs_de_as_down_as_possible(struct super_block *, dnode_secno dno); 243dnode_secno hpfs_de_as_down_as_possible(struct super_block *, dnode_secno dno);
@@ -303,7 +293,6 @@ int hpfs_compare_names(struct super_block *, const unsigned char *, unsigned,
303 const unsigned char *, unsigned, int); 293 const unsigned char *, unsigned, int);
304int hpfs_is_name_long(const unsigned char *, unsigned); 294int hpfs_is_name_long(const unsigned char *, unsigned);
305void hpfs_adjust_length(const unsigned char *, unsigned *); 295void hpfs_adjust_length(const unsigned char *, unsigned *);
306void hpfs_decide_conv(struct inode *, const unsigned char *, unsigned);
307 296
308/* namei.c */ 297/* namei.c */
309 298
@@ -346,21 +335,26 @@ static inline time32_t gmt_to_local(struct super_block *s, time_t t)
346/* 335/*
347 * Locking: 336 * Locking:
348 * 337 *
349 * hpfs_lock() is a leftover from the big kernel lock. 338 * hpfs_lock() locks the whole filesystem. It must be taken
350 * Right now, these functions are empty and only left 339 * on any method called by the VFS.
351 * for documentation purposes. The file system no longer
352 * works on SMP systems, so the lock is not needed
353 * any more.
354 * 340 *
355 * If someone is interested in making it work again, this 341 * We don't do any per-file locking anymore, it is hard to
356 * would be the place to start by adding a per-superblock 342 * review and HPFS is not performance-sensitive anyway.
357 * mutex and fixing all the bugs and performance issues
358 * caused by that.
359 */ 343 */
360static inline void hpfs_lock(struct super_block *s) 344static inline void hpfs_lock(struct super_block *s)
361{ 345{
346 struct hpfs_sb_info *sbi = hpfs_sb(s);
347 mutex_lock(&sbi->hpfs_mutex);
362} 348}
363 349
364static inline void hpfs_unlock(struct super_block *s) 350static inline void hpfs_unlock(struct super_block *s)
365{ 351{
352 struct hpfs_sb_info *sbi = hpfs_sb(s);
353 mutex_unlock(&sbi->hpfs_mutex);
354}
355
356static inline void hpfs_lock_assert(struct super_block *s)
357{
358 struct hpfs_sb_info *sbi = hpfs_sb(s);
359 WARN_ON(!mutex_is_locked(&sbi->hpfs_mutex));
366} 360}
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 87f1f787e767..338cd8368451 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -17,7 +17,6 @@ void hpfs_init_inode(struct inode *i)
17 i->i_uid = hpfs_sb(sb)->sb_uid; 17 i->i_uid = hpfs_sb(sb)->sb_uid;
18 i->i_gid = hpfs_sb(sb)->sb_gid; 18 i->i_gid = hpfs_sb(sb)->sb_gid;
19 i->i_mode = hpfs_sb(sb)->sb_mode; 19 i->i_mode = hpfs_sb(sb)->sb_mode;
20 hpfs_inode->i_conv = hpfs_sb(sb)->sb_conv;
21 i->i_size = -1; 20 i->i_size = -1;
22 i->i_blocks = -1; 21 i->i_blocks = -1;
23 22
@@ -116,8 +115,8 @@ void hpfs_read_inode(struct inode *i)
116 i->i_mode |= S_IFDIR; 115 i->i_mode |= S_IFDIR;
117 i->i_op = &hpfs_dir_iops; 116 i->i_op = &hpfs_dir_iops;
118 i->i_fop = &hpfs_dir_ops; 117 i->i_fop = &hpfs_dir_ops;
119 hpfs_inode->i_parent_dir = fnode->up; 118 hpfs_inode->i_parent_dir = le32_to_cpu(fnode->up);
120 hpfs_inode->i_dno = fnode->u.external[0].disk_secno; 119 hpfs_inode->i_dno = le32_to_cpu(fnode->u.external[0].disk_secno);
121 if (hpfs_sb(sb)->sb_chk >= 2) { 120 if (hpfs_sb(sb)->sb_chk >= 2) {
122 struct buffer_head *bh0; 121 struct buffer_head *bh0;
123 if (hpfs_map_fnode(sb, hpfs_inode->i_parent_dir, &bh0)) brelse(bh0); 122 if (hpfs_map_fnode(sb, hpfs_inode->i_parent_dir, &bh0)) brelse(bh0);
@@ -133,7 +132,7 @@ void hpfs_read_inode(struct inode *i)
133 i->i_op = &hpfs_file_iops; 132 i->i_op = &hpfs_file_iops;
134 i->i_fop = &hpfs_file_ops; 133 i->i_fop = &hpfs_file_ops;
135 i->i_nlink = 1; 134 i->i_nlink = 1;
136 i->i_size = fnode->file_size; 135 i->i_size = le32_to_cpu(fnode->file_size);
137 i->i_blocks = ((i->i_size + 511) >> 9) + 1; 136 i->i_blocks = ((i->i_size + 511) >> 9) + 1;
138 i->i_data.a_ops = &hpfs_aops; 137 i->i_data.a_ops = &hpfs_aops;
139 hpfs_i(i)->mmu_private = i->i_size; 138 hpfs_i(i)->mmu_private = i->i_size;
@@ -144,7 +143,7 @@ void hpfs_read_inode(struct inode *i)
144static void hpfs_write_inode_ea(struct inode *i, struct fnode *fnode) 143static void hpfs_write_inode_ea(struct inode *i, struct fnode *fnode)
145{ 144{
146 struct hpfs_inode_info *hpfs_inode = hpfs_i(i); 145 struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
147 /*if (fnode->acl_size_l || fnode->acl_size_s) { 146 /*if (le32_to_cpu(fnode->acl_size_l) || le16_to_cpu(fnode->acl_size_s)) {
148 Some unknown structures like ACL may be in fnode, 147 Some unknown structures like ACL may be in fnode,
149 we'd better not overwrite them 148 we'd better not overwrite them
150 hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 stuctures", i->i_ino); 149 hpfs_error(i->i_sb, "fnode %08x has some unknown HPFS386 stuctures", i->i_ino);
@@ -187,9 +186,7 @@ void hpfs_write_inode(struct inode *i)
187 kfree(hpfs_inode->i_rddir_off); 186 kfree(hpfs_inode->i_rddir_off);
188 hpfs_inode->i_rddir_off = NULL; 187 hpfs_inode->i_rddir_off = NULL;
189 } 188 }
190 mutex_lock(&hpfs_inode->i_parent_mutex);
191 if (!i->i_nlink) { 189 if (!i->i_nlink) {
192 mutex_unlock(&hpfs_inode->i_parent_mutex);
193 return; 190 return;
194 } 191 }
195 parent = iget_locked(i->i_sb, hpfs_inode->i_parent_dir); 192 parent = iget_locked(i->i_sb, hpfs_inode->i_parent_dir);
@@ -200,14 +197,9 @@ void hpfs_write_inode(struct inode *i)
200 hpfs_read_inode(parent); 197 hpfs_read_inode(parent);
201 unlock_new_inode(parent); 198 unlock_new_inode(parent);
202 } 199 }
203 mutex_lock(&hpfs_inode->i_mutex);
204 hpfs_write_inode_nolock(i); 200 hpfs_write_inode_nolock(i);
205 mutex_unlock(&hpfs_inode->i_mutex);
206 iput(parent); 201 iput(parent);
207 } else {
208 mark_inode_dirty(i);
209 } 202 }
210 mutex_unlock(&hpfs_inode->i_parent_mutex);
211} 203}
212 204
213void hpfs_write_inode_nolock(struct inode *i) 205void hpfs_write_inode_nolock(struct inode *i)
@@ -226,30 +218,30 @@ void hpfs_write_inode_nolock(struct inode *i)
226 } 218 }
227 } else de = NULL; 219 } else de = NULL;
228 if (S_ISREG(i->i_mode)) { 220 if (S_ISREG(i->i_mode)) {
229 fnode->file_size = i->i_size; 221 fnode->file_size = cpu_to_le32(i->i_size);
230 if (de) de->file_size = i->i_size; 222 if (de) de->file_size = cpu_to_le32(i->i_size);
231 } else if (S_ISDIR(i->i_mode)) { 223 } else if (S_ISDIR(i->i_mode)) {
232 fnode->file_size = 0; 224 fnode->file_size = cpu_to_le32(0);
233 if (de) de->file_size = 0; 225 if (de) de->file_size = cpu_to_le32(0);
234 } 226 }
235 hpfs_write_inode_ea(i, fnode); 227 hpfs_write_inode_ea(i, fnode);
236 if (de) { 228 if (de) {
237 de->write_date = gmt_to_local(i->i_sb, i->i_mtime.tv_sec); 229 de->write_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_mtime.tv_sec));
238 de->read_date = gmt_to_local(i->i_sb, i->i_atime.tv_sec); 230 de->read_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_atime.tv_sec));
239 de->creation_date = gmt_to_local(i->i_sb, i->i_ctime.tv_sec); 231 de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_ctime.tv_sec));
240 de->read_only = !(i->i_mode & 0222); 232 de->read_only = !(i->i_mode & 0222);
241 de->ea_size = hpfs_inode->i_ea_size; 233 de->ea_size = cpu_to_le32(hpfs_inode->i_ea_size);
242 hpfs_mark_4buffers_dirty(&qbh); 234 hpfs_mark_4buffers_dirty(&qbh);
243 hpfs_brelse4(&qbh); 235 hpfs_brelse4(&qbh);
244 } 236 }
245 if (S_ISDIR(i->i_mode)) { 237 if (S_ISDIR(i->i_mode)) {
246 if ((de = map_dirent(i, hpfs_inode->i_dno, "\001\001", 2, NULL, &qbh))) { 238 if ((de = map_dirent(i, hpfs_inode->i_dno, "\001\001", 2, NULL, &qbh))) {
247 de->write_date = gmt_to_local(i->i_sb, i->i_mtime.tv_sec); 239 de->write_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_mtime.tv_sec));
248 de->read_date = gmt_to_local(i->i_sb, i->i_atime.tv_sec); 240 de->read_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_atime.tv_sec));
249 de->creation_date = gmt_to_local(i->i_sb, i->i_ctime.tv_sec); 241 de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_ctime.tv_sec));
250 de->read_only = !(i->i_mode & 0222); 242 de->read_only = !(i->i_mode & 0222);
251 de->ea_size = /*hpfs_inode->i_ea_size*/0; 243 de->ea_size = cpu_to_le32(/*hpfs_inode->i_ea_size*/0);
252 de->file_size = 0; 244 de->file_size = cpu_to_le32(0);
253 hpfs_mark_4buffers_dirty(&qbh); 245 hpfs_mark_4buffers_dirty(&qbh);
254 hpfs_brelse4(&qbh); 246 hpfs_brelse4(&qbh);
255 } else 247 } else
@@ -269,6 +261,10 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
269 hpfs_lock(inode->i_sb); 261 hpfs_lock(inode->i_sb);
270 if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root) 262 if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root)
271 goto out_unlock; 263 goto out_unlock;
264 if ((attr->ia_valid & ATTR_UID) && attr->ia_uid >= 0x10000)
265 goto out_unlock;
266 if ((attr->ia_valid & ATTR_GID) && attr->ia_gid >= 0x10000)
267 goto out_unlock;
272 if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size) 268 if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size)
273 goto out_unlock; 269 goto out_unlock;
274 270
@@ -284,7 +280,6 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
284 } 280 }
285 281
286 setattr_copy(inode, attr); 282 setattr_copy(inode, attr);
287 mark_inode_dirty(inode);
288 283
289 hpfs_write_inode(inode); 284 hpfs_write_inode(inode);
290 285
diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c
index 840d033ecee8..a790821366a7 100644
--- a/fs/hpfs/map.c
+++ b/fs/hpfs/map.c
@@ -21,7 +21,7 @@ unsigned int *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block,
21 hpfs_error(s, "hpfs_map_bitmap called with bad parameter: %08x at %s", bmp_block, id); 21 hpfs_error(s, "hpfs_map_bitmap called with bad parameter: %08x at %s", bmp_block, id);
22 return NULL; 22 return NULL;
23 } 23 }
24 sec = hpfs_sb(s)->sb_bmp_dir[bmp_block]; 24 sec = le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[bmp_block]);
25 if (!sec || sec > hpfs_sb(s)->sb_fs_size-4) { 25 if (!sec || sec > hpfs_sb(s)->sb_fs_size-4) {
26 hpfs_error(s, "invalid bitmap block pointer %08x -> %08x at %s", bmp_block, sec, id); 26 hpfs_error(s, "invalid bitmap block pointer %08x -> %08x at %s", bmp_block, sec, id);
27 return NULL; 27 return NULL;
@@ -46,18 +46,18 @@ unsigned char *hpfs_load_code_page(struct super_block *s, secno cps)
46 struct code_page_data *cpd; 46 struct code_page_data *cpd;
47 struct code_page_directory *cp = hpfs_map_sector(s, cps, &bh, 0); 47 struct code_page_directory *cp = hpfs_map_sector(s, cps, &bh, 0);
48 if (!cp) return NULL; 48 if (!cp) return NULL;
49 if (cp->magic != CP_DIR_MAGIC) { 49 if (le32_to_cpu(cp->magic) != CP_DIR_MAGIC) {
50 printk("HPFS: Code page directory magic doesn't match (magic = %08x)\n", cp->magic); 50 printk("HPFS: Code page directory magic doesn't match (magic = %08x)\n", le32_to_cpu(cp->magic));
51 brelse(bh); 51 brelse(bh);
52 return NULL; 52 return NULL;
53 } 53 }
54 if (!cp->n_code_pages) { 54 if (!le32_to_cpu(cp->n_code_pages)) {
55 printk("HPFS: n_code_pages == 0\n"); 55 printk("HPFS: n_code_pages == 0\n");
56 brelse(bh); 56 brelse(bh);
57 return NULL; 57 return NULL;
58 } 58 }
59 cpds = cp->array[0].code_page_data; 59 cpds = le32_to_cpu(cp->array[0].code_page_data);
60 cpi = cp->array[0].index; 60 cpi = le16_to_cpu(cp->array[0].index);
61 brelse(bh); 61 brelse(bh);
62 62
63 if (cpi >= 3) { 63 if (cpi >= 3) {
@@ -66,12 +66,12 @@ unsigned char *hpfs_load_code_page(struct super_block *s, secno cps)
66 } 66 }
67 67
68 if (!(cpd = hpfs_map_sector(s, cpds, &bh, 0))) return NULL; 68 if (!(cpd = hpfs_map_sector(s, cpds, &bh, 0))) return NULL;
69 if ((unsigned)cpd->offs[cpi] > 0x178) { 69 if (le16_to_cpu(cpd->offs[cpi]) > 0x178) {
70 printk("HPFS: Code page index out of sector\n"); 70 printk("HPFS: Code page index out of sector\n");
71 brelse(bh); 71 brelse(bh);
72 return NULL; 72 return NULL;
73 } 73 }
74 ptr = (unsigned char *)cpd + cpd->offs[cpi] + 6; 74 ptr = (unsigned char *)cpd + le16_to_cpu(cpd->offs[cpi]) + 6;
75 if (!(cp_table = kmalloc(256, GFP_KERNEL))) { 75 if (!(cp_table = kmalloc(256, GFP_KERNEL))) {
76 printk("HPFS: out of memory for code page table\n"); 76 printk("HPFS: out of memory for code page table\n");
77 brelse(bh); 77 brelse(bh);
@@ -125,7 +125,7 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea
125 if (hpfs_sb(s)->sb_chk) { 125 if (hpfs_sb(s)->sb_chk) {
126 struct extended_attribute *ea; 126 struct extended_attribute *ea;
127 struct extended_attribute *ea_end; 127 struct extended_attribute *ea_end;
128 if (fnode->magic != FNODE_MAGIC) { 128 if (le32_to_cpu(fnode->magic) != FNODE_MAGIC) {
129 hpfs_error(s, "bad magic on fnode %08lx", 129 hpfs_error(s, "bad magic on fnode %08lx",
130 (unsigned long)ino); 130 (unsigned long)ino);
131 goto bail; 131 goto bail;
@@ -138,7 +138,7 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea
138 (unsigned long)ino); 138 (unsigned long)ino);
139 goto bail; 139 goto bail;
140 } 140 }
141 if (fnode->btree.first_free != 141 if (le16_to_cpu(fnode->btree.first_free) !=
142 8 + fnode->btree.n_used_nodes * (fnode->btree.internal ? 8 : 12)) { 142 8 + fnode->btree.n_used_nodes * (fnode->btree.internal ? 8 : 12)) {
143 hpfs_error(s, 143 hpfs_error(s,
144 "bad first_free pointer in fnode %08lx", 144 "bad first_free pointer in fnode %08lx",
@@ -146,12 +146,12 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea
146 goto bail; 146 goto bail;
147 } 147 }
148 } 148 }
149 if (fnode->ea_size_s && ((signed int)fnode->ea_offs < 0xc4 || 149 if (le16_to_cpu(fnode->ea_size_s) && (le16_to_cpu(fnode->ea_offs) < 0xc4 ||
150 (signed int)fnode->ea_offs + fnode->acl_size_s + fnode->ea_size_s > 0x200)) { 150 le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) > 0x200)) {
151 hpfs_error(s, 151 hpfs_error(s,
152 "bad EA info in fnode %08lx: ea_offs == %04x ea_size_s == %04x", 152 "bad EA info in fnode %08lx: ea_offs == %04x ea_size_s == %04x",
153 (unsigned long)ino, 153 (unsigned long)ino,
154 fnode->ea_offs, fnode->ea_size_s); 154 le16_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s));
155 goto bail; 155 goto bail;
156 } 156 }
157 ea = fnode_ea(fnode); 157 ea = fnode_ea(fnode);
@@ -178,16 +178,20 @@ struct anode *hpfs_map_anode(struct super_block *s, anode_secno ano, struct buff
178 if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, ano, 1, "anode")) return NULL; 178 if (hpfs_sb(s)->sb_chk) if (hpfs_chk_sectors(s, ano, 1, "anode")) return NULL;
179 if ((anode = hpfs_map_sector(s, ano, bhp, ANODE_RD_AHEAD))) 179 if ((anode = hpfs_map_sector(s, ano, bhp, ANODE_RD_AHEAD)))
180 if (hpfs_sb(s)->sb_chk) { 180 if (hpfs_sb(s)->sb_chk) {
181 if (anode->magic != ANODE_MAGIC || anode->self != ano) { 181 if (le32_to_cpu(anode->magic) != ANODE_MAGIC) {
182 hpfs_error(s, "bad magic on anode %08x", ano); 182 hpfs_error(s, "bad magic on anode %08x", ano);
183 goto bail; 183 goto bail;
184 } 184 }
185 if (le32_to_cpu(anode->self) != ano) {
186 hpfs_error(s, "self pointer invalid on anode %08x", ano);
187 goto bail;
188 }
185 if ((unsigned)anode->btree.n_used_nodes + (unsigned)anode->btree.n_free_nodes != 189 if ((unsigned)anode->btree.n_used_nodes + (unsigned)anode->btree.n_free_nodes !=
186 (anode->btree.internal ? 60 : 40)) { 190 (anode->btree.internal ? 60 : 40)) {
187 hpfs_error(s, "bad number of nodes in anode %08x", ano); 191 hpfs_error(s, "bad number of nodes in anode %08x", ano);
188 goto bail; 192 goto bail;
189 } 193 }
190 if (anode->btree.first_free != 194 if (le16_to_cpu(anode->btree.first_free) !=
191 8 + anode->btree.n_used_nodes * (anode->btree.internal ? 8 : 12)) { 195 8 + anode->btree.n_used_nodes * (anode->btree.internal ? 8 : 12)) {
192 hpfs_error(s, "bad first_free pointer in anode %08x", ano); 196 hpfs_error(s, "bad first_free pointer in anode %08x", ano);
193 goto bail; 197 goto bail;
@@ -219,26 +223,26 @@ struct dnode *hpfs_map_dnode(struct super_block *s, unsigned secno,
219 unsigned p, pp = 0; 223 unsigned p, pp = 0;
220 unsigned char *d = (unsigned char *)dnode; 224 unsigned char *d = (unsigned char *)dnode;
221 int b = 0; 225 int b = 0;
222 if (dnode->magic != DNODE_MAGIC) { 226 if (le32_to_cpu(dnode->magic) != DNODE_MAGIC) {
223 hpfs_error(s, "bad magic on dnode %08x", secno); 227 hpfs_error(s, "bad magic on dnode %08x", secno);
224 goto bail; 228 goto bail;
225 } 229 }
226 if (dnode->self != secno) 230 if (le32_to_cpu(dnode->self) != secno)
227 hpfs_error(s, "bad self pointer on dnode %08x self = %08x", secno, dnode->self); 231 hpfs_error(s, "bad self pointer on dnode %08x self = %08x", secno, le32_to_cpu(dnode->self));
228 /* Check dirents - bad dirents would cause infinite 232 /* Check dirents - bad dirents would cause infinite
229 loops or shooting to memory */ 233 loops or shooting to memory */
230 if (dnode->first_free > 2048/* || dnode->first_free < 84*/) { 234 if (le32_to_cpu(dnode->first_free) > 2048) {
231 hpfs_error(s, "dnode %08x has first_free == %08x", secno, dnode->first_free); 235 hpfs_error(s, "dnode %08x has first_free == %08x", secno, le32_to_cpu(dnode->first_free));
232 goto bail; 236 goto bail;
233 } 237 }
234 for (p = 20; p < dnode->first_free; p += d[p] + (d[p+1] << 8)) { 238 for (p = 20; p < le32_to_cpu(dnode->first_free); p += d[p] + (d[p+1] << 8)) {
235 struct hpfs_dirent *de = (struct hpfs_dirent *)((char *)dnode + p); 239 struct hpfs_dirent *de = (struct hpfs_dirent *)((char *)dnode + p);
236 if (de->length > 292 || (de->length < 32) || (de->length & 3) || p + de->length > 2048) { 240 if (le16_to_cpu(de->length) > 292 || (le16_to_cpu(de->length) < 32) || (le16_to_cpu(de->length) & 3) || p + le16_to_cpu(de->length) > 2048) {
237 hpfs_error(s, "bad dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp); 241 hpfs_error(s, "bad dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp);
238 goto bail; 242 goto bail;
239 } 243 }
240 if (((31 + de->namelen + de->down*4 + 3) & ~3) != de->length) { 244 if (((31 + de->namelen + de->down*4 + 3) & ~3) != le16_to_cpu(de->length)) {
241 if (((31 + de->namelen + de->down*4 + 3) & ~3) < de->length && s->s_flags & MS_RDONLY) goto ok; 245 if (((31 + de->namelen + de->down*4 + 3) & ~3) < le16_to_cpu(de->length) && s->s_flags & MS_RDONLY) goto ok;
242 hpfs_error(s, "namelen does not match dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp); 246 hpfs_error(s, "namelen does not match dirent size in dnode %08x, dirent %03x, last %03x", secno, p, pp);
243 goto bail; 247 goto bail;
244 } 248 }
@@ -251,7 +255,7 @@ struct dnode *hpfs_map_dnode(struct super_block *s, unsigned secno,
251 pp = p; 255 pp = p;
252 256
253 } 257 }
254 if (p != dnode->first_free) { 258 if (p != le32_to_cpu(dnode->first_free)) {
255 hpfs_error(s, "size on last dirent does not match first_free; dnode %08x", secno); 259 hpfs_error(s, "size on last dirent does not match first_free; dnode %08x", secno);
256 goto bail; 260 goto bail;
257 } 261 }
@@ -277,7 +281,7 @@ dnode_secno hpfs_fnode_dno(struct super_block *s, ino_t ino)
277 if (!fnode) 281 if (!fnode)
278 return 0; 282 return 0;
279 283
280 dno = fnode->u.external[0].disk_secno; 284 dno = le32_to_cpu(fnode->u.external[0].disk_secno);
281 brelse(bh); 285 brelse(bh);
282 return dno; 286 return dno;
283} 287}
diff --git a/fs/hpfs/name.c b/fs/hpfs/name.c
index f24736d7a439..9acdf338def0 100644
--- a/fs/hpfs/name.c
+++ b/fs/hpfs/name.c
@@ -8,39 +8,6 @@
8 8
9#include "hpfs_fn.h" 9#include "hpfs_fn.h"
10 10
11static const char *text_postfix[]={
12".ASM", ".BAS", ".BAT", ".C", ".CC", ".CFG", ".CMD", ".CON", ".CPP", ".DEF",
13".DOC", ".DPR", ".ERX", ".H", ".HPP", ".HTM", ".HTML", ".JAVA", ".LOG", ".PAS",
14".RC", ".TEX", ".TXT", ".Y", ""};
15
16static const char *text_prefix[]={
17"AUTOEXEC.", "CHANGES", "COPYING", "CONFIG.", "CREDITS", "FAQ", "FILE_ID.DIZ",
18"MAKEFILE", "READ.ME", "README", "TERMCAP", ""};
19
20void hpfs_decide_conv(struct inode *inode, const unsigned char *name, unsigned len)
21{
22 struct hpfs_inode_info *hpfs_inode = hpfs_i(inode);
23 int i;
24 if (hpfs_inode->i_conv != CONV_AUTO) return;
25 for (i = 0; *text_postfix[i]; i++) {
26 int l = strlen(text_postfix[i]);
27 if (l <= len)
28 if (!hpfs_compare_names(inode->i_sb, text_postfix[i], l, name + len - l, l, 0))
29 goto text;
30 }
31 for (i = 0; *text_prefix[i]; i++) {
32 int l = strlen(text_prefix[i]);
33 if (l <= len)
34 if (!hpfs_compare_names(inode->i_sb, text_prefix[i], l, name, l, 0))
35 goto text;
36 }
37 hpfs_inode->i_conv = CONV_BINARY;
38 return;
39 text:
40 hpfs_inode->i_conv = CONV_TEXT;
41 return;
42}
43
44static inline int not_allowed_char(unsigned char c) 11static inline int not_allowed_char(unsigned char c)
45{ 12{
46 return c<' ' || c=='"' || c=='*' || c=='/' || c==':' || c=='<' || 13 return c<' ' || c=='"' || c=='*' || c=='/' || c==':' || c=='<' ||
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index d3db95f51a4e..ff0ce21c0867 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -29,7 +29,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
29 fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh); 29 fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh);
30 if (!fnode) 30 if (!fnode)
31 goto bail; 31 goto bail;
32 dnode = hpfs_alloc_dnode(dir->i_sb, fno, &dno, &qbh0, 1); 32 dnode = hpfs_alloc_dnode(dir->i_sb, fno, &dno, &qbh0);
33 if (!dnode) 33 if (!dnode)
34 goto bail1; 34 goto bail1;
35 memset(&dee, 0, sizeof dee); 35 memset(&dee, 0, sizeof dee);
@@ -37,8 +37,8 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
37 if (!(mode & 0222)) dee.read_only = 1; 37 if (!(mode & 0222)) dee.read_only = 1;
38 /*dee.archive = 0;*/ 38 /*dee.archive = 0;*/
39 dee.hidden = name[0] == '.'; 39 dee.hidden = name[0] == '.';
40 dee.fnode = fno; 40 dee.fnode = cpu_to_le32(fno);
41 dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds()); 41 dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
42 result = new_inode(dir->i_sb); 42 result = new_inode(dir->i_sb);
43 if (!result) 43 if (!result)
44 goto bail2; 44 goto bail2;
@@ -46,7 +46,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
46 result->i_ino = fno; 46 result->i_ino = fno;
47 hpfs_i(result)->i_parent_dir = dir->i_ino; 47 hpfs_i(result)->i_parent_dir = dir->i_ino;
48 hpfs_i(result)->i_dno = dno; 48 hpfs_i(result)->i_dno = dno;
49 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date); 49 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
50 result->i_ctime.tv_nsec = 0; 50 result->i_ctime.tv_nsec = 0;
51 result->i_mtime.tv_nsec = 0; 51 result->i_mtime.tv_nsec = 0;
52 result->i_atime.tv_nsec = 0; 52 result->i_atime.tv_nsec = 0;
@@ -60,8 +60,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
60 if (dee.read_only) 60 if (dee.read_only)
61 result->i_mode &= ~0222; 61 result->i_mode &= ~0222;
62 62
63 mutex_lock(&hpfs_i(dir)->i_mutex); 63 r = hpfs_add_dirent(dir, name, len, &dee);
64 r = hpfs_add_dirent(dir, name, len, &dee, 0);
65 if (r == 1) 64 if (r == 1)
66 goto bail3; 65 goto bail3;
67 if (r == -1) { 66 if (r == -1) {
@@ -70,21 +69,21 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
70 } 69 }
71 fnode->len = len; 70 fnode->len = len;
72 memcpy(fnode->name, name, len > 15 ? 15 : len); 71 memcpy(fnode->name, name, len > 15 ? 15 : len);
73 fnode->up = dir->i_ino; 72 fnode->up = cpu_to_le32(dir->i_ino);
74 fnode->dirflag = 1; 73 fnode->dirflag = 1;
75 fnode->btree.n_free_nodes = 7; 74 fnode->btree.n_free_nodes = 7;
76 fnode->btree.n_used_nodes = 1; 75 fnode->btree.n_used_nodes = 1;
77 fnode->btree.first_free = 0x14; 76 fnode->btree.first_free = cpu_to_le16(0x14);
78 fnode->u.external[0].disk_secno = dno; 77 fnode->u.external[0].disk_secno = cpu_to_le32(dno);
79 fnode->u.external[0].file_secno = -1; 78 fnode->u.external[0].file_secno = cpu_to_le32(-1);
80 dnode->root_dnode = 1; 79 dnode->root_dnode = 1;
81 dnode->up = fno; 80 dnode->up = cpu_to_le32(fno);
82 de = hpfs_add_de(dir->i_sb, dnode, "\001\001", 2, 0); 81 de = hpfs_add_de(dir->i_sb, dnode, "\001\001", 2, 0);
83 de->creation_date = de->write_date = de->read_date = gmt_to_local(dir->i_sb, get_seconds()); 82 de->creation_date = de->write_date = de->read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
84 if (!(mode & 0222)) de->read_only = 1; 83 if (!(mode & 0222)) de->read_only = 1;
85 de->first = de->directory = 1; 84 de->first = de->directory = 1;
86 /*de->hidden = de->system = 0;*/ 85 /*de->hidden = de->system = 0;*/
87 de->fnode = fno; 86 de->fnode = cpu_to_le32(fno);
88 mark_buffer_dirty(bh); 87 mark_buffer_dirty(bh);
89 brelse(bh); 88 brelse(bh);
90 hpfs_mark_4buffers_dirty(&qbh0); 89 hpfs_mark_4buffers_dirty(&qbh0);
@@ -101,11 +100,9 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
101 hpfs_write_inode_nolock(result); 100 hpfs_write_inode_nolock(result);
102 } 101 }
103 d_instantiate(dentry, result); 102 d_instantiate(dentry, result);
104 mutex_unlock(&hpfs_i(dir)->i_mutex);
105 hpfs_unlock(dir->i_sb); 103 hpfs_unlock(dir->i_sb);
106 return 0; 104 return 0;
107bail3: 105bail3:
108 mutex_unlock(&hpfs_i(dir)->i_mutex);
109 iput(result); 106 iput(result);
110bail2: 107bail2:
111 hpfs_brelse4(&qbh0); 108 hpfs_brelse4(&qbh0);
@@ -140,8 +137,8 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
140 if (!(mode & 0222)) dee.read_only = 1; 137 if (!(mode & 0222)) dee.read_only = 1;
141 dee.archive = 1; 138 dee.archive = 1;
142 dee.hidden = name[0] == '.'; 139 dee.hidden = name[0] == '.';
143 dee.fnode = fno; 140 dee.fnode = cpu_to_le32(fno);
144 dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds()); 141 dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
145 142
146 result = new_inode(dir->i_sb); 143 result = new_inode(dir->i_sb);
147 if (!result) 144 if (!result)
@@ -154,9 +151,8 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
154 result->i_op = &hpfs_file_iops; 151 result->i_op = &hpfs_file_iops;
155 result->i_fop = &hpfs_file_ops; 152 result->i_fop = &hpfs_file_ops;
156 result->i_nlink = 1; 153 result->i_nlink = 1;
157 hpfs_decide_conv(result, name, len);
158 hpfs_i(result)->i_parent_dir = dir->i_ino; 154 hpfs_i(result)->i_parent_dir = dir->i_ino;
159 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date); 155 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
160 result->i_ctime.tv_nsec = 0; 156 result->i_ctime.tv_nsec = 0;
161 result->i_mtime.tv_nsec = 0; 157 result->i_mtime.tv_nsec = 0;
162 result->i_atime.tv_nsec = 0; 158 result->i_atime.tv_nsec = 0;
@@ -168,8 +164,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
168 result->i_data.a_ops = &hpfs_aops; 164 result->i_data.a_ops = &hpfs_aops;
169 hpfs_i(result)->mmu_private = 0; 165 hpfs_i(result)->mmu_private = 0;
170 166
171 mutex_lock(&hpfs_i(dir)->i_mutex); 167 r = hpfs_add_dirent(dir, name, len, &dee);
172 r = hpfs_add_dirent(dir, name, len, &dee, 0);
173 if (r == 1) 168 if (r == 1)
174 goto bail2; 169 goto bail2;
175 if (r == -1) { 170 if (r == -1) {
@@ -178,7 +173,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
178 } 173 }
179 fnode->len = len; 174 fnode->len = len;
180 memcpy(fnode->name, name, len > 15 ? 15 : len); 175 memcpy(fnode->name, name, len > 15 ? 15 : len);
181 fnode->up = dir->i_ino; 176 fnode->up = cpu_to_le32(dir->i_ino);
182 mark_buffer_dirty(bh); 177 mark_buffer_dirty(bh);
183 brelse(bh); 178 brelse(bh);
184 179
@@ -193,12 +188,10 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
193 hpfs_write_inode_nolock(result); 188 hpfs_write_inode_nolock(result);
194 } 189 }
195 d_instantiate(dentry, result); 190 d_instantiate(dentry, result);
196 mutex_unlock(&hpfs_i(dir)->i_mutex);
197 hpfs_unlock(dir->i_sb); 191 hpfs_unlock(dir->i_sb);
198 return 0; 192 return 0;
199 193
200bail2: 194bail2:
201 mutex_unlock(&hpfs_i(dir)->i_mutex);
202 iput(result); 195 iput(result);
203bail1: 196bail1:
204 brelse(bh); 197 brelse(bh);
@@ -232,8 +225,8 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
232 if (!(mode & 0222)) dee.read_only = 1; 225 if (!(mode & 0222)) dee.read_only = 1;
233 dee.archive = 1; 226 dee.archive = 1;
234 dee.hidden = name[0] == '.'; 227 dee.hidden = name[0] == '.';
235 dee.fnode = fno; 228 dee.fnode = cpu_to_le32(fno);
236 dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds()); 229 dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
237 230
238 result = new_inode(dir->i_sb); 231 result = new_inode(dir->i_sb);
239 if (!result) 232 if (!result)
@@ -242,7 +235,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
242 hpfs_init_inode(result); 235 hpfs_init_inode(result);
243 result->i_ino = fno; 236 result->i_ino = fno;
244 hpfs_i(result)->i_parent_dir = dir->i_ino; 237 hpfs_i(result)->i_parent_dir = dir->i_ino;
245 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date); 238 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
246 result->i_ctime.tv_nsec = 0; 239 result->i_ctime.tv_nsec = 0;
247 result->i_mtime.tv_nsec = 0; 240 result->i_mtime.tv_nsec = 0;
248 result->i_atime.tv_nsec = 0; 241 result->i_atime.tv_nsec = 0;
@@ -254,8 +247,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
254 result->i_blocks = 1; 247 result->i_blocks = 1;
255 init_special_inode(result, mode, rdev); 248 init_special_inode(result, mode, rdev);
256 249
257 mutex_lock(&hpfs_i(dir)->i_mutex); 250 r = hpfs_add_dirent(dir, name, len, &dee);
258 r = hpfs_add_dirent(dir, name, len, &dee, 0);
259 if (r == 1) 251 if (r == 1)
260 goto bail2; 252 goto bail2;
261 if (r == -1) { 253 if (r == -1) {
@@ -264,19 +256,17 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
264 } 256 }
265 fnode->len = len; 257 fnode->len = len;
266 memcpy(fnode->name, name, len > 15 ? 15 : len); 258 memcpy(fnode->name, name, len > 15 ? 15 : len);
267 fnode->up = dir->i_ino; 259 fnode->up = cpu_to_le32(dir->i_ino);
268 mark_buffer_dirty(bh); 260 mark_buffer_dirty(bh);
269 261
270 insert_inode_hash(result); 262 insert_inode_hash(result);
271 263
272 hpfs_write_inode_nolock(result); 264 hpfs_write_inode_nolock(result);
273 d_instantiate(dentry, result); 265 d_instantiate(dentry, result);
274 mutex_unlock(&hpfs_i(dir)->i_mutex);
275 brelse(bh); 266 brelse(bh);
276 hpfs_unlock(dir->i_sb); 267 hpfs_unlock(dir->i_sb);
277 return 0; 268 return 0;
278bail2: 269bail2:
279 mutex_unlock(&hpfs_i(dir)->i_mutex);
280 iput(result); 270 iput(result);
281bail1: 271bail1:
282 brelse(bh); 272 brelse(bh);
@@ -310,8 +300,8 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
310 memset(&dee, 0, sizeof dee); 300 memset(&dee, 0, sizeof dee);
311 dee.archive = 1; 301 dee.archive = 1;
312 dee.hidden = name[0] == '.'; 302 dee.hidden = name[0] == '.';
313 dee.fnode = fno; 303 dee.fnode = cpu_to_le32(fno);
314 dee.creation_date = dee.write_date = dee.read_date = gmt_to_local(dir->i_sb, get_seconds()); 304 dee.creation_date = dee.write_date = dee.read_date = cpu_to_le32(gmt_to_local(dir->i_sb, get_seconds()));
315 305
316 result = new_inode(dir->i_sb); 306 result = new_inode(dir->i_sb);
317 if (!result) 307 if (!result)
@@ -319,7 +309,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
319 result->i_ino = fno; 309 result->i_ino = fno;
320 hpfs_init_inode(result); 310 hpfs_init_inode(result);
321 hpfs_i(result)->i_parent_dir = dir->i_ino; 311 hpfs_i(result)->i_parent_dir = dir->i_ino;
322 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, dee.creation_date); 312 result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
323 result->i_ctime.tv_nsec = 0; 313 result->i_ctime.tv_nsec = 0;
324 result->i_mtime.tv_nsec = 0; 314 result->i_mtime.tv_nsec = 0;
325 result->i_atime.tv_nsec = 0; 315 result->i_atime.tv_nsec = 0;
@@ -333,8 +323,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
333 result->i_op = &page_symlink_inode_operations; 323 result->i_op = &page_symlink_inode_operations;
334 result->i_data.a_ops = &hpfs_symlink_aops; 324 result->i_data.a_ops = &hpfs_symlink_aops;
335 325
336 mutex_lock(&hpfs_i(dir)->i_mutex); 326 r = hpfs_add_dirent(dir, name, len, &dee);
337 r = hpfs_add_dirent(dir, name, len, &dee, 0);
338 if (r == 1) 327 if (r == 1)
339 goto bail2; 328 goto bail2;
340 if (r == -1) { 329 if (r == -1) {
@@ -343,7 +332,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
343 } 332 }
344 fnode->len = len; 333 fnode->len = len;
345 memcpy(fnode->name, name, len > 15 ? 15 : len); 334 memcpy(fnode->name, name, len > 15 ? 15 : len);
346 fnode->up = dir->i_ino; 335 fnode->up = cpu_to_le32(dir->i_ino);
347 hpfs_set_ea(result, fnode, "SYMLINK", symlink, strlen(symlink)); 336 hpfs_set_ea(result, fnode, "SYMLINK", symlink, strlen(symlink));
348 mark_buffer_dirty(bh); 337 mark_buffer_dirty(bh);
349 brelse(bh); 338 brelse(bh);
@@ -352,11 +341,9 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
352 341
353 hpfs_write_inode_nolock(result); 342 hpfs_write_inode_nolock(result);
354 d_instantiate(dentry, result); 343 d_instantiate(dentry, result);
355 mutex_unlock(&hpfs_i(dir)->i_mutex);
356 hpfs_unlock(dir->i_sb); 344 hpfs_unlock(dir->i_sb);
357 return 0; 345 return 0;
358bail2: 346bail2:
359 mutex_unlock(&hpfs_i(dir)->i_mutex);
360 iput(result); 347 iput(result);
361bail1: 348bail1:
362 brelse(bh); 349 brelse(bh);
@@ -374,7 +361,6 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
374 struct hpfs_dirent *de; 361 struct hpfs_dirent *de;
375 struct inode *inode = dentry->d_inode; 362 struct inode *inode = dentry->d_inode;
376 dnode_secno dno; 363 dnode_secno dno;
377 fnode_secno fno;
378 int r; 364 int r;
379 int rep = 0; 365 int rep = 0;
380 int err; 366 int err;
@@ -382,8 +368,6 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
382 hpfs_lock(dir->i_sb); 368 hpfs_lock(dir->i_sb);
383 hpfs_adjust_length(name, &len); 369 hpfs_adjust_length(name, &len);
384again: 370again:
385 mutex_lock(&hpfs_i(inode)->i_parent_mutex);
386 mutex_lock(&hpfs_i(dir)->i_mutex);
387 err = -ENOENT; 371 err = -ENOENT;
388 de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh); 372 de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
389 if (!de) 373 if (!de)
@@ -397,7 +381,6 @@ again:
397 if (de->directory) 381 if (de->directory)
398 goto out1; 382 goto out1;
399 383
400 fno = de->fnode;
401 r = hpfs_remove_dirent(dir, dno, de, &qbh, 1); 384 r = hpfs_remove_dirent(dir, dno, de, &qbh, 1);
402 switch (r) { 385 switch (r) {
403 case 1: 386 case 1:
@@ -410,8 +393,6 @@ again:
410 if (rep++) 393 if (rep++)
411 break; 394 break;
412 395
413 mutex_unlock(&hpfs_i(dir)->i_mutex);
414 mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
415 dentry_unhash(dentry); 396 dentry_unhash(dentry);
416 if (!d_unhashed(dentry)) { 397 if (!d_unhashed(dentry)) {
417 hpfs_unlock(dir->i_sb); 398 hpfs_unlock(dir->i_sb);
@@ -442,8 +423,6 @@ again:
442out1: 423out1:
443 hpfs_brelse4(&qbh); 424 hpfs_brelse4(&qbh);
444out: 425out:
445 mutex_unlock(&hpfs_i(dir)->i_mutex);
446 mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
447 hpfs_unlock(dir->i_sb); 426 hpfs_unlock(dir->i_sb);
448 return err; 427 return err;
449} 428}
@@ -456,7 +435,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
456 struct hpfs_dirent *de; 435 struct hpfs_dirent *de;
457 struct inode *inode = dentry->d_inode; 436 struct inode *inode = dentry->d_inode;
458 dnode_secno dno; 437 dnode_secno dno;
459 fnode_secno fno;
460 int n_items = 0; 438 int n_items = 0;
461 int err; 439 int err;
462 int r; 440 int r;
@@ -465,8 +443,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
465 443
466 hpfs_adjust_length(name, &len); 444 hpfs_adjust_length(name, &len);
467 hpfs_lock(dir->i_sb); 445 hpfs_lock(dir->i_sb);
468 mutex_lock(&hpfs_i(inode)->i_parent_mutex);
469 mutex_lock(&hpfs_i(dir)->i_mutex);
470 err = -ENOENT; 446 err = -ENOENT;
471 de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh); 447 de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
472 if (!de) 448 if (!de)
@@ -485,7 +461,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
485 if (n_items) 461 if (n_items)
486 goto out1; 462 goto out1;
487 463
488 fno = de->fnode;
489 r = hpfs_remove_dirent(dir, dno, de, &qbh, 1); 464 r = hpfs_remove_dirent(dir, dno, de, &qbh, 1);
490 switch (r) { 465 switch (r) {
491 case 1: 466 case 1:
@@ -504,8 +479,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
504out1: 479out1:
505 hpfs_brelse4(&qbh); 480 hpfs_brelse4(&qbh);
506out: 481out:
507 mutex_unlock(&hpfs_i(dir)->i_mutex);
508 mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
509 hpfs_unlock(dir->i_sb); 482 hpfs_unlock(dir->i_sb);
510 return err; 483 return err;
511} 484}
@@ -571,12 +544,6 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
571 544
572 hpfs_lock(i->i_sb); 545 hpfs_lock(i->i_sb);
573 /* order doesn't matter, due to VFS exclusion */ 546 /* order doesn't matter, due to VFS exclusion */
574 mutex_lock(&hpfs_i(i)->i_parent_mutex);
575 if (new_inode)
576 mutex_lock(&hpfs_i(new_inode)->i_parent_mutex);
577 mutex_lock(&hpfs_i(old_dir)->i_mutex);
578 if (new_dir != old_dir)
579 mutex_lock(&hpfs_i(new_dir)->i_mutex);
580 547
581 /* Erm? Moving over the empty non-busy directory is perfectly legal */ 548 /* Erm? Moving over the empty non-busy directory is perfectly legal */
582 if (new_inode && S_ISDIR(new_inode->i_mode)) { 549 if (new_inode && S_ISDIR(new_inode->i_mode)) {
@@ -613,9 +580,7 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
613 580
614 if (new_dir == old_dir) hpfs_brelse4(&qbh); 581 if (new_dir == old_dir) hpfs_brelse4(&qbh);
615 582
616 hpfs_lock_creation(i->i_sb); 583 if ((r = hpfs_add_dirent(new_dir, new_name, new_len, &de))) {
617 if ((r = hpfs_add_dirent(new_dir, new_name, new_len, &de, 1))) {
618 hpfs_unlock_creation(i->i_sb);
619 if (r == -1) hpfs_error(new_dir->i_sb, "hpfs_rename: dirent already exists!"); 584 if (r == -1) hpfs_error(new_dir->i_sb, "hpfs_rename: dirent already exists!");
620 err = r == 1 ? -ENOSPC : -EFSERROR; 585 err = r == 1 ? -ENOSPC : -EFSERROR;
621 if (new_dir != old_dir) hpfs_brelse4(&qbh); 586 if (new_dir != old_dir) hpfs_brelse4(&qbh);
@@ -624,20 +589,17 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
624 589
625 if (new_dir == old_dir) 590 if (new_dir == old_dir)
626 if (!(dep = map_dirent(old_dir, hpfs_i(old_dir)->i_dno, old_name, old_len, &dno, &qbh))) { 591 if (!(dep = map_dirent(old_dir, hpfs_i(old_dir)->i_dno, old_name, old_len, &dno, &qbh))) {
627 hpfs_unlock_creation(i->i_sb);
628 hpfs_error(i->i_sb, "lookup succeeded but map dirent failed at #2"); 592 hpfs_error(i->i_sb, "lookup succeeded but map dirent failed at #2");
629 err = -ENOENT; 593 err = -ENOENT;
630 goto end1; 594 goto end1;
631 } 595 }
632 596
633 if ((r = hpfs_remove_dirent(old_dir, dno, dep, &qbh, 0))) { 597 if ((r = hpfs_remove_dirent(old_dir, dno, dep, &qbh, 0))) {
634 hpfs_unlock_creation(i->i_sb);
635 hpfs_error(i->i_sb, "hpfs_rename: could not remove dirent"); 598 hpfs_error(i->i_sb, "hpfs_rename: could not remove dirent");
636 err = r == 2 ? -ENOSPC : -EFSERROR; 599 err = r == 2 ? -ENOSPC : -EFSERROR;
637 goto end1; 600 goto end1;
638 } 601 }
639 hpfs_unlock_creation(i->i_sb); 602
640
641 end: 603 end:
642 hpfs_i(i)->i_parent_dir = new_dir->i_ino; 604 hpfs_i(i)->i_parent_dir = new_dir->i_ino;
643 if (S_ISDIR(i->i_mode)) { 605 if (S_ISDIR(i->i_mode)) {
@@ -645,22 +607,14 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
645 drop_nlink(old_dir); 607 drop_nlink(old_dir);
646 } 608 }
647 if ((fnode = hpfs_map_fnode(i->i_sb, i->i_ino, &bh))) { 609 if ((fnode = hpfs_map_fnode(i->i_sb, i->i_ino, &bh))) {
648 fnode->up = new_dir->i_ino; 610 fnode->up = cpu_to_le32(new_dir->i_ino);
649 fnode->len = new_len; 611 fnode->len = new_len;
650 memcpy(fnode->name, new_name, new_len>15?15:new_len); 612 memcpy(fnode->name, new_name, new_len>15?15:new_len);
651 if (new_len < 15) memset(&fnode->name[new_len], 0, 15 - new_len); 613 if (new_len < 15) memset(&fnode->name[new_len], 0, 15 - new_len);
652 mark_buffer_dirty(bh); 614 mark_buffer_dirty(bh);
653 brelse(bh); 615 brelse(bh);
654 } 616 }
655 hpfs_i(i)->i_conv = hpfs_sb(i->i_sb)->sb_conv;
656 hpfs_decide_conv(i, new_name, new_len);
657end1: 617end1:
658 if (old_dir != new_dir)
659 mutex_unlock(&hpfs_i(new_dir)->i_mutex);
660 mutex_unlock(&hpfs_i(old_dir)->i_mutex);
661 mutex_unlock(&hpfs_i(i)->i_parent_mutex);
662 if (new_inode)
663 mutex_unlock(&hpfs_i(new_inode)->i_parent_mutex);
664 hpfs_unlock(i->i_sb); 618 hpfs_unlock(i->i_sb);
665 return err; 619 return err;
666} 620}
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index c89b40808587..98580a3b5005 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -18,15 +18,16 @@
18 18
19/* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */ 19/* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */
20 20
21static void mark_dirty(struct super_block *s) 21static void mark_dirty(struct super_block *s, int remount)
22{ 22{
23 if (hpfs_sb(s)->sb_chkdsk && !(s->s_flags & MS_RDONLY)) { 23 if (hpfs_sb(s)->sb_chkdsk && (remount || !(s->s_flags & MS_RDONLY))) {
24 struct buffer_head *bh; 24 struct buffer_head *bh;
25 struct hpfs_spare_block *sb; 25 struct hpfs_spare_block *sb;
26 if ((sb = hpfs_map_sector(s, 17, &bh, 0))) { 26 if ((sb = hpfs_map_sector(s, 17, &bh, 0))) {
27 sb->dirty = 1; 27 sb->dirty = 1;
28 sb->old_wrote = 0; 28 sb->old_wrote = 0;
29 mark_buffer_dirty(bh); 29 mark_buffer_dirty(bh);
30 sync_dirty_buffer(bh);
30 brelse(bh); 31 brelse(bh);
31 } 32 }
32 } 33 }
@@ -40,10 +41,12 @@ static void unmark_dirty(struct super_block *s)
40 struct buffer_head *bh; 41 struct buffer_head *bh;
41 struct hpfs_spare_block *sb; 42 struct hpfs_spare_block *sb;
42 if (s->s_flags & MS_RDONLY) return; 43 if (s->s_flags & MS_RDONLY) return;
44 sync_blockdev(s->s_bdev);
43 if ((sb = hpfs_map_sector(s, 17, &bh, 0))) { 45 if ((sb = hpfs_map_sector(s, 17, &bh, 0))) {
44 sb->dirty = hpfs_sb(s)->sb_chkdsk > 1 - hpfs_sb(s)->sb_was_error; 46 sb->dirty = hpfs_sb(s)->sb_chkdsk > 1 - hpfs_sb(s)->sb_was_error;
45 sb->old_wrote = hpfs_sb(s)->sb_chkdsk >= 2 && !hpfs_sb(s)->sb_was_error; 47 sb->old_wrote = hpfs_sb(s)->sb_chkdsk >= 2 && !hpfs_sb(s)->sb_was_error;
46 mark_buffer_dirty(bh); 48 mark_buffer_dirty(bh);
49 sync_dirty_buffer(bh);
47 brelse(bh); 50 brelse(bh);
48 } 51 }
49} 52}
@@ -63,13 +66,13 @@ void hpfs_error(struct super_block *s, const char *fmt, ...)
63 if (!hpfs_sb(s)->sb_was_error) { 66 if (!hpfs_sb(s)->sb_was_error) {
64 if (hpfs_sb(s)->sb_err == 2) { 67 if (hpfs_sb(s)->sb_err == 2) {
65 printk("; crashing the system because you wanted it\n"); 68 printk("; crashing the system because you wanted it\n");
66 mark_dirty(s); 69 mark_dirty(s, 0);
67 panic("HPFS panic"); 70 panic("HPFS panic");
68 } else if (hpfs_sb(s)->sb_err == 1) { 71 } else if (hpfs_sb(s)->sb_err == 1) {
69 if (s->s_flags & MS_RDONLY) printk("; already mounted read-only\n"); 72 if (s->s_flags & MS_RDONLY) printk("; already mounted read-only\n");
70 else { 73 else {
71 printk("; remounting read-only\n"); 74 printk("; remounting read-only\n");
72 mark_dirty(s); 75 mark_dirty(s, 0);
73 s->s_flags |= MS_RDONLY; 76 s->s_flags |= MS_RDONLY;
74 } 77 }
75 } else if (s->s_flags & MS_RDONLY) printk("; going on - but anything won't be destroyed because it's read-only\n"); 78 } else if (s->s_flags & MS_RDONLY) printk("; going on - but anything won't be destroyed because it's read-only\n");
@@ -102,9 +105,12 @@ static void hpfs_put_super(struct super_block *s)
102{ 105{
103 struct hpfs_sb_info *sbi = hpfs_sb(s); 106 struct hpfs_sb_info *sbi = hpfs_sb(s);
104 107
108 hpfs_lock(s);
109 unmark_dirty(s);
110 hpfs_unlock(s);
111
105 kfree(sbi->sb_cp_table); 112 kfree(sbi->sb_cp_table);
106 kfree(sbi->sb_bmp_dir); 113 kfree(sbi->sb_bmp_dir);
107 unmark_dirty(s);
108 s->s_fs_info = NULL; 114 s->s_fs_info = NULL;
109 kfree(sbi); 115 kfree(sbi);
110} 116}
@@ -129,7 +135,7 @@ static unsigned count_bitmaps(struct super_block *s)
129 n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14; 135 n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14;
130 count = 0; 136 count = 0;
131 for (n = 0; n < n_bands; n++) 137 for (n = 0; n < n_bands; n++)
132 count += hpfs_count_one_bitmap(s, hpfs_sb(s)->sb_bmp_dir[n]); 138 count += hpfs_count_one_bitmap(s, le32_to_cpu(hpfs_sb(s)->sb_bmp_dir[n]));
133 return count; 139 return count;
134} 140}
135 141
@@ -188,8 +194,6 @@ static void init_once(void *foo)
188{ 194{
189 struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; 195 struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
190 196
191 mutex_init(&ei->i_mutex);
192 mutex_init(&ei->i_parent_mutex);
193 inode_init_once(&ei->vfs_inode); 197 inode_init_once(&ei->vfs_inode);
194} 198}
195 199
@@ -218,7 +222,6 @@ static void destroy_inodecache(void)
218 222
219enum { 223enum {
220 Opt_help, Opt_uid, Opt_gid, Opt_umask, Opt_case_lower, Opt_case_asis, 224 Opt_help, Opt_uid, Opt_gid, Opt_umask, Opt_case_lower, Opt_case_asis,
221 Opt_conv_binary, Opt_conv_text, Opt_conv_auto,
222 Opt_check_none, Opt_check_normal, Opt_check_strict, 225 Opt_check_none, Opt_check_normal, Opt_check_strict,
223 Opt_err_cont, Opt_err_ro, Opt_err_panic, 226 Opt_err_cont, Opt_err_ro, Opt_err_panic,
224 Opt_eas_no, Opt_eas_ro, Opt_eas_rw, 227 Opt_eas_no, Opt_eas_ro, Opt_eas_rw,
@@ -233,9 +236,6 @@ static const match_table_t tokens = {
233 {Opt_umask, "umask=%o"}, 236 {Opt_umask, "umask=%o"},
234 {Opt_case_lower, "case=lower"}, 237 {Opt_case_lower, "case=lower"},
235 {Opt_case_asis, "case=asis"}, 238 {Opt_case_asis, "case=asis"},
236 {Opt_conv_binary, "conv=binary"},
237 {Opt_conv_text, "conv=text"},
238 {Opt_conv_auto, "conv=auto"},
239 {Opt_check_none, "check=none"}, 239 {Opt_check_none, "check=none"},
240 {Opt_check_normal, "check=normal"}, 240 {Opt_check_normal, "check=normal"},
241 {Opt_check_strict, "check=strict"}, 241 {Opt_check_strict, "check=strict"},
@@ -253,7 +253,7 @@ static const match_table_t tokens = {
253}; 253};
254 254
255static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask, 255static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask,
256 int *lowercase, int *conv, int *eas, int *chk, int *errs, 256 int *lowercase, int *eas, int *chk, int *errs,
257 int *chkdsk, int *timeshift) 257 int *chkdsk, int *timeshift)
258{ 258{
259 char *p; 259 char *p;
@@ -295,15 +295,6 @@ static int parse_opts(char *opts, uid_t *uid, gid_t *gid, umode_t *umask,
295 case Opt_case_asis: 295 case Opt_case_asis:
296 *lowercase = 0; 296 *lowercase = 0;
297 break; 297 break;
298 case Opt_conv_binary:
299 *conv = CONV_BINARY;
300 break;
301 case Opt_conv_text:
302 *conv = CONV_TEXT;
303 break;
304 case Opt_conv_auto:
305 *conv = CONV_AUTO;
306 break;
307 case Opt_check_none: 298 case Opt_check_none:
308 *chk = 0; 299 *chk = 0;
309 break; 300 break;
@@ -370,9 +361,6 @@ HPFS filesystem options:\n\
370 umask=xxx set mode of files that don't have mode specified in eas\n\ 361 umask=xxx set mode of files that don't have mode specified in eas\n\
371 case=lower lowercase all files\n\ 362 case=lower lowercase all files\n\
372 case=asis do not lowercase files (default)\n\ 363 case=asis do not lowercase files (default)\n\
373 conv=binary do not convert CR/LF -> LF (default)\n\
374 conv=auto convert only files with known text extensions\n\
375 conv=text convert all files\n\
376 check=none no fs checks - kernel may crash on corrupted filesystem\n\ 364 check=none no fs checks - kernel may crash on corrupted filesystem\n\
377 check=normal do some checks - it should not crash (default)\n\ 365 check=normal do some checks - it should not crash (default)\n\
378 check=strict do extra time-consuming checks, used for debugging\n\ 366 check=strict do extra time-consuming checks, used for debugging\n\
@@ -394,7 +382,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
394 uid_t uid; 382 uid_t uid;
395 gid_t gid; 383 gid_t gid;
396 umode_t umask; 384 umode_t umask;
397 int lowercase, conv, eas, chk, errs, chkdsk, timeshift; 385 int lowercase, eas, chk, errs, chkdsk, timeshift;
398 int o; 386 int o;
399 struct hpfs_sb_info *sbi = hpfs_sb(s); 387 struct hpfs_sb_info *sbi = hpfs_sb(s);
400 char *new_opts = kstrdup(data, GFP_KERNEL); 388 char *new_opts = kstrdup(data, GFP_KERNEL);
@@ -405,11 +393,11 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
405 lock_super(s); 393 lock_super(s);
406 uid = sbi->sb_uid; gid = sbi->sb_gid; 394 uid = sbi->sb_uid; gid = sbi->sb_gid;
407 umask = 0777 & ~sbi->sb_mode; 395 umask = 0777 & ~sbi->sb_mode;
408 lowercase = sbi->sb_lowercase; conv = sbi->sb_conv; 396 lowercase = sbi->sb_lowercase;
409 eas = sbi->sb_eas; chk = sbi->sb_chk; chkdsk = sbi->sb_chkdsk; 397 eas = sbi->sb_eas; chk = sbi->sb_chk; chkdsk = sbi->sb_chkdsk;
410 errs = sbi->sb_err; timeshift = sbi->sb_timeshift; 398 errs = sbi->sb_err; timeshift = sbi->sb_timeshift;
411 399
412 if (!(o = parse_opts(data, &uid, &gid, &umask, &lowercase, &conv, 400 if (!(o = parse_opts(data, &uid, &gid, &umask, &lowercase,
413 &eas, &chk, &errs, &chkdsk, &timeshift))) { 401 &eas, &chk, &errs, &chkdsk, &timeshift))) {
414 printk("HPFS: bad mount options.\n"); 402 printk("HPFS: bad mount options.\n");
415 goto out_err; 403 goto out_err;
@@ -427,11 +415,11 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
427 415
428 sbi->sb_uid = uid; sbi->sb_gid = gid; 416 sbi->sb_uid = uid; sbi->sb_gid = gid;
429 sbi->sb_mode = 0777 & ~umask; 417 sbi->sb_mode = 0777 & ~umask;
430 sbi->sb_lowercase = lowercase; sbi->sb_conv = conv; 418 sbi->sb_lowercase = lowercase;
431 sbi->sb_eas = eas; sbi->sb_chk = chk; sbi->sb_chkdsk = chkdsk; 419 sbi->sb_eas = eas; sbi->sb_chk = chk; sbi->sb_chkdsk = chkdsk;
432 sbi->sb_err = errs; sbi->sb_timeshift = timeshift; 420 sbi->sb_err = errs; sbi->sb_timeshift = timeshift;
433 421
434 if (!(*flags & MS_RDONLY)) mark_dirty(s); 422 if (!(*flags & MS_RDONLY)) mark_dirty(s, 1);
435 423
436 replace_mount_options(s, new_opts); 424 replace_mount_options(s, new_opts);
437 425
@@ -471,7 +459,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
471 uid_t uid; 459 uid_t uid;
472 gid_t gid; 460 gid_t gid;
473 umode_t umask; 461 umode_t umask;
474 int lowercase, conv, eas, chk, errs, chkdsk, timeshift; 462 int lowercase, eas, chk, errs, chkdsk, timeshift;
475 463
476 dnode_secno root_dno; 464 dnode_secno root_dno;
477 struct hpfs_dirent *de = NULL; 465 struct hpfs_dirent *de = NULL;
@@ -479,11 +467,6 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
479 467
480 int o; 468 int o;
481 469
482 if (num_possible_cpus() > 1) {
483 printk(KERN_ERR "HPFS is not SMP safe\n");
484 return -EINVAL;
485 }
486
487 save_mount_options(s, options); 470 save_mount_options(s, options);
488 471
489 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 472 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
@@ -495,20 +478,20 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
495 sbi->sb_bmp_dir = NULL; 478 sbi->sb_bmp_dir = NULL;
496 sbi->sb_cp_table = NULL; 479 sbi->sb_cp_table = NULL;
497 480
498 mutex_init(&sbi->hpfs_creation_de); 481 mutex_init(&sbi->hpfs_mutex);
482 hpfs_lock(s);
499 483
500 uid = current_uid(); 484 uid = current_uid();
501 gid = current_gid(); 485 gid = current_gid();
502 umask = current_umask(); 486 umask = current_umask();
503 lowercase = 0; 487 lowercase = 0;
504 conv = CONV_BINARY;
505 eas = 2; 488 eas = 2;
506 chk = 1; 489 chk = 1;
507 errs = 1; 490 errs = 1;
508 chkdsk = 1; 491 chkdsk = 1;
509 timeshift = 0; 492 timeshift = 0;
510 493
511 if (!(o = parse_opts(options, &uid, &gid, &umask, &lowercase, &conv, 494 if (!(o = parse_opts(options, &uid, &gid, &umask, &lowercase,
512 &eas, &chk, &errs, &chkdsk, &timeshift))) { 495 &eas, &chk, &errs, &chkdsk, &timeshift))) {
513 printk("HPFS: bad mount options.\n"); 496 printk("HPFS: bad mount options.\n");
514 goto bail0; 497 goto bail0;
@@ -526,9 +509,9 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
526 if (!(spareblock = hpfs_map_sector(s, 17, &bh2, 0))) goto bail3; 509 if (!(spareblock = hpfs_map_sector(s, 17, &bh2, 0))) goto bail3;
527 510
528 /* Check magics */ 511 /* Check magics */
529 if (/*bootblock->magic != BB_MAGIC 512 if (/*le16_to_cpu(bootblock->magic) != BB_MAGIC
530 ||*/ superblock->magic != SB_MAGIC 513 ||*/ le32_to_cpu(superblock->magic) != SB_MAGIC
531 || spareblock->magic != SP_MAGIC) { 514 || le32_to_cpu(spareblock->magic) != SP_MAGIC) {
532 if (!silent) printk("HPFS: Bad magic ... probably not HPFS\n"); 515 if (!silent) printk("HPFS: Bad magic ... probably not HPFS\n");
533 goto bail4; 516 goto bail4;
534 } 517 }
@@ -549,19 +532,18 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
549 s->s_op = &hpfs_sops; 532 s->s_op = &hpfs_sops;
550 s->s_d_op = &hpfs_dentry_operations; 533 s->s_d_op = &hpfs_dentry_operations;
551 534
552 sbi->sb_root = superblock->root; 535 sbi->sb_root = le32_to_cpu(superblock->root);
553 sbi->sb_fs_size = superblock->n_sectors; 536 sbi->sb_fs_size = le32_to_cpu(superblock->n_sectors);
554 sbi->sb_bitmaps = superblock->bitmaps; 537 sbi->sb_bitmaps = le32_to_cpu(superblock->bitmaps);
555 sbi->sb_dirband_start = superblock->dir_band_start; 538 sbi->sb_dirband_start = le32_to_cpu(superblock->dir_band_start);
556 sbi->sb_dirband_size = superblock->n_dir_band; 539 sbi->sb_dirband_size = le32_to_cpu(superblock->n_dir_band);
557 sbi->sb_dmap = superblock->dir_band_bitmap; 540 sbi->sb_dmap = le32_to_cpu(superblock->dir_band_bitmap);
558 sbi->sb_uid = uid; 541 sbi->sb_uid = uid;
559 sbi->sb_gid = gid; 542 sbi->sb_gid = gid;
560 sbi->sb_mode = 0777 & ~umask; 543 sbi->sb_mode = 0777 & ~umask;
561 sbi->sb_n_free = -1; 544 sbi->sb_n_free = -1;
562 sbi->sb_n_free_dnodes = -1; 545 sbi->sb_n_free_dnodes = -1;
563 sbi->sb_lowercase = lowercase; 546 sbi->sb_lowercase = lowercase;
564 sbi->sb_conv = conv;
565 sbi->sb_eas = eas; 547 sbi->sb_eas = eas;
566 sbi->sb_chk = chk; 548 sbi->sb_chk = chk;
567 sbi->sb_chkdsk = chkdsk; 549 sbi->sb_chkdsk = chkdsk;
@@ -573,7 +555,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
573 sbi->sb_max_fwd_alloc = 0xffffff; 555 sbi->sb_max_fwd_alloc = 0xffffff;
574 556
575 /* Load bitmap directory */ 557 /* Load bitmap directory */
576 if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, superblock->bitmaps))) 558 if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, le32_to_cpu(superblock->bitmaps))))
577 goto bail4; 559 goto bail4;
578 560
579 /* Check for general fs errors*/ 561 /* Check for general fs errors*/
@@ -591,20 +573,20 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
591 mark_buffer_dirty(bh2); 573 mark_buffer_dirty(bh2);
592 } 574 }
593 575
594 if (spareblock->hotfixes_used || spareblock->n_spares_used) { 576 if (le32_to_cpu(spareblock->hotfixes_used) || le32_to_cpu(spareblock->n_spares_used)) {
595 if (errs >= 2) { 577 if (errs >= 2) {
596 printk("HPFS: Hotfixes not supported here, try chkdsk\n"); 578 printk("HPFS: Hotfixes not supported here, try chkdsk\n");
597 mark_dirty(s); 579 mark_dirty(s, 0);
598 goto bail4; 580 goto bail4;
599 } 581 }
600 hpfs_error(s, "hotfixes not supported here, try chkdsk"); 582 hpfs_error(s, "hotfixes not supported here, try chkdsk");
601 if (errs == 0) printk("HPFS: Proceeding, but your filesystem will be probably corrupted by this driver...\n"); 583 if (errs == 0) printk("HPFS: Proceeding, but your filesystem will be probably corrupted by this driver...\n");
602 else printk("HPFS: This driver may read bad files or crash when operating on disk with hotfixes.\n"); 584 else printk("HPFS: This driver may read bad files or crash when operating on disk with hotfixes.\n");
603 } 585 }
604 if (spareblock->n_dnode_spares != spareblock->n_dnode_spares_free) { 586 if (le32_to_cpu(spareblock->n_dnode_spares) != le32_to_cpu(spareblock->n_dnode_spares_free)) {
605 if (errs >= 2) { 587 if (errs >= 2) {
606 printk("HPFS: Spare dnodes used, try chkdsk\n"); 588 printk("HPFS: Spare dnodes used, try chkdsk\n");
607 mark_dirty(s); 589 mark_dirty(s, 0);
608 goto bail4; 590 goto bail4;
609 } 591 }
610 hpfs_error(s, "warning: spare dnodes used, try chkdsk"); 592 hpfs_error(s, "warning: spare dnodes used, try chkdsk");
@@ -612,26 +594,26 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
612 } 594 }
613 if (chk) { 595 if (chk) {
614 unsigned a; 596 unsigned a;
615 if (superblock->dir_band_end - superblock->dir_band_start + 1 != superblock->n_dir_band || 597 if (le32_to_cpu(superblock->dir_band_end) - le32_to_cpu(superblock->dir_band_start) + 1 != le32_to_cpu(superblock->n_dir_band) ||
616 superblock->dir_band_end < superblock->dir_band_start || superblock->n_dir_band > 0x4000) { 598 le32_to_cpu(superblock->dir_band_end) < le32_to_cpu(superblock->dir_band_start) || le32_to_cpu(superblock->n_dir_band) > 0x4000) {
617 hpfs_error(s, "dir band size mismatch: dir_band_start==%08x, dir_band_end==%08x, n_dir_band==%08x", 599 hpfs_error(s, "dir band size mismatch: dir_band_start==%08x, dir_band_end==%08x, n_dir_band==%08x",
618 superblock->dir_band_start, superblock->dir_band_end, superblock->n_dir_band); 600 le32_to_cpu(superblock->dir_band_start), le32_to_cpu(superblock->dir_band_end), le32_to_cpu(superblock->n_dir_band));
619 goto bail4; 601 goto bail4;
620 } 602 }
621 a = sbi->sb_dirband_size; 603 a = sbi->sb_dirband_size;
622 sbi->sb_dirband_size = 0; 604 sbi->sb_dirband_size = 0;
623 if (hpfs_chk_sectors(s, superblock->dir_band_start, superblock->n_dir_band, "dir_band") || 605 if (hpfs_chk_sectors(s, le32_to_cpu(superblock->dir_band_start), le32_to_cpu(superblock->n_dir_band), "dir_band") ||
624 hpfs_chk_sectors(s, superblock->dir_band_bitmap, 4, "dir_band_bitmap") || 606 hpfs_chk_sectors(s, le32_to_cpu(superblock->dir_band_bitmap), 4, "dir_band_bitmap") ||
625 hpfs_chk_sectors(s, superblock->bitmaps, 4, "bitmaps")) { 607 hpfs_chk_sectors(s, le32_to_cpu(superblock->bitmaps), 4, "bitmaps")) {
626 mark_dirty(s); 608 mark_dirty(s, 0);
627 goto bail4; 609 goto bail4;
628 } 610 }
629 sbi->sb_dirband_size = a; 611 sbi->sb_dirband_size = a;
630 } else printk("HPFS: You really don't want any checks? You are crazy...\n"); 612 } else printk("HPFS: You really don't want any checks? You are crazy...\n");
631 613
632 /* Load code page table */ 614 /* Load code page table */
633 if (spareblock->n_code_pages) 615 if (le32_to_cpu(spareblock->n_code_pages))
634 if (!(sbi->sb_cp_table = hpfs_load_code_page(s, spareblock->code_page_dir))) 616 if (!(sbi->sb_cp_table = hpfs_load_code_page(s, le32_to_cpu(spareblock->code_page_dir))))
635 printk("HPFS: Warning: code page support is disabled\n"); 617 printk("HPFS: Warning: code page support is disabled\n");
636 618
637 brelse(bh2); 619 brelse(bh2);
@@ -660,13 +642,13 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
660 if (!de) 642 if (!de)
661 hpfs_error(s, "unable to find root dir"); 643 hpfs_error(s, "unable to find root dir");
662 else { 644 else {
663 root->i_atime.tv_sec = local_to_gmt(s, de->read_date); 645 root->i_atime.tv_sec = local_to_gmt(s, le32_to_cpu(de->read_date));
664 root->i_atime.tv_nsec = 0; 646 root->i_atime.tv_nsec = 0;
665 root->i_mtime.tv_sec = local_to_gmt(s, de->write_date); 647 root->i_mtime.tv_sec = local_to_gmt(s, le32_to_cpu(de->write_date));
666 root->i_mtime.tv_nsec = 0; 648 root->i_mtime.tv_nsec = 0;
667 root->i_ctime.tv_sec = local_to_gmt(s, de->creation_date); 649 root->i_ctime.tv_sec = local_to_gmt(s, le32_to_cpu(de->creation_date));
668 root->i_ctime.tv_nsec = 0; 650 root->i_ctime.tv_nsec = 0;
669 hpfs_i(root)->i_ea_size = de->ea_size; 651 hpfs_i(root)->i_ea_size = le16_to_cpu(de->ea_size);
670 hpfs_i(root)->i_parent_dir = root->i_ino; 652 hpfs_i(root)->i_parent_dir = root->i_ino;
671 if (root->i_size == -1) 653 if (root->i_size == -1)
672 root->i_size = 2048; 654 root->i_size = 2048;
@@ -674,6 +656,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
674 root->i_blocks = 5; 656 root->i_blocks = 5;
675 hpfs_brelse4(&qbh); 657 hpfs_brelse4(&qbh);
676 } 658 }
659 hpfs_unlock(s);
677 return 0; 660 return 0;
678 661
679bail4: brelse(bh2); 662bail4: brelse(bh2);
@@ -681,6 +664,7 @@ bail3: brelse(bh1);
681bail2: brelse(bh0); 664bail2: brelse(bh0);
682bail1: 665bail1:
683bail0: 666bail0:
667 hpfs_unlock(s);
684 kfree(sbi->sb_bmp_dir); 668 kfree(sbi->sb_bmp_dir);
685 kfree(sbi->sb_cp_table); 669 kfree(sbi->sb_cp_table);
686 s->s_fs_info = NULL; 670 s->s_fs_info = NULL;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index b9eeb1cd03ff..7aafeb8fa300 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -412,10 +412,10 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
412 pgoff = offset >> PAGE_SHIFT; 412 pgoff = offset >> PAGE_SHIFT;
413 413
414 i_size_write(inode, offset); 414 i_size_write(inode, offset);
415 spin_lock(&mapping->i_mmap_lock); 415 mutex_lock(&mapping->i_mmap_mutex);
416 if (!prio_tree_empty(&mapping->i_mmap)) 416 if (!prio_tree_empty(&mapping->i_mmap))
417 hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); 417 hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
418 spin_unlock(&mapping->i_mmap_lock); 418 mutex_unlock(&mapping->i_mmap_mutex);
419 truncate_hugepages(inode, offset); 419 truncate_hugepages(inode, offset);
420 return 0; 420 return 0;
421} 421}
@@ -921,7 +921,8 @@ static int can_do_hugetlb_shm(void)
921 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); 921 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
922} 922}
923 923
924struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, 924struct file *hugetlb_file_setup(const char *name, size_t size,
925 vm_flags_t acctflag,
925 struct user_struct **user, int creat_flags) 926 struct user_struct **user, int creat_flags)
926{ 927{
927 int error = -ENOMEM; 928 int error = -ENOMEM;
diff --git a/fs/inode.c b/fs/inode.c
index 33c963d08ab4..990d284877a1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -24,6 +24,7 @@
24#include <linux/mount.h> 24#include <linux/mount.h>
25#include <linux/async.h> 25#include <linux/async.h>
26#include <linux/posix_acl.h> 26#include <linux/posix_acl.h>
27#include <linux/prefetch.h>
27#include <linux/ima.h> 28#include <linux/ima.h>
28#include <linux/cred.h> 29#include <linux/cred.h>
29#include "internal.h" 30#include "internal.h"
@@ -325,12 +326,11 @@ void address_space_init_once(struct address_space *mapping)
325 memset(mapping, 0, sizeof(*mapping)); 326 memset(mapping, 0, sizeof(*mapping));
326 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC); 327 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
327 spin_lock_init(&mapping->tree_lock); 328 spin_lock_init(&mapping->tree_lock);
328 spin_lock_init(&mapping->i_mmap_lock); 329 mutex_init(&mapping->i_mmap_mutex);
329 INIT_LIST_HEAD(&mapping->private_list); 330 INIT_LIST_HEAD(&mapping->private_list);
330 spin_lock_init(&mapping->private_lock); 331 spin_lock_init(&mapping->private_lock);
331 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); 332 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
332 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); 333 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
333 mutex_init(&mapping->unmap_mutex);
334} 334}
335EXPORT_SYMBOL(address_space_init_once); 335EXPORT_SYMBOL(address_space_init_once);
336 336
@@ -751,8 +751,12 @@ static void prune_icache(int nr_to_scan)
751 * This function is passed the number of inodes to scan, and it returns the 751 * This function is passed the number of inodes to scan, and it returns the
752 * total number of remaining possibly-reclaimable inodes. 752 * total number of remaining possibly-reclaimable inodes.
753 */ 753 */
754static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 754static int shrink_icache_memory(struct shrinker *shrink,
755 struct shrink_control *sc)
755{ 756{
757 int nr = sc->nr_to_scan;
758 gfp_t gfp_mask = sc->gfp_mask;
759
756 if (nr) { 760 if (nr) {
757 /* 761 /*
758 * Nasty deadlock avoidance. We may hold various FS locks, 762 * Nasty deadlock avoidance. We may hold various FS locks,
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 69b180459463..72ffa974b0b8 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -302,12 +302,6 @@ void journal_commit_transaction(journal_t *journal)
302 * all outstanding updates to complete. 302 * all outstanding updates to complete.
303 */ 303 */
304 304
305#ifdef COMMIT_STATS
306 spin_lock(&journal->j_list_lock);
307 summarise_journal_usage(journal);
308 spin_unlock(&journal->j_list_lock);
309#endif
310
311 /* Do we need to erase the effects of a prior journal_flush? */ 305 /* Do we need to erase the effects of a prior journal_flush? */
312 if (journal->j_flags & JFS_FLUSHED) { 306 if (journal->j_flags & JFS_FLUSHED) {
313 jbd_debug(3, "super block updated\n"); 307 jbd_debug(3, "super block updated\n");
@@ -722,8 +716,13 @@ wait_for_iobuf:
722 required. */ 716 required. */
723 JBUFFER_TRACE(jh, "file as BJ_Forget"); 717 JBUFFER_TRACE(jh, "file as BJ_Forget");
724 journal_file_buffer(jh, commit_transaction, BJ_Forget); 718 journal_file_buffer(jh, commit_transaction, BJ_Forget);
725 /* Wake up any transactions which were waiting for this 719 /*
726 IO to complete */ 720 * Wake up any transactions which were waiting for this
721 * IO to complete. The barrier must be here so that changes
722 * by journal_file_buffer() take effect before wake_up_bit()
723 * does the waitqueue check.
724 */
725 smp_mb();
727 wake_up_bit(&bh->b_state, BH_Unshadow); 726 wake_up_bit(&bh->b_state, BH_Unshadow);
728 JBUFFER_TRACE(jh, "brelse shadowed buffer"); 727 JBUFFER_TRACE(jh, "brelse shadowed buffer");
729 __brelse(bh); 728 __brelse(bh);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b3713afaaa9e..e2d4285fbe90 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -437,9 +437,12 @@ int __log_space_left(journal_t *journal)
437int __log_start_commit(journal_t *journal, tid_t target) 437int __log_start_commit(journal_t *journal, tid_t target)
438{ 438{
439 /* 439 /*
440 * Are we already doing a recent enough commit? 440 * The only transaction we can possibly wait upon is the
441 * currently running transaction (if it exists). Otherwise,
442 * the target tid must be an old one.
441 */ 443 */
442 if (!tid_geq(journal->j_commit_request, target)) { 444 if (journal->j_running_transaction &&
445 journal->j_running_transaction->t_tid == target) {
443 /* 446 /*
444 * We want a new commit: OK, mark the request and wakeup the 447 * We want a new commit: OK, mark the request and wakeup the
445 * commit thread. We do _not_ do the commit ourselves. 448 * commit thread. We do _not_ do the commit ourselves.
@@ -451,7 +454,14 @@ int __log_start_commit(journal_t *journal, tid_t target)
451 journal->j_commit_sequence); 454 journal->j_commit_sequence);
452 wake_up(&journal->j_wait_commit); 455 wake_up(&journal->j_wait_commit);
453 return 1; 456 return 1;
454 } 457 } else if (!tid_geq(journal->j_commit_request, target))
458 /* This should never happen, but if it does, preserve
459 the evidence before kjournald goes into a loop and
460 increments j_commit_sequence beyond all recognition. */
461 WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n",
462 journal->j_commit_request, journal->j_commit_sequence,
463 target, journal->j_running_transaction ?
464 journal->j_running_transaction->t_tid : 0);
455 return 0; 465 return 0;
456} 466}
457 467
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 60d2319651b2..f7ee81a065da 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -266,7 +266,8 @@ static handle_t *new_handle(int nblocks)
266 * This function is visible to journal users (like ext3fs), so is not 266 * This function is visible to journal users (like ext3fs), so is not
267 * called with the journal already locked. 267 * called with the journal already locked.
268 * 268 *
269 * Return a pointer to a newly allocated handle, or NULL on failure 269 * Return a pointer to a newly allocated handle, or an ERR_PTR() value
270 * on failure.
270 */ 271 */
271handle_t *journal_start(journal_t *journal, int nblocks) 272handle_t *journal_start(journal_t *journal, int nblocks)
272{ 273{
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6e28000a4b21..29148a81c783 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -338,12 +338,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
338 * all outstanding updates to complete. 338 * all outstanding updates to complete.
339 */ 339 */
340 340
341#ifdef COMMIT_STATS
342 spin_lock(&journal->j_list_lock);
343 summarise_journal_usage(journal);
344 spin_unlock(&journal->j_list_lock);
345#endif
346
347 /* Do we need to erase the effects of a prior jbd2_journal_flush? */ 341 /* Do we need to erase the effects of a prior jbd2_journal_flush? */
348 if (journal->j_flags & JBD2_FLUSHED) { 342 if (journal->j_flags & JBD2_FLUSHED) {
349 jbd_debug(3, "super block updated\n"); 343 jbd_debug(3, "super block updated\n");
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 1adc8d455f0e..df0de27c2733 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -10,6 +10,7 @@
10#include <linux/blkdev.h> 10#include <linux/blkdev.h>
11#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
12#include <linux/gfp.h> 12#include <linux/gfp.h>
13#include <linux/prefetch.h>
13 14
14#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) 15#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
15 16
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 9e22085231b3..d8d09380c7de 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -481,7 +481,7 @@ static int inode_write_alias(struct super_block *sb,
481 val = inode_val0(inode); 481 val = inode_val0(inode);
482 break; 482 break;
483 case INODE_USED_OFS: 483 case INODE_USED_OFS:
484 val = cpu_to_be64(li->li_used_bytes);; 484 val = cpu_to_be64(li->li_used_bytes);
485 break; 485 break;
486 case INODE_SIZE_OFS: 486 case INODE_SIZE_OFS:
487 val = cpu_to_be64(i_size_read(inode)); 487 val = cpu_to_be64(i_size_read(inode));
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 33435e4b14d2..ce03a182c771 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -480,10 +480,6 @@ static int logfs_read_sb(struct super_block *sb, int read_only)
480 !read_only) 480 !read_only)
481 return -EIO; 481 return -EIO;
482 482
483 mutex_init(&super->s_dirop_mutex);
484 mutex_init(&super->s_object_alias_mutex);
485 INIT_LIST_HEAD(&super->s_freeing_list);
486
487 ret = logfs_init_rw(sb); 483 ret = logfs_init_rw(sb);
488 if (ret) 484 if (ret)
489 return ret; 485 return ret;
@@ -601,6 +597,10 @@ static struct dentry *logfs_mount(struct file_system_type *type, int flags,
601 if (!super) 597 if (!super)
602 return ERR_PTR(-ENOMEM); 598 return ERR_PTR(-ENOMEM);
603 599
600 mutex_init(&super->s_dirop_mutex);
601 mutex_init(&super->s_object_alias_mutex);
602 INIT_LIST_HEAD(&super->s_freeing_list);
603
604 if (!devname) 604 if (!devname)
605 err = logfs_get_sb_bdev(super, type, devname); 605 err = logfs_get_sb_bdev(super, type, devname);
606 else if (strncmp(devname, "mtd", 3)) 606 else if (strncmp(devname, "mtd", 3))
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 2f174be06555..8c32ef3ba88e 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -90,7 +90,8 @@ static DEFINE_SPINLOCK(mb_cache_spinlock);
90 * What the mbcache registers as to get shrunk dynamically. 90 * What the mbcache registers as to get shrunk dynamically.
91 */ 91 */
92 92
93static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask); 93static int mb_cache_shrink_fn(struct shrinker *shrink,
94 struct shrink_control *sc);
94 95
95static struct shrinker mb_cache_shrinker = { 96static struct shrinker mb_cache_shrinker = {
96 .shrink = mb_cache_shrink_fn, 97 .shrink = mb_cache_shrink_fn,
@@ -156,18 +157,19 @@ forget:
156 * gets low. 157 * gets low.
157 * 158 *
158 * @shrink: (ignored) 159 * @shrink: (ignored)
159 * @nr_to_scan: Number of objects to scan 160 * @sc: shrink_control passed from reclaim
160 * @gfp_mask: (ignored)
161 * 161 *
162 * Returns the number of objects which are present in the cache. 162 * Returns the number of objects which are present in the cache.
163 */ 163 */
164static int 164static int
165mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) 165mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc)
166{ 166{
167 LIST_HEAD(free_list); 167 LIST_HEAD(free_list);
168 struct mb_cache *cache; 168 struct mb_cache *cache;
169 struct mb_cache_entry *entry, *tmp; 169 struct mb_cache_entry *entry, *tmp;
170 int count = 0; 170 int count = 0;
171 int nr_to_scan = sc->nr_to_scan;
172 gfp_t gfp_mask = sc->gfp_mask;
171 173
172 mb_debug("trying to free %d entries", nr_to_scan); 174 mb_debug("trying to free %d entries", nr_to_scan);
173 spin_lock(&mb_cache_spinlock); 175 spin_lock(&mb_cache_spinlock);
diff --git a/fs/namei.c b/fs/namei.c
index f90f0593092a..2358b326b221 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -179,7 +179,7 @@ EXPORT_SYMBOL(putname);
179static int acl_permission_check(struct inode *inode, int mask, unsigned int flags, 179static int acl_permission_check(struct inode *inode, int mask, unsigned int flags,
180 int (*check_acl)(struct inode *inode, int mask, unsigned int flags)) 180 int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
181{ 181{
182 umode_t mode = inode->i_mode; 182 unsigned int mode = inode->i_mode;
183 183
184 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 184 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
185 185
@@ -1296,12 +1296,12 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
1296{ 1296{
1297 int res; 1297 int res;
1298 1298
1299 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
1300 if (unlikely(current->link_count >= MAX_NESTED_LINKS)) { 1299 if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
1301 path_put_conditional(path, nd); 1300 path_put_conditional(path, nd);
1302 path_put(&nd->path); 1301 path_put(&nd->path);
1303 return -ELOOP; 1302 return -ELOOP;
1304 } 1303 }
1304 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
1305 1305
1306 nd->depth++; 1306 nd->depth++;
1307 current->link_count++; 1307 current->link_count++;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 0250e4ce4893..202f370526a7 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -461,7 +461,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
461#endif 461#endif
462 struct ncp_entry_info finfo; 462 struct ncp_entry_info finfo;
463 463
464 data.wdog_pid = NULL; 464 memset(&data, 0, sizeof(data));
465 server = kzalloc(sizeof(struct ncp_server), GFP_KERNEL); 465 server = kzalloc(sizeof(struct ncp_server), GFP_KERNEL);
466 if (!server) 466 if (!server)
467 return -ENOMEM; 467 return -ENOMEM;
@@ -496,7 +496,6 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
496 struct ncp_mount_data_v4* md = (struct ncp_mount_data_v4*)raw_data; 496 struct ncp_mount_data_v4* md = (struct ncp_mount_data_v4*)raw_data;
497 497
498 data.flags = md->flags; 498 data.flags = md->flags;
499 data.int_flags = 0;
500 data.mounted_uid = md->mounted_uid; 499 data.mounted_uid = md->mounted_uid;
501 data.wdog_pid = find_get_pid(md->wdog_pid); 500 data.wdog_pid = find_get_pid(md->wdog_pid);
502 data.ncp_fd = md->ncp_fd; 501 data.ncp_fd = md->ncp_fd;
@@ -507,7 +506,6 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
507 data.file_mode = md->file_mode; 506 data.file_mode = md->file_mode;
508 data.dir_mode = md->dir_mode; 507 data.dir_mode = md->dir_mode;
509 data.info_fd = -1; 508 data.info_fd = -1;
510 data.mounted_vol[0] = 0;
511 } 509 }
512 break; 510 break;
513 default: 511 default:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7237672216c8..424e47773a84 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2042,11 +2042,14 @@ static void nfs_access_free_list(struct list_head *head)
2042 } 2042 }
2043} 2043}
2044 2044
2045int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) 2045int nfs_access_cache_shrinker(struct shrinker *shrink,
2046 struct shrink_control *sc)
2046{ 2047{
2047 LIST_HEAD(head); 2048 LIST_HEAD(head);
2048 struct nfs_inode *nfsi, *next; 2049 struct nfs_inode *nfsi, *next;
2049 struct nfs_access_entry *cache; 2050 struct nfs_access_entry *cache;
2051 int nr_to_scan = sc->nr_to_scan;
2052 gfp_t gfp_mask = sc->gfp_mask;
2050 2053
2051 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) 2054 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
2052 return (nr_to_scan == 0) ? 0 : -1; 2055 return (nr_to_scan == 0) ? 0 : -1;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ce118ce885dd..2df6ca7b5898 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -234,7 +234,7 @@ extern int nfs_init_client(struct nfs_client *clp,
234 234
235/* dir.c */ 235/* dir.c */
236extern int nfs_access_cache_shrinker(struct shrinker *shrink, 236extern int nfs_access_cache_shrinker(struct shrinker *shrink,
237 int nr_to_scan, gfp_t gfp_mask); 237 struct shrink_control *sc);
238 238
239/* inode.c */ 239/* inode.c */
240extern struct workqueue_struct *nfsiod_workqueue; 240extern struct workqueue_struct *nfsiod_workqueue;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 6f8192f4cfc7..be79dc9f386d 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -117,6 +117,8 @@ static int filelayout_async_handle_error(struct rpc_task *task,
117 case -EKEYEXPIRED: 117 case -EKEYEXPIRED:
118 rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX); 118 rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX);
119 break; 119 break;
120 case -NFS4ERR_RETRY_UNCACHED_REP:
121 break;
120 default: 122 default:
121 dprintk("%s DS error. Retry through MDS %d\n", __func__, 123 dprintk("%s DS error. Retry through MDS %d\n", __func__,
122 task->tk_status); 124 task->tk_status);
@@ -416,7 +418,8 @@ static int
416filelayout_check_layout(struct pnfs_layout_hdr *lo, 418filelayout_check_layout(struct pnfs_layout_hdr *lo,
417 struct nfs4_filelayout_segment *fl, 419 struct nfs4_filelayout_segment *fl,
418 struct nfs4_layoutget_res *lgr, 420 struct nfs4_layoutget_res *lgr,
419 struct nfs4_deviceid *id) 421 struct nfs4_deviceid *id,
422 gfp_t gfp_flags)
420{ 423{
421 struct nfs4_file_layout_dsaddr *dsaddr; 424 struct nfs4_file_layout_dsaddr *dsaddr;
422 int status = -EINVAL; 425 int status = -EINVAL;
@@ -439,7 +442,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
439 /* find and reference the deviceid */ 442 /* find and reference the deviceid */
440 dsaddr = nfs4_fl_find_get_deviceid(id); 443 dsaddr = nfs4_fl_find_get_deviceid(id);
441 if (dsaddr == NULL) { 444 if (dsaddr == NULL) {
442 dsaddr = get_device_info(lo->plh_inode, id); 445 dsaddr = get_device_info(lo->plh_inode, id, gfp_flags);
443 if (dsaddr == NULL) 446 if (dsaddr == NULL)
444 goto out; 447 goto out;
445 } 448 }
@@ -500,7 +503,8 @@ static int
500filelayout_decode_layout(struct pnfs_layout_hdr *flo, 503filelayout_decode_layout(struct pnfs_layout_hdr *flo,
501 struct nfs4_filelayout_segment *fl, 504 struct nfs4_filelayout_segment *fl,
502 struct nfs4_layoutget_res *lgr, 505 struct nfs4_layoutget_res *lgr,
503 struct nfs4_deviceid *id) 506 struct nfs4_deviceid *id,
507 gfp_t gfp_flags)
504{ 508{
505 struct xdr_stream stream; 509 struct xdr_stream stream;
506 struct xdr_buf buf = { 510 struct xdr_buf buf = {
@@ -516,7 +520,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
516 520
517 dprintk("%s: set_layout_map Begin\n", __func__); 521 dprintk("%s: set_layout_map Begin\n", __func__);
518 522
519 scratch = alloc_page(GFP_KERNEL); 523 scratch = alloc_page(gfp_flags);
520 if (!scratch) 524 if (!scratch)
521 return -ENOMEM; 525 return -ENOMEM;
522 526
@@ -554,13 +558,13 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
554 goto out_err; 558 goto out_err;
555 559
556 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), 560 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
557 GFP_KERNEL); 561 gfp_flags);
558 if (!fl->fh_array) 562 if (!fl->fh_array)
559 goto out_err; 563 goto out_err;
560 564
561 for (i = 0; i < fl->num_fh; i++) { 565 for (i = 0; i < fl->num_fh; i++) {
562 /* Do we want to use a mempool here? */ 566 /* Do we want to use a mempool here? */
563 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); 567 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags);
564 if (!fl->fh_array[i]) 568 if (!fl->fh_array[i])
565 goto out_err_free; 569 goto out_err_free;
566 570
@@ -605,19 +609,20 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg)
605 609
606static struct pnfs_layout_segment * 610static struct pnfs_layout_segment *
607filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, 611filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
608 struct nfs4_layoutget_res *lgr) 612 struct nfs4_layoutget_res *lgr,
613 gfp_t gfp_flags)
609{ 614{
610 struct nfs4_filelayout_segment *fl; 615 struct nfs4_filelayout_segment *fl;
611 int rc; 616 int rc;
612 struct nfs4_deviceid id; 617 struct nfs4_deviceid id;
613 618
614 dprintk("--> %s\n", __func__); 619 dprintk("--> %s\n", __func__);
615 fl = kzalloc(sizeof(*fl), GFP_KERNEL); 620 fl = kzalloc(sizeof(*fl), gfp_flags);
616 if (!fl) 621 if (!fl)
617 return NULL; 622 return NULL;
618 623
619 rc = filelayout_decode_layout(layoutid, fl, lgr, &id); 624 rc = filelayout_decode_layout(layoutid, fl, lgr, &id, gfp_flags);
620 if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id)) { 625 if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id, gfp_flags)) {
621 _filelayout_free_lseg(fl); 626 _filelayout_free_lseg(fl);
622 return NULL; 627 return NULL;
623 } 628 }
@@ -633,7 +638,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
633 int size = (fl->stripe_type == STRIPE_SPARSE) ? 638 int size = (fl->stripe_type == STRIPE_SPARSE) ?
634 fl->dsaddr->ds_num : fl->dsaddr->stripe_count; 639 fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
635 640
636 fl->commit_buckets = kcalloc(size, sizeof(struct list_head), GFP_KERNEL); 641 fl->commit_buckets = kcalloc(size, sizeof(struct list_head), gfp_flags);
637 if (!fl->commit_buckets) { 642 if (!fl->commit_buckets) {
638 filelayout_free_lseg(&fl->generic_hdr); 643 filelayout_free_lseg(&fl->generic_hdr);
639 return NULL; 644 return NULL;
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 7c44579f5832..2b461d77b43a 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -104,6 +104,6 @@ extern struct nfs4_file_layout_dsaddr *
104nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id); 104nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id);
105extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); 105extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
106struct nfs4_file_layout_dsaddr * 106struct nfs4_file_layout_dsaddr *
107get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id); 107get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
108 108
109#endif /* FS_NFS_NFS4FILELAYOUT_H */ 109#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index de5350f2b249..db07c7af1395 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -225,11 +225,11 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
225} 225}
226 226
227static struct nfs4_pnfs_ds * 227static struct nfs4_pnfs_ds *
228nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port) 228nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags)
229{ 229{
230 struct nfs4_pnfs_ds *tmp_ds, *ds; 230 struct nfs4_pnfs_ds *tmp_ds, *ds;
231 231
232 ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL); 232 ds = kzalloc(sizeof(*tmp_ds), gfp_flags);
233 if (!ds) 233 if (!ds)
234 goto out; 234 goto out;
235 235
@@ -261,7 +261,7 @@ out:
261 * Currently only support ipv4, and one multi-path address. 261 * Currently only support ipv4, and one multi-path address.
262 */ 262 */
263static struct nfs4_pnfs_ds * 263static struct nfs4_pnfs_ds *
264decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode) 264decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags)
265{ 265{
266 struct nfs4_pnfs_ds *ds = NULL; 266 struct nfs4_pnfs_ds *ds = NULL;
267 char *buf; 267 char *buf;
@@ -303,7 +303,7 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode)
303 rlen); 303 rlen);
304 goto out_err; 304 goto out_err;
305 } 305 }
306 buf = kmalloc(rlen + 1, GFP_KERNEL); 306 buf = kmalloc(rlen + 1, gfp_flags);
307 if (!buf) { 307 if (!buf) {
308 dprintk("%s: Not enough memory\n", __func__); 308 dprintk("%s: Not enough memory\n", __func__);
309 goto out_err; 309 goto out_err;
@@ -333,7 +333,7 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode)
333 sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]); 333 sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
334 port = htons((tmp[0] << 8) | (tmp[1])); 334 port = htons((tmp[0] << 8) | (tmp[1]));
335 335
336 ds = nfs4_pnfs_ds_add(inode, ip_addr, port); 336 ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags);
337 dprintk("%s: Decoded address and port %s\n", __func__, buf); 337 dprintk("%s: Decoded address and port %s\n", __func__, buf);
338out_free: 338out_free:
339 kfree(buf); 339 kfree(buf);
@@ -343,7 +343,7 @@ out_err:
343 343
344/* Decode opaque device data and return the result */ 344/* Decode opaque device data and return the result */
345static struct nfs4_file_layout_dsaddr* 345static struct nfs4_file_layout_dsaddr*
346decode_device(struct inode *ino, struct pnfs_device *pdev) 346decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
347{ 347{
348 int i; 348 int i;
349 u32 cnt, num; 349 u32 cnt, num;
@@ -362,7 +362,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
362 struct page *scratch; 362 struct page *scratch;
363 363
364 /* set up xdr stream */ 364 /* set up xdr stream */
365 scratch = alloc_page(GFP_KERNEL); 365 scratch = alloc_page(gfp_flags);
366 if (!scratch) 366 if (!scratch)
367 goto out_err; 367 goto out_err;
368 368
@@ -384,7 +384,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
384 } 384 }
385 385
386 /* read stripe indices */ 386 /* read stripe indices */
387 stripe_indices = kcalloc(cnt, sizeof(u8), GFP_KERNEL); 387 stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags);
388 if (!stripe_indices) 388 if (!stripe_indices)
389 goto out_err_free_scratch; 389 goto out_err_free_scratch;
390 390
@@ -423,7 +423,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
423 423
424 dsaddr = kzalloc(sizeof(*dsaddr) + 424 dsaddr = kzalloc(sizeof(*dsaddr) +
425 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), 425 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
426 GFP_KERNEL); 426 gfp_flags);
427 if (!dsaddr) 427 if (!dsaddr)
428 goto out_err_free_stripe_indices; 428 goto out_err_free_stripe_indices;
429 429
@@ -452,7 +452,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
452 for (j = 0; j < mp_count; j++) { 452 for (j = 0; j < mp_count; j++) {
453 if (j == 0) { 453 if (j == 0) {
454 dsaddr->ds_list[i] = decode_and_add_ds(&stream, 454 dsaddr->ds_list[i] = decode_and_add_ds(&stream,
455 ino); 455 ino, gfp_flags);
456 if (dsaddr->ds_list[i] == NULL) 456 if (dsaddr->ds_list[i] == NULL)
457 goto out_err_free_deviceid; 457 goto out_err_free_deviceid;
458 } else { 458 } else {
@@ -503,12 +503,12 @@ out_err:
503 * available devices. 503 * available devices.
504 */ 504 */
505static struct nfs4_file_layout_dsaddr * 505static struct nfs4_file_layout_dsaddr *
506decode_and_add_device(struct inode *inode, struct pnfs_device *dev) 506decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags)
507{ 507{
508 struct nfs4_file_layout_dsaddr *d, *new; 508 struct nfs4_file_layout_dsaddr *d, *new;
509 long hash; 509 long hash;
510 510
511 new = decode_device(inode, dev); 511 new = decode_device(inode, dev, gfp_flags);
512 if (!new) { 512 if (!new) {
513 printk(KERN_WARNING "%s: Could not decode or add device\n", 513 printk(KERN_WARNING "%s: Could not decode or add device\n",
514 __func__); 514 __func__);
@@ -537,7 +537,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
537 * of available devices, and return it. 537 * of available devices, and return it.
538 */ 538 */
539struct nfs4_file_layout_dsaddr * 539struct nfs4_file_layout_dsaddr *
540get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) 540get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags)
541{ 541{
542 struct pnfs_device *pdev = NULL; 542 struct pnfs_device *pdev = NULL;
543 u32 max_resp_sz; 543 u32 max_resp_sz;
@@ -556,17 +556,17 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
556 dprintk("%s inode %p max_resp_sz %u max_pages %d\n", 556 dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
557 __func__, inode, max_resp_sz, max_pages); 557 __func__, inode, max_resp_sz, max_pages);
558 558
559 pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL); 559 pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags);
560 if (pdev == NULL) 560 if (pdev == NULL)
561 return NULL; 561 return NULL;
562 562
563 pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); 563 pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags);
564 if (pages == NULL) { 564 if (pages == NULL) {
565 kfree(pdev); 565 kfree(pdev);
566 return NULL; 566 return NULL;
567 } 567 }
568 for (i = 0; i < max_pages; i++) { 568 for (i = 0; i < max_pages; i++) {
569 pages[i] = alloc_page(GFP_KERNEL); 569 pages[i] = alloc_page(gfp_flags);
570 if (!pages[i]) 570 if (!pages[i])
571 goto out_free; 571 goto out_free;
572 } 572 }
@@ -587,7 +587,7 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
587 * Found new device, need to decode it and then add it to the 587 * Found new device, need to decode it and then add it to the
588 * list of known devices for this mountpoint. 588 * list of known devices for this mountpoint.
589 */ 589 */
590 dsaddr = decode_and_add_device(inode, pdev); 590 dsaddr = decode_and_add_device(inode, pdev, gfp_flags);
591out_free: 591out_free:
592 for (i = 0; i < max_pages; i++) 592 for (i = 0; i < max_pages; i++)
593 __free_page(pages[i]); 593 __free_page(pages[i]);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 69c0f3c5ee7a..cf1b339c3937 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -300,6 +300,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
300 ret = nfs4_delay(server->client, &exception->timeout); 300 ret = nfs4_delay(server->client, &exception->timeout);
301 if (ret != 0) 301 if (ret != 0)
302 break; 302 break;
303 case -NFS4ERR_RETRY_UNCACHED_REP:
303 case -NFS4ERR_OLD_STATEID: 304 case -NFS4ERR_OLD_STATEID:
304 exception->retry = 1; 305 exception->retry = 1;
305 break; 306 break;
@@ -3695,6 +3696,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3695 rpc_delay(task, NFS4_POLL_RETRY_MAX); 3696 rpc_delay(task, NFS4_POLL_RETRY_MAX);
3696 task->tk_status = 0; 3697 task->tk_status = 0;
3697 return -EAGAIN; 3698 return -EAGAIN;
3699 case -NFS4ERR_RETRY_UNCACHED_REP:
3698 case -NFS4ERR_OLD_STATEID: 3700 case -NFS4ERR_OLD_STATEID:
3699 task->tk_status = 0; 3701 task->tk_status = 0;
3700 return -EAGAIN; 3702 return -EAGAIN;
@@ -4844,6 +4846,8 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
4844 dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); 4846 dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status);
4845 rpc_delay(task, NFS4_POLL_RETRY_MIN); 4847 rpc_delay(task, NFS4_POLL_RETRY_MIN);
4846 task->tk_status = 0; 4848 task->tk_status = 0;
4849 /* fall through */
4850 case -NFS4ERR_RETRY_UNCACHED_REP:
4847 nfs_restart_rpc(task, data->clp); 4851 nfs_restart_rpc(task, data->clp);
4848 return; 4852 return;
4849 } 4853 }
@@ -5479,6 +5483,8 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
5479 break; 5483 break;
5480 case -NFS4ERR_DELAY: 5484 case -NFS4ERR_DELAY:
5481 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5485 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5486 /* fall through */
5487 case -NFS4ERR_RETRY_UNCACHED_REP:
5482 return -EAGAIN; 5488 return -EAGAIN;
5483 default: 5489 default:
5484 nfs4_schedule_lease_recovery(clp); 5490 nfs4_schedule_lease_recovery(clp);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index ff681ab65d31..f57f5281a520 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -383,6 +383,7 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
383 plh_layouts); 383 plh_layouts);
384 dprintk("%s freeing layout for inode %lu\n", __func__, 384 dprintk("%s freeing layout for inode %lu\n", __func__,
385 lo->plh_inode->i_ino); 385 lo->plh_inode->i_ino);
386 list_del_init(&lo->plh_layouts);
386 pnfs_destroy_layout(NFS_I(lo->plh_inode)); 387 pnfs_destroy_layout(NFS_I(lo->plh_inode));
387 } 388 }
388} 389}
@@ -466,7 +467,8 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
466static struct pnfs_layout_segment * 467static struct pnfs_layout_segment *
467send_layoutget(struct pnfs_layout_hdr *lo, 468send_layoutget(struct pnfs_layout_hdr *lo,
468 struct nfs_open_context *ctx, 469 struct nfs_open_context *ctx,
469 u32 iomode) 470 u32 iomode,
471 gfp_t gfp_flags)
470{ 472{
471 struct inode *ino = lo->plh_inode; 473 struct inode *ino = lo->plh_inode;
472 struct nfs_server *server = NFS_SERVER(ino); 474 struct nfs_server *server = NFS_SERVER(ino);
@@ -479,7 +481,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
479 dprintk("--> %s\n", __func__); 481 dprintk("--> %s\n", __func__);
480 482
481 BUG_ON(ctx == NULL); 483 BUG_ON(ctx == NULL);
482 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); 484 lgp = kzalloc(sizeof(*lgp), gfp_flags);
483 if (lgp == NULL) 485 if (lgp == NULL)
484 return NULL; 486 return NULL;
485 487
@@ -487,12 +489,12 @@ send_layoutget(struct pnfs_layout_hdr *lo,
487 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; 489 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
488 max_pages = max_resp_sz >> PAGE_SHIFT; 490 max_pages = max_resp_sz >> PAGE_SHIFT;
489 491
490 pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); 492 pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags);
491 if (!pages) 493 if (!pages)
492 goto out_err_free; 494 goto out_err_free;
493 495
494 for (i = 0; i < max_pages; i++) { 496 for (i = 0; i < max_pages; i++) {
495 pages[i] = alloc_page(GFP_KERNEL); 497 pages[i] = alloc_page(gfp_flags);
496 if (!pages[i]) 498 if (!pages[i])
497 goto out_err_free; 499 goto out_err_free;
498 } 500 }
@@ -508,6 +510,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
508 lgp->args.layout.pages = pages; 510 lgp->args.layout.pages = pages;
509 lgp->args.layout.pglen = max_pages * PAGE_SIZE; 511 lgp->args.layout.pglen = max_pages * PAGE_SIZE;
510 lgp->lsegpp = &lseg; 512 lgp->lsegpp = &lseg;
513 lgp->gfp_flags = gfp_flags;
511 514
512 /* Synchronously retrieve layout information from server and 515 /* Synchronously retrieve layout information from server and
513 * store in lseg. 516 * store in lseg.
@@ -665,11 +668,11 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
665} 668}
666 669
667static struct pnfs_layout_hdr * 670static struct pnfs_layout_hdr *
668alloc_init_layout_hdr(struct inode *ino) 671alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags)
669{ 672{
670 struct pnfs_layout_hdr *lo; 673 struct pnfs_layout_hdr *lo;
671 674
672 lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL); 675 lo = kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags);
673 if (!lo) 676 if (!lo)
674 return NULL; 677 return NULL;
675 atomic_set(&lo->plh_refcount, 1); 678 atomic_set(&lo->plh_refcount, 1);
@@ -681,7 +684,7 @@ alloc_init_layout_hdr(struct inode *ino)
681} 684}
682 685
683static struct pnfs_layout_hdr * 686static struct pnfs_layout_hdr *
684pnfs_find_alloc_layout(struct inode *ino) 687pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags)
685{ 688{
686 struct nfs_inode *nfsi = NFS_I(ino); 689 struct nfs_inode *nfsi = NFS_I(ino);
687 struct pnfs_layout_hdr *new = NULL; 690 struct pnfs_layout_hdr *new = NULL;
@@ -696,7 +699,7 @@ pnfs_find_alloc_layout(struct inode *ino)
696 return nfsi->layout; 699 return nfsi->layout;
697 } 700 }
698 spin_unlock(&ino->i_lock); 701 spin_unlock(&ino->i_lock);
699 new = alloc_init_layout_hdr(ino); 702 new = alloc_init_layout_hdr(ino, gfp_flags);
700 spin_lock(&ino->i_lock); 703 spin_lock(&ino->i_lock);
701 704
702 if (likely(nfsi->layout == NULL)) /* Won the race? */ 705 if (likely(nfsi->layout == NULL)) /* Won the race? */
@@ -756,7 +759,8 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
756struct pnfs_layout_segment * 759struct pnfs_layout_segment *
757pnfs_update_layout(struct inode *ino, 760pnfs_update_layout(struct inode *ino,
758 struct nfs_open_context *ctx, 761 struct nfs_open_context *ctx,
759 enum pnfs_iomode iomode) 762 enum pnfs_iomode iomode,
763 gfp_t gfp_flags)
760{ 764{
761 struct nfs_inode *nfsi = NFS_I(ino); 765 struct nfs_inode *nfsi = NFS_I(ino);
762 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; 766 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
@@ -767,7 +771,7 @@ pnfs_update_layout(struct inode *ino,
767 if (!pnfs_enabled_sb(NFS_SERVER(ino))) 771 if (!pnfs_enabled_sb(NFS_SERVER(ino)))
768 return NULL; 772 return NULL;
769 spin_lock(&ino->i_lock); 773 spin_lock(&ino->i_lock);
770 lo = pnfs_find_alloc_layout(ino); 774 lo = pnfs_find_alloc_layout(ino, gfp_flags);
771 if (lo == NULL) { 775 if (lo == NULL) {
772 dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); 776 dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
773 goto out_unlock; 777 goto out_unlock;
@@ -807,7 +811,7 @@ pnfs_update_layout(struct inode *ino,
807 spin_unlock(&clp->cl_lock); 811 spin_unlock(&clp->cl_lock);
808 } 812 }
809 813
810 lseg = send_layoutget(lo, ctx, iomode); 814 lseg = send_layoutget(lo, ctx, iomode, gfp_flags);
811 if (!lseg && first) { 815 if (!lseg && first) {
812 spin_lock(&clp->cl_lock); 816 spin_lock(&clp->cl_lock);
813 list_del_init(&lo->plh_layouts); 817 list_del_init(&lo->plh_layouts);
@@ -846,7 +850,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
846 goto out; 850 goto out;
847 } 851 }
848 /* Inject layout blob into I/O device driver */ 852 /* Inject layout blob into I/O device driver */
849 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res); 853 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags);
850 if (!lseg || IS_ERR(lseg)) { 854 if (!lseg || IS_ERR(lseg)) {
851 if (!lseg) 855 if (!lseg)
852 status = -ENOMEM; 856 status = -ENOMEM;
@@ -899,7 +903,8 @@ static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio,
899 /* This is first coelesce call for a series of nfs_pages */ 903 /* This is first coelesce call for a series of nfs_pages */
900 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 904 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
901 prev->wb_context, 905 prev->wb_context,
902 IOMODE_READ); 906 IOMODE_READ,
907 GFP_KERNEL);
903 } 908 }
904 return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); 909 return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
905} 910}
@@ -921,7 +926,8 @@ static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
921 /* This is first coelesce call for a series of nfs_pages */ 926 /* This is first coelesce call for a series of nfs_pages */
922 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 927 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
923 prev->wb_context, 928 prev->wb_context,
924 IOMODE_RW); 929 IOMODE_RW,
930 GFP_NOFS);
925 } 931 }
926 return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); 932 return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
927} 933}
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index bc4827202e7a..0c015bad9e7a 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -70,7 +70,7 @@ struct pnfs_layoutdriver_type {
70 const u32 id; 70 const u32 id;
71 const char *name; 71 const char *name;
72 struct module *owner; 72 struct module *owner;
73 struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr); 73 struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags);
74 void (*free_lseg) (struct pnfs_layout_segment *lseg); 74 void (*free_lseg) (struct pnfs_layout_segment *lseg);
75 75
76 /* test for nfs page cache coalescing */ 76 /* test for nfs page cache coalescing */
@@ -126,7 +126,7 @@ void get_layout_hdr(struct pnfs_layout_hdr *lo);
126void put_lseg(struct pnfs_layout_segment *lseg); 126void put_lseg(struct pnfs_layout_segment *lseg);
127struct pnfs_layout_segment * 127struct pnfs_layout_segment *
128pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 128pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
129 enum pnfs_iomode access_type); 129 enum pnfs_iomode access_type, gfp_t gfp_flags);
130void set_pnfs_layoutdriver(struct nfs_server *, u32 id); 130void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
131void unset_pnfs_layoutdriver(struct nfs_server *); 131void unset_pnfs_layoutdriver(struct nfs_server *);
132enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, 132enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
@@ -245,7 +245,7 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg)
245 245
246static inline struct pnfs_layout_segment * 246static inline struct pnfs_layout_segment *
247pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 247pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
248 enum pnfs_iomode access_type) 248 enum pnfs_iomode access_type, gfp_t gfp_flags)
249{ 249{
250 return NULL; 250 return NULL;
251} 251}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 7cded2b12a05..2bcf0dc306a1 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -288,7 +288,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
288 atomic_set(&req->wb_complete, requests); 288 atomic_set(&req->wb_complete, requests);
289 289
290 BUG_ON(desc->pg_lseg != NULL); 290 BUG_ON(desc->pg_lseg != NULL);
291 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ); 291 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL);
292 ClearPageError(page); 292 ClearPageError(page);
293 offset = 0; 293 offset = 0;
294 nbytes = desc->pg_count; 294 nbytes = desc->pg_count;
@@ -351,7 +351,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
351 } 351 }
352 req = nfs_list_entry(data->pages.next); 352 req = nfs_list_entry(data->pages.next);
353 if ((!lseg) && list_is_singular(&data->pages)) 353 if ((!lseg) && list_is_singular(&data->pages))
354 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ); 354 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL);
355 355
356 ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, 356 ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
357 0, lseg); 357 0, lseg);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 3bd5d7e80f6c..49c715b4ac92 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -939,7 +939,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
939 atomic_set(&req->wb_complete, requests); 939 atomic_set(&req->wb_complete, requests);
940 940
941 BUG_ON(desc->pg_lseg); 941 BUG_ON(desc->pg_lseg);
942 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); 942 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS);
943 ClearPageError(page); 943 ClearPageError(page);
944 offset = 0; 944 offset = 0;
945 nbytes = desc->pg_count; 945 nbytes = desc->pg_count;
@@ -1013,7 +1013,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
1013 } 1013 }
1014 req = nfs_list_entry(data->pages.next); 1014 req = nfs_list_entry(data->pages.next);
1015 if ((!lseg) && list_is_singular(&data->pages)) 1015 if ((!lseg) && list_is_singular(&data->pages))
1016 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); 1016 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS);
1017 1017
1018 if ((desc->pg_ioflags & FLUSH_COND_STABLE) && 1018 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1019 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) 1019 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 5232d3e8fb2f..a2e2402b2afb 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -8,7 +8,7 @@
8 * Statistsics for the reply cache 8 * Statistsics for the reply cache
9 * fh <stale> <total-lookups> <anonlookups> <dir-not-in-dcache> <nondir-not-in-dcache> 9 * fh <stale> <total-lookups> <anonlookups> <dir-not-in-dcache> <nondir-not-in-dcache>
10 * statistics for filehandle lookup 10 * statistics for filehandle lookup
11 * io <bytes-read> <bytes-writtten> 11 * io <bytes-read> <bytes-written>
12 * statistics for IO throughput 12 * statistics for IO throughput
13 * th <threads> <fullcnt> <10%-20%> <20%-30%> ... <90%-100%> <100%> 13 * th <threads> <fullcnt> <10%-20%> <20%-30%> ... <90%-100%> <100%>
14 * time (seconds) when nfsd thread usage above thresholds 14 * time (seconds) when nfsd thread usage above thresholds
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 0a0a66d98cce..eed4d7b26249 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -489,8 +489,8 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
489void nilfs_palloc_commit_alloc_entry(struct inode *inode, 489void nilfs_palloc_commit_alloc_entry(struct inode *inode,
490 struct nilfs_palloc_req *req) 490 struct nilfs_palloc_req *req)
491{ 491{
492 nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh); 492 mark_buffer_dirty(req->pr_bitmap_bh);
493 nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh); 493 mark_buffer_dirty(req->pr_desc_bh);
494 nilfs_mdt_mark_dirty(inode); 494 nilfs_mdt_mark_dirty(inode);
495 495
496 brelse(req->pr_bitmap_bh); 496 brelse(req->pr_bitmap_bh);
@@ -527,8 +527,8 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
527 kunmap(req->pr_bitmap_bh->b_page); 527 kunmap(req->pr_bitmap_bh->b_page);
528 kunmap(req->pr_desc_bh->b_page); 528 kunmap(req->pr_desc_bh->b_page);
529 529
530 nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh); 530 mark_buffer_dirty(req->pr_desc_bh);
531 nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh); 531 mark_buffer_dirty(req->pr_bitmap_bh);
532 nilfs_mdt_mark_dirty(inode); 532 nilfs_mdt_mark_dirty(inode);
533 533
534 brelse(req->pr_bitmap_bh); 534 brelse(req->pr_bitmap_bh);
@@ -646,7 +646,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
646 unsigned long group, group_offset; 646 unsigned long group, group_offset;
647 int i, j, n, ret; 647 int i, j, n, ret;
648 648
649 for (i = 0; i < nitems; i += n) { 649 for (i = 0; i < nitems; i = j) {
650 group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset); 650 group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset);
651 ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh); 651 ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh);
652 if (ret < 0) 652 if (ret < 0)
@@ -683,8 +683,8 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
683 kunmap(bitmap_bh->b_page); 683 kunmap(bitmap_bh->b_page);
684 kunmap(desc_bh->b_page); 684 kunmap(desc_bh->b_page);
685 685
686 nilfs_mdt_mark_buffer_dirty(desc_bh); 686 mark_buffer_dirty(desc_bh);
687 nilfs_mdt_mark_buffer_dirty(bitmap_bh); 687 mark_buffer_dirty(bitmap_bh);
688 nilfs_mdt_mark_dirty(inode); 688 nilfs_mdt_mark_dirty(inode);
689 689
690 brelse(bitmap_bh); 690 brelse(bitmap_bh);
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 4723f04e9b12..aadbd0b5e3e8 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -34,7 +34,9 @@
34 34
35struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap) 35struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap)
36{ 36{
37 return NILFS_I_NILFS(bmap->b_inode)->ns_dat; 37 struct the_nilfs *nilfs = bmap->b_inode->i_sb->s_fs_info;
38
39 return nilfs->ns_dat;
38} 40}
39 41
40static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap, 42static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap,
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 609cd223eea8..a35ae35e6932 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -34,12 +34,6 @@
34#include "page.h" 34#include "page.h"
35#include "btnode.h" 35#include "btnode.h"
36 36
37void nilfs_btnode_cache_init(struct address_space *btnc,
38 struct backing_dev_info *bdi)
39{
40 nilfs_mapping_init(btnc, bdi);
41}
42
43void nilfs_btnode_cache_clear(struct address_space *btnc) 37void nilfs_btnode_cache_clear(struct address_space *btnc)
44{ 38{
45 invalidate_mapping_pages(btnc, 0, -1); 39 invalidate_mapping_pages(btnc, 0, -1);
@@ -62,7 +56,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
62 BUG(); 56 BUG();
63 } 57 }
64 memset(bh->b_data, 0, 1 << inode->i_blkbits); 58 memset(bh->b_data, 0, 1 << inode->i_blkbits);
65 bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; 59 bh->b_bdev = inode->i_sb->s_bdev;
66 bh->b_blocknr = blocknr; 60 bh->b_blocknr = blocknr;
67 set_buffer_mapped(bh); 61 set_buffer_mapped(bh);
68 set_buffer_uptodate(bh); 62 set_buffer_uptodate(bh);
@@ -94,10 +88,11 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
94 if (pblocknr == 0) { 88 if (pblocknr == 0) {
95 pblocknr = blocknr; 89 pblocknr = blocknr;
96 if (inode->i_ino != NILFS_DAT_INO) { 90 if (inode->i_ino != NILFS_DAT_INO) {
97 struct inode *dat = NILFS_I_NILFS(inode)->ns_dat; 91 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
98 92
99 /* blocknr is a virtual block number */ 93 /* blocknr is a virtual block number */
100 err = nilfs_dat_translate(dat, blocknr, &pblocknr); 94 err = nilfs_dat_translate(nilfs->ns_dat, blocknr,
95 &pblocknr);
101 if (unlikely(err)) { 96 if (unlikely(err)) {
102 brelse(bh); 97 brelse(bh);
103 goto out_locked; 98 goto out_locked;
@@ -120,7 +115,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
120 goto found; 115 goto found;
121 } 116 }
122 set_buffer_mapped(bh); 117 set_buffer_mapped(bh);
123 bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; 118 bh->b_bdev = inode->i_sb->s_bdev;
124 bh->b_blocknr = pblocknr; /* set block address for read */ 119 bh->b_blocknr = pblocknr; /* set block address for read */
125 bh->b_end_io = end_buffer_read_sync; 120 bh->b_end_io = end_buffer_read_sync;
126 get_bh(bh); 121 get_bh(bh);
@@ -259,7 +254,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
259 "invalid oldkey %lld (newkey=%lld)", 254 "invalid oldkey %lld (newkey=%lld)",
260 (unsigned long long)oldkey, 255 (unsigned long long)oldkey,
261 (unsigned long long)newkey); 256 (unsigned long long)newkey);
262 nilfs_btnode_mark_dirty(obh); 257 mark_buffer_dirty(obh);
263 258
264 spin_lock_irq(&btnc->tree_lock); 259 spin_lock_irq(&btnc->tree_lock);
265 radix_tree_delete(&btnc->page_tree, oldkey); 260 radix_tree_delete(&btnc->page_tree, oldkey);
@@ -271,7 +266,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
271 unlock_page(opage); 266 unlock_page(opage);
272 } else { 267 } else {
273 nilfs_copy_buffer(nbh, obh); 268 nilfs_copy_buffer(nbh, obh);
274 nilfs_btnode_mark_dirty(nbh); 269 mark_buffer_dirty(nbh);
275 270
276 nbh->b_blocknr = newkey; 271 nbh->b_blocknr = newkey;
277 ctxt->bh = nbh; 272 ctxt->bh = nbh;
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 1b8ebd888c28..3a4dd2d8d3fc 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
37 struct buffer_head *newbh; 37 struct buffer_head *newbh;
38}; 38};
39 39
40void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
41void nilfs_btnode_cache_clear(struct address_space *); 40void nilfs_btnode_cache_clear(struct address_space *);
42struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, 41struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
43 __u64 blocknr); 42 __u64 blocknr);
@@ -51,7 +50,4 @@ void nilfs_btnode_commit_change_key(struct address_space *,
51void nilfs_btnode_abort_change_key(struct address_space *, 50void nilfs_btnode_abort_change_key(struct address_space *,
52 struct nilfs_btnode_chkey_ctxt *); 51 struct nilfs_btnode_chkey_ctxt *);
53 52
54#define nilfs_btnode_mark_dirty(bh) nilfs_mark_buffer_dirty(bh)
55
56
57#endif /* _NILFS_BTNODE_H */ 53#endif /* _NILFS_BTNODE_H */
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index d451ae0e0bf3..7eafe468a29c 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -714,7 +714,7 @@ static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
714 nilfs_btree_get_nonroot_node(path, level), 714 nilfs_btree_get_nonroot_node(path, level),
715 path[level].bp_index, key); 715 path[level].bp_index, key);
716 if (!buffer_dirty(path[level].bp_bh)) 716 if (!buffer_dirty(path[level].bp_bh))
717 nilfs_btnode_mark_dirty(path[level].bp_bh); 717 mark_buffer_dirty(path[level].bp_bh);
718 } while ((path[level].bp_index == 0) && 718 } while ((path[level].bp_index == 0) &&
719 (++level < nilfs_btree_height(btree) - 1)); 719 (++level < nilfs_btree_height(btree) - 1));
720 } 720 }
@@ -739,7 +739,7 @@ static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
739 nilfs_btree_node_insert(node, path[level].bp_index, 739 nilfs_btree_node_insert(node, path[level].bp_index,
740 *keyp, *ptrp, ncblk); 740 *keyp, *ptrp, ncblk);
741 if (!buffer_dirty(path[level].bp_bh)) 741 if (!buffer_dirty(path[level].bp_bh))
742 nilfs_btnode_mark_dirty(path[level].bp_bh); 742 mark_buffer_dirty(path[level].bp_bh);
743 743
744 if (path[level].bp_index == 0) 744 if (path[level].bp_index == 0)
745 nilfs_btree_promote_key(btree, path, level + 1, 745 nilfs_btree_promote_key(btree, path, level + 1,
@@ -777,9 +777,9 @@ static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
777 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 777 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
778 778
779 if (!buffer_dirty(path[level].bp_bh)) 779 if (!buffer_dirty(path[level].bp_bh))
780 nilfs_btnode_mark_dirty(path[level].bp_bh); 780 mark_buffer_dirty(path[level].bp_bh);
781 if (!buffer_dirty(path[level].bp_sib_bh)) 781 if (!buffer_dirty(path[level].bp_sib_bh))
782 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 782 mark_buffer_dirty(path[level].bp_sib_bh);
783 783
784 nilfs_btree_promote_key(btree, path, level + 1, 784 nilfs_btree_promote_key(btree, path, level + 1,
785 nilfs_btree_node_get_key(node, 0)); 785 nilfs_btree_node_get_key(node, 0));
@@ -823,9 +823,9 @@ static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
823 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 823 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
824 824
825 if (!buffer_dirty(path[level].bp_bh)) 825 if (!buffer_dirty(path[level].bp_bh))
826 nilfs_btnode_mark_dirty(path[level].bp_bh); 826 mark_buffer_dirty(path[level].bp_bh);
827 if (!buffer_dirty(path[level].bp_sib_bh)) 827 if (!buffer_dirty(path[level].bp_sib_bh))
828 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 828 mark_buffer_dirty(path[level].bp_sib_bh);
829 829
830 path[level + 1].bp_index++; 830 path[level + 1].bp_index++;
831 nilfs_btree_promote_key(btree, path, level + 1, 831 nilfs_btree_promote_key(btree, path, level + 1,
@@ -870,9 +870,9 @@ static void nilfs_btree_split(struct nilfs_bmap *btree,
870 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 870 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
871 871
872 if (!buffer_dirty(path[level].bp_bh)) 872 if (!buffer_dirty(path[level].bp_bh))
873 nilfs_btnode_mark_dirty(path[level].bp_bh); 873 mark_buffer_dirty(path[level].bp_bh);
874 if (!buffer_dirty(path[level].bp_sib_bh)) 874 if (!buffer_dirty(path[level].bp_sib_bh))
875 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 875 mark_buffer_dirty(path[level].bp_sib_bh);
876 876
877 newkey = nilfs_btree_node_get_key(right, 0); 877 newkey = nilfs_btree_node_get_key(right, 0);
878 newptr = path[level].bp_newreq.bpr_ptr; 878 newptr = path[level].bp_newreq.bpr_ptr;
@@ -919,7 +919,7 @@ static void nilfs_btree_grow(struct nilfs_bmap *btree,
919 nilfs_btree_node_set_level(root, level + 1); 919 nilfs_btree_node_set_level(root, level + 1);
920 920
921 if (!buffer_dirty(path[level].bp_sib_bh)) 921 if (!buffer_dirty(path[level].bp_sib_bh))
922 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 922 mark_buffer_dirty(path[level].bp_sib_bh);
923 923
924 path[level].bp_bh = path[level].bp_sib_bh; 924 path[level].bp_bh = path[level].bp_sib_bh;
925 path[level].bp_sib_bh = NULL; 925 path[level].bp_sib_bh = NULL;
@@ -1194,7 +1194,7 @@ static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1194 nilfs_btree_node_delete(node, path[level].bp_index, 1194 nilfs_btree_node_delete(node, path[level].bp_index,
1195 keyp, ptrp, ncblk); 1195 keyp, ptrp, ncblk);
1196 if (!buffer_dirty(path[level].bp_bh)) 1196 if (!buffer_dirty(path[level].bp_bh))
1197 nilfs_btnode_mark_dirty(path[level].bp_bh); 1197 mark_buffer_dirty(path[level].bp_bh);
1198 if (path[level].bp_index == 0) 1198 if (path[level].bp_index == 0)
1199 nilfs_btree_promote_key(btree, path, level + 1, 1199 nilfs_btree_promote_key(btree, path, level + 1,
1200 nilfs_btree_node_get_key(node, 0)); 1200 nilfs_btree_node_get_key(node, 0));
@@ -1226,9 +1226,9 @@ static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1226 nilfs_btree_node_move_right(left, node, n, ncblk, ncblk); 1226 nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1227 1227
1228 if (!buffer_dirty(path[level].bp_bh)) 1228 if (!buffer_dirty(path[level].bp_bh))
1229 nilfs_btnode_mark_dirty(path[level].bp_bh); 1229 mark_buffer_dirty(path[level].bp_bh);
1230 if (!buffer_dirty(path[level].bp_sib_bh)) 1230 if (!buffer_dirty(path[level].bp_sib_bh))
1231 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 1231 mark_buffer_dirty(path[level].bp_sib_bh);
1232 1232
1233 nilfs_btree_promote_key(btree, path, level + 1, 1233 nilfs_btree_promote_key(btree, path, level + 1,
1234 nilfs_btree_node_get_key(node, 0)); 1234 nilfs_btree_node_get_key(node, 0));
@@ -1258,9 +1258,9 @@ static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1258 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1258 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1259 1259
1260 if (!buffer_dirty(path[level].bp_bh)) 1260 if (!buffer_dirty(path[level].bp_bh))
1261 nilfs_btnode_mark_dirty(path[level].bp_bh); 1261 mark_buffer_dirty(path[level].bp_bh);
1262 if (!buffer_dirty(path[level].bp_sib_bh)) 1262 if (!buffer_dirty(path[level].bp_sib_bh))
1263 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 1263 mark_buffer_dirty(path[level].bp_sib_bh);
1264 1264
1265 path[level + 1].bp_index++; 1265 path[level + 1].bp_index++;
1266 nilfs_btree_promote_key(btree, path, level + 1, 1266 nilfs_btree_promote_key(btree, path, level + 1,
@@ -1289,7 +1289,7 @@ static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1289 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 1289 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1290 1290
1291 if (!buffer_dirty(path[level].bp_sib_bh)) 1291 if (!buffer_dirty(path[level].bp_sib_bh))
1292 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 1292 mark_buffer_dirty(path[level].bp_sib_bh);
1293 1293
1294 nilfs_btnode_delete(path[level].bp_bh); 1294 nilfs_btnode_delete(path[level].bp_bh);
1295 path[level].bp_bh = path[level].bp_sib_bh; 1295 path[level].bp_bh = path[level].bp_sib_bh;
@@ -1315,7 +1315,7 @@ static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1315 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1315 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1316 1316
1317 if (!buffer_dirty(path[level].bp_bh)) 1317 if (!buffer_dirty(path[level].bp_bh))
1318 nilfs_btnode_mark_dirty(path[level].bp_bh); 1318 mark_buffer_dirty(path[level].bp_bh);
1319 1319
1320 nilfs_btnode_delete(path[level].bp_sib_bh); 1320 nilfs_btnode_delete(path[level].bp_sib_bh);
1321 path[level].bp_sib_bh = NULL; 1321 path[level].bp_sib_bh = NULL;
@@ -1709,7 +1709,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1709 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs); 1709 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1710 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk); 1710 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1711 if (!buffer_dirty(bh)) 1711 if (!buffer_dirty(bh))
1712 nilfs_btnode_mark_dirty(bh); 1712 mark_buffer_dirty(bh);
1713 if (!nilfs_bmap_dirty(btree)) 1713 if (!nilfs_bmap_dirty(btree))
1714 nilfs_bmap_set_dirty(btree); 1714 nilfs_bmap_set_dirty(btree);
1715 1715
@@ -1787,7 +1787,7 @@ static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1787{ 1787{
1788 while ((++level < nilfs_btree_height(btree) - 1) && 1788 while ((++level < nilfs_btree_height(btree) - 1) &&
1789 !buffer_dirty(path[level].bp_bh)) 1789 !buffer_dirty(path[level].bp_bh))
1790 nilfs_btnode_mark_dirty(path[level].bp_bh); 1790 mark_buffer_dirty(path[level].bp_bh);
1791 1791
1792 return 0; 1792 return 0;
1793} 1793}
@@ -2229,7 +2229,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2229 } 2229 }
2230 2230
2231 if (!buffer_dirty(bh)) 2231 if (!buffer_dirty(bh))
2232 nilfs_btnode_mark_dirty(bh); 2232 mark_buffer_dirty(bh);
2233 brelse(bh); 2233 brelse(bh);
2234 if (!nilfs_bmap_dirty(btree)) 2234 if (!nilfs_bmap_dirty(btree))
2235 nilfs_bmap_set_dirty(btree); 2235 nilfs_bmap_set_dirty(btree);
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 5ff15a8a1024..c9b342c8b503 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -216,14 +216,14 @@ int nilfs_cpfile_get_checkpoint(struct inode *cpfile,
216 if (!nilfs_cpfile_is_in_first(cpfile, cno)) 216 if (!nilfs_cpfile_is_in_first(cpfile, cno))
217 nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh, 217 nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh,
218 kaddr, 1); 218 kaddr, 1);
219 nilfs_mdt_mark_buffer_dirty(cp_bh); 219 mark_buffer_dirty(cp_bh);
220 220
221 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 221 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
222 header = nilfs_cpfile_block_get_header(cpfile, header_bh, 222 header = nilfs_cpfile_block_get_header(cpfile, header_bh,
223 kaddr); 223 kaddr);
224 le64_add_cpu(&header->ch_ncheckpoints, 1); 224 le64_add_cpu(&header->ch_ncheckpoints, 1);
225 kunmap_atomic(kaddr, KM_USER0); 225 kunmap_atomic(kaddr, KM_USER0);
226 nilfs_mdt_mark_buffer_dirty(header_bh); 226 mark_buffer_dirty(header_bh);
227 nilfs_mdt_mark_dirty(cpfile); 227 nilfs_mdt_mark_dirty(cpfile);
228 } 228 }
229 229
@@ -326,7 +326,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
326 } 326 }
327 if (nicps > 0) { 327 if (nicps > 0) {
328 tnicps += nicps; 328 tnicps += nicps;
329 nilfs_mdt_mark_buffer_dirty(cp_bh); 329 mark_buffer_dirty(cp_bh);
330 nilfs_mdt_mark_dirty(cpfile); 330 nilfs_mdt_mark_dirty(cpfile);
331 if (!nilfs_cpfile_is_in_first(cpfile, cno)) { 331 if (!nilfs_cpfile_is_in_first(cpfile, cno)) {
332 count = 332 count =
@@ -358,7 +358,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
358 header = nilfs_cpfile_block_get_header(cpfile, header_bh, 358 header = nilfs_cpfile_block_get_header(cpfile, header_bh,
359 kaddr); 359 kaddr);
360 le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps); 360 le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps);
361 nilfs_mdt_mark_buffer_dirty(header_bh); 361 mark_buffer_dirty(header_bh);
362 nilfs_mdt_mark_dirty(cpfile); 362 nilfs_mdt_mark_dirty(cpfile);
363 kunmap_atomic(kaddr, KM_USER0); 363 kunmap_atomic(kaddr, KM_USER0);
364 } 364 }
@@ -671,10 +671,10 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
671 le64_add_cpu(&header->ch_nsnapshots, 1); 671 le64_add_cpu(&header->ch_nsnapshots, 1);
672 kunmap_atomic(kaddr, KM_USER0); 672 kunmap_atomic(kaddr, KM_USER0);
673 673
674 nilfs_mdt_mark_buffer_dirty(prev_bh); 674 mark_buffer_dirty(prev_bh);
675 nilfs_mdt_mark_buffer_dirty(curr_bh); 675 mark_buffer_dirty(curr_bh);
676 nilfs_mdt_mark_buffer_dirty(cp_bh); 676 mark_buffer_dirty(cp_bh);
677 nilfs_mdt_mark_buffer_dirty(header_bh); 677 mark_buffer_dirty(header_bh);
678 nilfs_mdt_mark_dirty(cpfile); 678 nilfs_mdt_mark_dirty(cpfile);
679 679
680 brelse(prev_bh); 680 brelse(prev_bh);
@@ -774,10 +774,10 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
774 le64_add_cpu(&header->ch_nsnapshots, -1); 774 le64_add_cpu(&header->ch_nsnapshots, -1);
775 kunmap_atomic(kaddr, KM_USER0); 775 kunmap_atomic(kaddr, KM_USER0);
776 776
777 nilfs_mdt_mark_buffer_dirty(next_bh); 777 mark_buffer_dirty(next_bh);
778 nilfs_mdt_mark_buffer_dirty(prev_bh); 778 mark_buffer_dirty(prev_bh);
779 nilfs_mdt_mark_buffer_dirty(cp_bh); 779 mark_buffer_dirty(cp_bh);
780 nilfs_mdt_mark_buffer_dirty(header_bh); 780 mark_buffer_dirty(header_bh);
781 nilfs_mdt_mark_dirty(cpfile); 781 nilfs_mdt_mark_dirty(cpfile);
782 782
783 brelse(prev_bh); 783 brelse(prev_bh);
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 59e5fe742f7b..fcc2f869af16 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -54,7 +54,7 @@ static int nilfs_dat_prepare_entry(struct inode *dat,
54static void nilfs_dat_commit_entry(struct inode *dat, 54static void nilfs_dat_commit_entry(struct inode *dat,
55 struct nilfs_palloc_req *req) 55 struct nilfs_palloc_req *req)
56{ 56{
57 nilfs_mdt_mark_buffer_dirty(req->pr_entry_bh); 57 mark_buffer_dirty(req->pr_entry_bh);
58 nilfs_mdt_mark_dirty(dat); 58 nilfs_mdt_mark_dirty(dat);
59 brelse(req->pr_entry_bh); 59 brelse(req->pr_entry_bh);
60} 60}
@@ -361,7 +361,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
361 entry->de_blocknr = cpu_to_le64(blocknr); 361 entry->de_blocknr = cpu_to_le64(blocknr);
362 kunmap_atomic(kaddr, KM_USER0); 362 kunmap_atomic(kaddr, KM_USER0);
363 363
364 nilfs_mdt_mark_buffer_dirty(entry_bh); 364 mark_buffer_dirty(entry_bh);
365 nilfs_mdt_mark_dirty(dat); 365 nilfs_mdt_mark_dirty(dat);
366 366
367 brelse(entry_bh); 367 brelse(entry_bh);
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 397e73258631..d7eeca62febd 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -111,7 +111,6 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
111 nilfs_transaction_commit(inode->i_sb); 111 nilfs_transaction_commit(inode->i_sb);
112 112
113 mapped: 113 mapped:
114 SetPageChecked(page);
115 wait_on_page_writeback(page); 114 wait_on_page_writeback(page);
116 return VM_FAULT_LOCKED; 115 return VM_FAULT_LOCKED;
117} 116}
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 1c2a3e23f8b2..08a07a218d26 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -48,9 +48,6 @@
48#include "dat.h" 48#include "dat.h"
49#include "ifile.h" 49#include "ifile.h"
50 50
51static const struct address_space_operations def_gcinode_aops = {
52};
53
54/* 51/*
55 * nilfs_gccache_submit_read_data() - add data buffer and submit read request 52 * nilfs_gccache_submit_read_data() - add data buffer and submit read request
56 * @inode - gc inode 53 * @inode - gc inode
@@ -87,9 +84,9 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
87 goto out; 84 goto out;
88 85
89 if (pbn == 0) { 86 if (pbn == 0) {
90 struct inode *dat_inode = NILFS_I_NILFS(inode)->ns_dat; 87 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
91 /* use original dat, not gc dat. */ 88
92 err = nilfs_dat_translate(dat_inode, vbn, &pbn); 89 err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn);
93 if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */ 90 if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */
94 brelse(bh); 91 brelse(bh);
95 goto failed; 92 goto failed;
@@ -103,7 +100,7 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
103 } 100 }
104 101
105 if (!buffer_mapped(bh)) { 102 if (!buffer_mapped(bh)) {
106 bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; 103 bh->b_bdev = inode->i_sb->s_bdev;
107 set_buffer_mapped(bh); 104 set_buffer_mapped(bh);
108 } 105 }
109 bh->b_blocknr = pbn; 106 bh->b_blocknr = pbn;
@@ -160,15 +157,11 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
160 if (buffer_dirty(bh)) 157 if (buffer_dirty(bh))
161 return -EEXIST; 158 return -EEXIST;
162 159
163 if (buffer_nilfs_node(bh)) { 160 if (buffer_nilfs_node(bh) && nilfs_btree_broken_node_block(bh)) {
164 if (nilfs_btree_broken_node_block(bh)) { 161 clear_buffer_uptodate(bh);
165 clear_buffer_uptodate(bh); 162 return -EIO;
166 return -EIO;
167 }
168 nilfs_btnode_mark_dirty(bh);
169 } else {
170 nilfs_mark_buffer_dirty(bh);
171 } 163 }
164 mark_buffer_dirty(bh);
172 return 0; 165 return 0;
173} 166}
174 167
@@ -178,7 +171,7 @@ int nilfs_init_gcinode(struct inode *inode)
178 171
179 inode->i_mode = S_IFREG; 172 inode->i_mode = S_IFREG;
180 mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); 173 mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
181 inode->i_mapping->a_ops = &def_gcinode_aops; 174 inode->i_mapping->a_ops = &empty_aops;
182 inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi; 175 inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi;
183 176
184 ii->i_flags = 0; 177 ii->i_flags = 0;
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index bfc73d3a30ed..684d76300a80 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -80,7 +80,7 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino,
80 return ret; 80 return ret;
81 } 81 }
82 nilfs_palloc_commit_alloc_entry(ifile, &req); 82 nilfs_palloc_commit_alloc_entry(ifile, &req);
83 nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh); 83 mark_buffer_dirty(req.pr_entry_bh);
84 nilfs_mdt_mark_dirty(ifile); 84 nilfs_mdt_mark_dirty(ifile);
85 *out_ino = (ino_t)req.pr_entry_nr; 85 *out_ino = (ino_t)req.pr_entry_nr;
86 *out_bh = req.pr_entry_bh; 86 *out_bh = req.pr_entry_bh;
@@ -128,7 +128,7 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
128 raw_inode->i_flags = 0; 128 raw_inode->i_flags = 0;
129 kunmap_atomic(kaddr, KM_USER0); 129 kunmap_atomic(kaddr, KM_USER0);
130 130
131 nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh); 131 mark_buffer_dirty(req.pr_entry_bh);
132 brelse(req.pr_entry_bh); 132 brelse(req.pr_entry_bh);
133 133
134 nilfs_palloc_commit_free_entry(ifile, &req); 134 nilfs_palloc_commit_free_entry(ifile, &req);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index c0aa27490c02..587f18432832 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -74,14 +74,14 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
74 struct buffer_head *bh_result, int create) 74 struct buffer_head *bh_result, int create)
75{ 75{
76 struct nilfs_inode_info *ii = NILFS_I(inode); 76 struct nilfs_inode_info *ii = NILFS_I(inode);
77 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
77 __u64 blknum = 0; 78 __u64 blknum = 0;
78 int err = 0, ret; 79 int err = 0, ret;
79 struct inode *dat = NILFS_I_NILFS(inode)->ns_dat;
80 unsigned maxblocks = bh_result->b_size >> inode->i_blkbits; 80 unsigned maxblocks = bh_result->b_size >> inode->i_blkbits;
81 81
82 down_read(&NILFS_MDT(dat)->mi_sem); 82 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
83 ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks); 83 ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
84 up_read(&NILFS_MDT(dat)->mi_sem); 84 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
85 if (ret >= 0) { /* found */ 85 if (ret >= 0) { /* found */
86 map_bh(bh_result, inode->i_sb, blknum); 86 map_bh(bh_result, inode->i_sb, blknum);
87 if (ret > 0) 87 if (ret > 0)
@@ -596,6 +596,16 @@ void nilfs_write_inode_common(struct inode *inode,
596 raw_inode->i_flags = cpu_to_le32(ii->i_flags); 596 raw_inode->i_flags = cpu_to_le32(ii->i_flags);
597 raw_inode->i_generation = cpu_to_le32(inode->i_generation); 597 raw_inode->i_generation = cpu_to_le32(inode->i_generation);
598 598
599 if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) {
600 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
601
602 /* zero-fill unused portion in the case of super root block */
603 raw_inode->i_xattr = 0;
604 raw_inode->i_pad = 0;
605 memset((void *)raw_inode + sizeof(*raw_inode), 0,
606 nilfs->ns_inode_size - sizeof(*raw_inode));
607 }
608
599 if (has_bmap) 609 if (has_bmap)
600 nilfs_bmap_write(ii->i_bmap, raw_inode); 610 nilfs_bmap_write(ii->i_bmap, raw_inode);
601 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) 611 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
@@ -872,8 +882,7 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
872 return -EINVAL; /* NILFS_I_DIRTY may remain for 882 return -EINVAL; /* NILFS_I_DIRTY may remain for
873 freeing inode */ 883 freeing inode */
874 } 884 }
875 list_del(&ii->i_dirty); 885 list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
876 list_add_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
877 set_bit(NILFS_I_QUEUED, &ii->i_state); 886 set_bit(NILFS_I_QUEUED, &ii->i_state);
878 } 887 }
879 spin_unlock(&nilfs->ns_inode_lock); 888 spin_unlock(&nilfs->ns_inode_lock);
@@ -892,7 +901,7 @@ int nilfs_mark_inode_dirty(struct inode *inode)
892 return err; 901 return err;
893 } 902 }
894 nilfs_update_inode(inode, ibh); 903 nilfs_update_inode(inode, ibh);
895 nilfs_mdt_mark_buffer_dirty(ibh); 904 mark_buffer_dirty(ibh);
896 nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile); 905 nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile);
897 brelse(ibh); 906 brelse(ibh);
898 return 0; 907 return 0;
@@ -931,7 +940,7 @@ void nilfs_dirty_inode(struct inode *inode)
931int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 940int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
932 __u64 start, __u64 len) 941 __u64 start, __u64 len)
933{ 942{
934 struct the_nilfs *nilfs = NILFS_I_NILFS(inode); 943 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
935 __u64 logical = 0, phys = 0, size = 0; 944 __u64 logical = 0, phys = 0, size = 0;
936 __u32 flags = 0; 945 __u32 flags = 0;
937 loff_t isize; 946 loff_t isize;
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index f2469ba6246b..41d6743d303c 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -698,6 +698,63 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
698 return 0; 698 return 0;
699} 699}
700 700
701static int nilfs_ioctl_resize(struct inode *inode, struct file *filp,
702 void __user *argp)
703{
704 __u64 newsize;
705 int ret = -EPERM;
706
707 if (!capable(CAP_SYS_ADMIN))
708 goto out;
709
710 ret = mnt_want_write(filp->f_path.mnt);
711 if (ret)
712 goto out;
713
714 ret = -EFAULT;
715 if (copy_from_user(&newsize, argp, sizeof(newsize)))
716 goto out_drop_write;
717
718 ret = nilfs_resize_fs(inode->i_sb, newsize);
719
720out_drop_write:
721 mnt_drop_write(filp->f_path.mnt);
722out:
723 return ret;
724}
725
726static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp)
727{
728 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
729 __u64 range[2];
730 __u64 minseg, maxseg;
731 unsigned long segbytes;
732 int ret = -EPERM;
733
734 if (!capable(CAP_SYS_ADMIN))
735 goto out;
736
737 ret = -EFAULT;
738 if (copy_from_user(range, argp, sizeof(__u64[2])))
739 goto out;
740
741 ret = -ERANGE;
742 if (range[1] > i_size_read(inode->i_sb->s_bdev->bd_inode))
743 goto out;
744
745 segbytes = nilfs->ns_blocks_per_segment * nilfs->ns_blocksize;
746
747 minseg = range[0] + segbytes - 1;
748 do_div(minseg, segbytes);
749 maxseg = NILFS_SB2_OFFSET_BYTES(range[1]);
750 do_div(maxseg, segbytes);
751 maxseg--;
752
753 ret = nilfs_sufile_set_alloc_range(nilfs->ns_sufile, minseg, maxseg);
754out:
755 return ret;
756}
757
701static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, 758static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
702 unsigned int cmd, void __user *argp, 759 unsigned int cmd, void __user *argp,
703 size_t membsz, 760 size_t membsz,
@@ -763,6 +820,10 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
763 return nilfs_ioctl_clean_segments(inode, filp, cmd, argp); 820 return nilfs_ioctl_clean_segments(inode, filp, cmd, argp);
764 case NILFS_IOCTL_SYNC: 821 case NILFS_IOCTL_SYNC:
765 return nilfs_ioctl_sync(inode, filp, cmd, argp); 822 return nilfs_ioctl_sync(inode, filp, cmd, argp);
823 case NILFS_IOCTL_RESIZE:
824 return nilfs_ioctl_resize(inode, filp, argp);
825 case NILFS_IOCTL_SET_ALLOC_RANGE:
826 return nilfs_ioctl_set_alloc_range(inode, argp);
766 default: 827 default:
767 return -ENOTTY; 828 return -ENOTTY;
768 } 829 }
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index a649b05f7069..800e8d78a83b 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -66,7 +66,7 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
66 kunmap_atomic(kaddr, KM_USER0); 66 kunmap_atomic(kaddr, KM_USER0);
67 67
68 set_buffer_uptodate(bh); 68 set_buffer_uptodate(bh);
69 nilfs_mark_buffer_dirty(bh); 69 mark_buffer_dirty(bh);
70 nilfs_mdt_mark_dirty(inode); 70 nilfs_mdt_mark_dirty(inode);
71 return 0; 71 return 0;
72} 72}
@@ -355,7 +355,7 @@ int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block)
355 err = nilfs_mdt_read_block(inode, block, 0, &bh); 355 err = nilfs_mdt_read_block(inode, block, 0, &bh);
356 if (unlikely(err)) 356 if (unlikely(err))
357 return err; 357 return err;
358 nilfs_mark_buffer_dirty(bh); 358 mark_buffer_dirty(bh);
359 nilfs_mdt_mark_dirty(inode); 359 nilfs_mdt_mark_dirty(inode);
360 brelse(bh); 360 brelse(bh);
361 return 0; 361 return 0;
@@ -450,9 +450,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
450 450
451 INIT_LIST_HEAD(&shadow->frozen_buffers); 451 INIT_LIST_HEAD(&shadow->frozen_buffers);
452 address_space_init_once(&shadow->frozen_data); 452 address_space_init_once(&shadow->frozen_data);
453 nilfs_mapping_init(&shadow->frozen_data, bdi); 453 nilfs_mapping_init(&shadow->frozen_data, inode, bdi);
454 address_space_init_once(&shadow->frozen_btnodes); 454 address_space_init_once(&shadow->frozen_btnodes);
455 nilfs_mapping_init(&shadow->frozen_btnodes, bdi); 455 nilfs_mapping_init(&shadow->frozen_btnodes, inode, bdi);
456 mi->mi_shadow = shadow; 456 mi->mi_shadow = shadow;
457 return 0; 457 return 0;
458} 458}
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index ed68563ec708..ab20a4baa50f 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -64,11 +64,6 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode)
64 return inode->i_private; 64 return inode->i_private;
65} 65}
66 66
67static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode)
68{
69 return inode->i_sb->s_fs_info;
70}
71
72/* Default GFP flags using highmem */ 67/* Default GFP flags using highmem */
73#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM) 68#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM)
74 69
@@ -93,8 +88,6 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh);
93struct buffer_head *nilfs_mdt_get_frozen_buffer(struct inode *inode, 88struct buffer_head *nilfs_mdt_get_frozen_buffer(struct inode *inode,
94 struct buffer_head *bh); 89 struct buffer_head *bh);
95 90
96#define nilfs_mdt_mark_buffer_dirty(bh) nilfs_mark_buffer_dirty(bh)
97
98static inline void nilfs_mdt_mark_dirty(struct inode *inode) 91static inline void nilfs_mdt_mark_dirty(struct inode *inode)
99{ 92{
100 if (!test_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state)) 93 if (!test_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state))
@@ -108,7 +101,7 @@ static inline void nilfs_mdt_clear_dirty(struct inode *inode)
108 101
109static inline __u64 nilfs_mdt_cno(struct inode *inode) 102static inline __u64 nilfs_mdt_cno(struct inode *inode)
110{ 103{
111 return NILFS_I_NILFS(inode)->ns_cno; 104 return ((struct the_nilfs *)inode->i_sb->s_fs_info)->ns_cno;
112} 105}
113 106
114#define nilfs_mdt_bgl_lock(inode, bg) \ 107#define nilfs_mdt_bgl_lock(inode, bg) \
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index a8dd344303cb..a9c6a531f80c 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -80,12 +80,6 @@ static inline struct inode *NILFS_BTNC_I(struct address_space *btnc)
80 return &ii->vfs_inode; 80 return &ii->vfs_inode;
81} 81}
82 82
83static inline struct inode *NILFS_AS_I(struct address_space *mapping)
84{
85 return (mapping->host) ? :
86 container_of(mapping, struct inode, i_data);
87}
88
89/* 83/*
90 * Dynamic state flags of NILFS on-memory inode (i_state) 84 * Dynamic state flags of NILFS on-memory inode (i_state)
91 */ 85 */
@@ -298,6 +292,7 @@ struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb,
298 int flip); 292 int flip);
299int nilfs_commit_super(struct super_block *sb, int flag); 293int nilfs_commit_super(struct super_block *sb, int flag);
300int nilfs_cleanup_super(struct super_block *sb); 294int nilfs_cleanup_super(struct super_block *sb);
295int nilfs_resize_fs(struct super_block *sb, __u64 newsize);
301int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, 296int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
302 struct nilfs_root **root); 297 struct nilfs_root **root);
303int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno); 298int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 1168059c7efd..65221a04c6f0 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -37,8 +37,7 @@
37 37
38#define NILFS_BUFFER_INHERENT_BITS \ 38#define NILFS_BUFFER_INHERENT_BITS \
39 ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \ 39 ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \
40 (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated) | \ 40 (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Checked))
41 (1UL << BH_NILFS_Checked))
42 41
43static struct buffer_head * 42static struct buffer_head *
44__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, 43__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
@@ -59,19 +58,6 @@ __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
59 return bh; 58 return bh;
60} 59}
61 60
62/*
63 * Since the page cache of B-tree node pages or data page cache of pseudo
64 * inodes does not have a valid mapping->host pointer, calling
65 * mark_buffer_dirty() for their buffers causes a NULL pointer dereference;
66 * it calls __mark_inode_dirty(NULL) through __set_page_dirty().
67 * To avoid this problem, the old style mark_buffer_dirty() is used instead.
68 */
69void nilfs_mark_buffer_dirty(struct buffer_head *bh)
70{
71 if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
72 __set_page_dirty_nobuffers(bh->b_page);
73}
74
75struct buffer_head *nilfs_grab_buffer(struct inode *inode, 61struct buffer_head *nilfs_grab_buffer(struct inode *inode,
76 struct address_space *mapping, 62 struct address_space *mapping,
77 unsigned long blkoff, 63 unsigned long blkoff,
@@ -183,7 +169,7 @@ int nilfs_page_buffers_clean(struct page *page)
183void nilfs_page_bug(struct page *page) 169void nilfs_page_bug(struct page *page)
184{ 170{
185 struct address_space *m; 171 struct address_space *m;
186 unsigned long ino = 0; 172 unsigned long ino;
187 173
188 if (unlikely(!page)) { 174 if (unlikely(!page)) {
189 printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n"); 175 printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
@@ -191,11 +177,8 @@ void nilfs_page_bug(struct page *page)
191 } 177 }
192 178
193 m = page->mapping; 179 m = page->mapping;
194 if (m) { 180 ino = m ? m->host->i_ino : 0;
195 struct inode *inode = NILFS_AS_I(m); 181
196 if (inode != NULL)
197 ino = inode->i_ino;
198 }
199 printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx " 182 printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
200 "mapping=%p ino=%lu\n", 183 "mapping=%p ino=%lu\n",
201 page, atomic_read(&page->_count), 184 page, atomic_read(&page->_count),
@@ -217,56 +200,6 @@ void nilfs_page_bug(struct page *page)
217} 200}
218 201
219/** 202/**
220 * nilfs_alloc_private_page - allocate a private page with buffer heads
221 *
222 * Return Value: On success, a pointer to the allocated page is returned.
223 * On error, NULL is returned.
224 */
225struct page *nilfs_alloc_private_page(struct block_device *bdev, int size,
226 unsigned long state)
227{
228 struct buffer_head *bh, *head, *tail;
229 struct page *page;
230
231 page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */
232 if (unlikely(!page))
233 return NULL;
234
235 lock_page(page);
236 head = alloc_page_buffers(page, size, 0);
237 if (unlikely(!head)) {
238 unlock_page(page);
239 __free_page(page);
240 return NULL;
241 }
242
243 bh = head;
244 do {
245 bh->b_state = (1UL << BH_NILFS_Allocated) | state;
246 tail = bh;
247 bh->b_bdev = bdev;
248 bh = bh->b_this_page;
249 } while (bh);
250
251 tail->b_this_page = head;
252 attach_page_buffers(page, head);
253
254 return page;
255}
256
257void nilfs_free_private_page(struct page *page)
258{
259 BUG_ON(!PageLocked(page));
260 BUG_ON(page->mapping);
261
262 if (page_has_buffers(page) && !try_to_free_buffers(page))
263 NILFS_PAGE_BUG(page, "failed to free page");
264
265 unlock_page(page);
266 __free_page(page);
267}
268
269/**
270 * nilfs_copy_page -- copy the page with buffers 203 * nilfs_copy_page -- copy the page with buffers
271 * @dst: destination page 204 * @dst: destination page
272 * @src: source page 205 * @src: source page
@@ -492,10 +425,10 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
492 return nc; 425 return nc;
493} 426}
494 427
495void nilfs_mapping_init(struct address_space *mapping, 428void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
496 struct backing_dev_info *bdi) 429 struct backing_dev_info *bdi)
497{ 430{
498 mapping->host = NULL; 431 mapping->host = inode;
499 mapping->flags = 0; 432 mapping->flags = 0;
500 mapping_set_gfp_mask(mapping, GFP_NOFS); 433 mapping_set_gfp_mask(mapping, GFP_NOFS);
501 mapping->assoc_mapping = NULL; 434 mapping->assoc_mapping = NULL;
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index f06b79ad7493..fb7de71605a0 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -38,14 +38,12 @@ enum {
38 BH_NILFS_Redirected, 38 BH_NILFS_Redirected,
39}; 39};
40 40
41BUFFER_FNS(NILFS_Allocated, nilfs_allocated) /* nilfs private buffers */
42BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */ 41BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */
43BUFFER_FNS(NILFS_Volatile, nilfs_volatile) 42BUFFER_FNS(NILFS_Volatile, nilfs_volatile)
44BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */ 43BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */
45BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */ 44BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */
46 45
47 46
48void nilfs_mark_buffer_dirty(struct buffer_head *bh);
49int __nilfs_clear_page_dirty(struct page *); 47int __nilfs_clear_page_dirty(struct page *);
50 48
51struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *, 49struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *,
@@ -54,14 +52,11 @@ void nilfs_forget_buffer(struct buffer_head *);
54void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *); 52void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *);
55int nilfs_page_buffers_clean(struct page *); 53int nilfs_page_buffers_clean(struct page *);
56void nilfs_page_bug(struct page *); 54void nilfs_page_bug(struct page *);
57struct page *nilfs_alloc_private_page(struct block_device *, int,
58 unsigned long);
59void nilfs_free_private_page(struct page *);
60 55
61int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); 56int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
62void nilfs_copy_back_pages(struct address_space *, struct address_space *); 57void nilfs_copy_back_pages(struct address_space *, struct address_space *);
63void nilfs_clear_dirty_pages(struct address_space *); 58void nilfs_clear_dirty_pages(struct address_space *);
64void nilfs_mapping_init(struct address_space *mapping, 59void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
65 struct backing_dev_info *bdi); 60 struct backing_dev_info *bdi);
66unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); 61unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
67unsigned long nilfs_find_uncommitted_extent(struct inode *inode, 62unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index ba4a64518f38..a604ac0331b2 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -387,9 +387,9 @@ static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr,
387static void dispose_recovery_list(struct list_head *head) 387static void dispose_recovery_list(struct list_head *head)
388{ 388{
389 while (!list_empty(head)) { 389 while (!list_empty(head)) {
390 struct nilfs_recovery_block *rb 390 struct nilfs_recovery_block *rb;
391 = list_entry(head->next, 391
392 struct nilfs_recovery_block, list); 392 rb = list_first_entry(head, struct nilfs_recovery_block, list);
393 list_del(&rb->list); 393 list_del(&rb->list);
394 kfree(rb); 394 kfree(rb);
395 } 395 }
@@ -416,9 +416,9 @@ static int nilfs_segment_list_add(struct list_head *head, __u64 segnum)
416void nilfs_dispose_segment_list(struct list_head *head) 416void nilfs_dispose_segment_list(struct list_head *head)
417{ 417{
418 while (!list_empty(head)) { 418 while (!list_empty(head)) {
419 struct nilfs_segment_entry *ent 419 struct nilfs_segment_entry *ent;
420 = list_entry(head->next, 420
421 struct nilfs_segment_entry, list); 421 ent = list_first_entry(head, struct nilfs_segment_entry, list);
422 list_del(&ent->list); 422 list_del(&ent->list);
423 kfree(ent); 423 kfree(ent);
424 } 424 }
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 2853ff20f85a..850a7c0228fb 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -239,12 +239,15 @@ nilfs_segbuf_fill_in_super_root_crc(struct nilfs_segment_buffer *segbuf,
239 u32 seed) 239 u32 seed)
240{ 240{
241 struct nilfs_super_root *raw_sr; 241 struct nilfs_super_root *raw_sr;
242 struct the_nilfs *nilfs = segbuf->sb_super->s_fs_info;
243 unsigned srsize;
242 u32 crc; 244 u32 crc;
243 245
244 raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data; 246 raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data;
247 srsize = NILFS_SR_BYTES(nilfs->ns_inode_size);
245 crc = crc32_le(seed, 248 crc = crc32_le(seed,
246 (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum), 249 (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum),
247 NILFS_SR_BYTES - sizeof(raw_sr->sr_sum)); 250 srsize - sizeof(raw_sr->sr_sum));
248 raw_sr->sr_sum = cpu_to_le32(crc); 251 raw_sr->sr_sum = cpu_to_le32(crc);
249} 252}
250 253
@@ -254,18 +257,6 @@ static void nilfs_release_buffers(struct list_head *list)
254 257
255 list_for_each_entry_safe(bh, n, list, b_assoc_buffers) { 258 list_for_each_entry_safe(bh, n, list, b_assoc_buffers) {
256 list_del_init(&bh->b_assoc_buffers); 259 list_del_init(&bh->b_assoc_buffers);
257 if (buffer_nilfs_allocated(bh)) {
258 struct page *clone_page = bh->b_page;
259
260 /* remove clone page */
261 brelse(bh);
262 page_cache_release(clone_page); /* for each bh */
263 if (page_count(clone_page) <= 2) {
264 lock_page(clone_page);
265 nilfs_free_private_page(clone_page);
266 }
267 continue;
268 }
269 brelse(bh); 260 brelse(bh);
270 } 261 }
271} 262}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index afe4f2183454..141646e88fb5 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -655,13 +655,10 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
655 if (unlikely(page->index > last)) 655 if (unlikely(page->index > last))
656 break; 656 break;
657 657
658 if (mapping->host) { 658 lock_page(page);
659 lock_page(page); 659 if (!page_has_buffers(page))
660 if (!page_has_buffers(page)) 660 create_empty_buffers(page, 1 << inode->i_blkbits, 0);
661 create_empty_buffers(page, 661 unlock_page(page);
662 1 << inode->i_blkbits, 0);
663 unlock_page(page);
664 }
665 662
666 bh = head = page_buffers(page); 663 bh = head = page_buffers(page);
667 do { 664 do {
@@ -809,7 +806,7 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
809 /* The following code is duplicated with cpfile. But, it is 806 /* The following code is duplicated with cpfile. But, it is
810 needed to collect the checkpoint even if it was not newly 807 needed to collect the checkpoint even if it was not newly
811 created */ 808 created */
812 nilfs_mdt_mark_buffer_dirty(bh_cp); 809 mark_buffer_dirty(bh_cp);
813 nilfs_mdt_mark_dirty(nilfs->ns_cpfile); 810 nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
814 nilfs_cpfile_put_checkpoint( 811 nilfs_cpfile_put_checkpoint(
815 nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); 812 nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
@@ -889,12 +886,14 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
889{ 886{
890 struct buffer_head *bh_sr; 887 struct buffer_head *bh_sr;
891 struct nilfs_super_root *raw_sr; 888 struct nilfs_super_root *raw_sr;
892 unsigned isz = nilfs->ns_inode_size; 889 unsigned isz, srsz;
893 890
894 bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root; 891 bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
895 raw_sr = (struct nilfs_super_root *)bh_sr->b_data; 892 raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
893 isz = nilfs->ns_inode_size;
894 srsz = NILFS_SR_BYTES(isz);
896 895
897 raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES); 896 raw_sr->sr_bytes = cpu_to_le16(srsz);
898 raw_sr->sr_nongc_ctime 897 raw_sr->sr_nongc_ctime
899 = cpu_to_le64(nilfs_doing_gc() ? 898 = cpu_to_le64(nilfs_doing_gc() ?
900 nilfs->ns_nongc_ctime : sci->sc_seg_ctime); 899 nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
@@ -906,6 +905,7 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
906 NILFS_SR_CPFILE_OFFSET(isz), 1); 905 NILFS_SR_CPFILE_OFFSET(isz), 1);
907 nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr + 906 nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
908 NILFS_SR_SUFILE_OFFSET(isz), 1); 907 NILFS_SR_SUFILE_OFFSET(isz), 1);
908 memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
909} 909}
910 910
911static void nilfs_redirty_inodes(struct list_head *head) 911static void nilfs_redirty_inodes(struct list_head *head)
@@ -954,8 +954,8 @@ static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
954 954
955 dispose_buffers: 955 dispose_buffers:
956 while (!list_empty(listp)) { 956 while (!list_empty(listp)) {
957 bh = list_entry(listp->next, struct buffer_head, 957 bh = list_first_entry(listp, struct buffer_head,
958 b_assoc_buffers); 958 b_assoc_buffers);
959 list_del_init(&bh->b_assoc_buffers); 959 list_del_init(&bh->b_assoc_buffers);
960 brelse(bh); 960 brelse(bh);
961 } 961 }
@@ -1500,10 +1500,7 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
1500 nblocks = le32_to_cpu(finfo->fi_nblocks); 1500 nblocks = le32_to_cpu(finfo->fi_nblocks);
1501 ndatablk = le32_to_cpu(finfo->fi_ndatablk); 1501 ndatablk = le32_to_cpu(finfo->fi_ndatablk);
1502 1502
1503 if (buffer_nilfs_node(bh)) 1503 inode = bh->b_page->mapping->host;
1504 inode = NILFS_BTNC_I(bh->b_page->mapping);
1505 else
1506 inode = NILFS_AS_I(bh->b_page->mapping);
1507 1504
1508 if (mode == SC_LSEG_DSYNC) 1505 if (mode == SC_LSEG_DSYNC)
1509 sc_op = &nilfs_sc_dsync_ops; 1506 sc_op = &nilfs_sc_dsync_ops;
@@ -1556,83 +1553,24 @@ static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode)
1556 return 0; 1553 return 0;
1557} 1554}
1558 1555
1559static int 1556static void nilfs_begin_page_io(struct page *page)
1560nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out)
1561{
1562 struct page *clone_page;
1563 struct buffer_head *bh, *head, *bh2;
1564 void *kaddr;
1565
1566 bh = head = page_buffers(page);
1567
1568 clone_page = nilfs_alloc_private_page(bh->b_bdev, bh->b_size, 0);
1569 if (unlikely(!clone_page))
1570 return -ENOMEM;
1571
1572 bh2 = page_buffers(clone_page);
1573 kaddr = kmap_atomic(page, KM_USER0);
1574 do {
1575 if (list_empty(&bh->b_assoc_buffers))
1576 continue;
1577 get_bh(bh2);
1578 page_cache_get(clone_page); /* for each bh */
1579 memcpy(bh2->b_data, kaddr + bh_offset(bh), bh2->b_size);
1580 bh2->b_blocknr = bh->b_blocknr;
1581 list_replace(&bh->b_assoc_buffers, &bh2->b_assoc_buffers);
1582 list_add_tail(&bh->b_assoc_buffers, out);
1583 } while (bh = bh->b_this_page, bh2 = bh2->b_this_page, bh != head);
1584 kunmap_atomic(kaddr, KM_USER0);
1585
1586 if (!TestSetPageWriteback(clone_page))
1587 account_page_writeback(clone_page);
1588 unlock_page(clone_page);
1589
1590 return 0;
1591}
1592
1593static int nilfs_test_page_to_be_frozen(struct page *page)
1594{
1595 struct address_space *mapping = page->mapping;
1596
1597 if (!mapping || !mapping->host || S_ISDIR(mapping->host->i_mode))
1598 return 0;
1599
1600 if (page_mapped(page)) {
1601 ClearPageChecked(page);
1602 return 1;
1603 }
1604 return PageChecked(page);
1605}
1606
1607static int nilfs_begin_page_io(struct page *page, struct list_head *out)
1608{ 1557{
1609 if (!page || PageWriteback(page)) 1558 if (!page || PageWriteback(page))
1610 /* For split b-tree node pages, this function may be called 1559 /* For split b-tree node pages, this function may be called
1611 twice. We ignore the 2nd or later calls by this check. */ 1560 twice. We ignore the 2nd or later calls by this check. */
1612 return 0; 1561 return;
1613 1562
1614 lock_page(page); 1563 lock_page(page);
1615 clear_page_dirty_for_io(page); 1564 clear_page_dirty_for_io(page);
1616 set_page_writeback(page); 1565 set_page_writeback(page);
1617 unlock_page(page); 1566 unlock_page(page);
1618
1619 if (nilfs_test_page_to_be_frozen(page)) {
1620 int err = nilfs_copy_replace_page_buffers(page, out);
1621 if (unlikely(err))
1622 return err;
1623 }
1624 return 0;
1625} 1567}
1626 1568
1627static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, 1569static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
1628 struct page **failed_page)
1629{ 1570{
1630 struct nilfs_segment_buffer *segbuf; 1571 struct nilfs_segment_buffer *segbuf;
1631 struct page *bd_page = NULL, *fs_page = NULL; 1572 struct page *bd_page = NULL, *fs_page = NULL;
1632 struct list_head *list = &sci->sc_copied_buffers;
1633 int err;
1634 1573
1635 *failed_page = NULL;
1636 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { 1574 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1637 struct buffer_head *bh; 1575 struct buffer_head *bh;
1638 1576
@@ -1662,11 +1600,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
1662 break; 1600 break;
1663 } 1601 }
1664 if (bh->b_page != fs_page) { 1602 if (bh->b_page != fs_page) {
1665 err = nilfs_begin_page_io(fs_page, list); 1603 nilfs_begin_page_io(fs_page);
1666 if (unlikely(err)) {
1667 *failed_page = fs_page;
1668 goto out;
1669 }
1670 fs_page = bh->b_page; 1604 fs_page = bh->b_page;
1671 } 1605 }
1672 } 1606 }
@@ -1677,11 +1611,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
1677 set_page_writeback(bd_page); 1611 set_page_writeback(bd_page);
1678 unlock_page(bd_page); 1612 unlock_page(bd_page);
1679 } 1613 }
1680 err = nilfs_begin_page_io(fs_page, list); 1614 nilfs_begin_page_io(fs_page);
1681 if (unlikely(err))
1682 *failed_page = fs_page;
1683 out:
1684 return err;
1685} 1615}
1686 1616
1687static int nilfs_segctor_write(struct nilfs_sc_info *sci, 1617static int nilfs_segctor_write(struct nilfs_sc_info *sci,
@@ -1694,24 +1624,6 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci,
1694 return ret; 1624 return ret;
1695} 1625}
1696 1626
1697static void __nilfs_end_page_io(struct page *page, int err)
1698{
1699 if (!err) {
1700 if (!nilfs_page_buffers_clean(page))
1701 __set_page_dirty_nobuffers(page);
1702 ClearPageError(page);
1703 } else {
1704 __set_page_dirty_nobuffers(page);
1705 SetPageError(page);
1706 }
1707
1708 if (buffer_nilfs_allocated(page_buffers(page))) {
1709 if (TestClearPageWriteback(page))
1710 dec_zone_page_state(page, NR_WRITEBACK);
1711 } else
1712 end_page_writeback(page);
1713}
1714
1715static void nilfs_end_page_io(struct page *page, int err) 1627static void nilfs_end_page_io(struct page *page, int err)
1716{ 1628{
1717 if (!page) 1629 if (!page)
@@ -1738,40 +1650,19 @@ static void nilfs_end_page_io(struct page *page, int err)
1738 return; 1650 return;
1739 } 1651 }
1740 1652
1741 __nilfs_end_page_io(page, err); 1653 if (!err) {
1742} 1654 if (!nilfs_page_buffers_clean(page))
1743 1655 __set_page_dirty_nobuffers(page);
1744static void nilfs_clear_copied_buffers(struct list_head *list, int err) 1656 ClearPageError(page);
1745{ 1657 } else {
1746 struct buffer_head *bh, *head; 1658 __set_page_dirty_nobuffers(page);
1747 struct page *page; 1659 SetPageError(page);
1748
1749 while (!list_empty(list)) {
1750 bh = list_entry(list->next, struct buffer_head,
1751 b_assoc_buffers);
1752 page = bh->b_page;
1753 page_cache_get(page);
1754 head = bh = page_buffers(page);
1755 do {
1756 if (!list_empty(&bh->b_assoc_buffers)) {
1757 list_del_init(&bh->b_assoc_buffers);
1758 if (!err) {
1759 set_buffer_uptodate(bh);
1760 clear_buffer_dirty(bh);
1761 clear_buffer_delay(bh);
1762 clear_buffer_nilfs_volatile(bh);
1763 }
1764 brelse(bh); /* for b_assoc_buffers */
1765 }
1766 } while ((bh = bh->b_this_page) != head);
1767
1768 __nilfs_end_page_io(page, err);
1769 page_cache_release(page);
1770 } 1660 }
1661
1662 end_page_writeback(page);
1771} 1663}
1772 1664
1773static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page, 1665static void nilfs_abort_logs(struct list_head *logs, int err)
1774 int err)
1775{ 1666{
1776 struct nilfs_segment_buffer *segbuf; 1667 struct nilfs_segment_buffer *segbuf;
1777 struct page *bd_page = NULL, *fs_page = NULL; 1668 struct page *bd_page = NULL, *fs_page = NULL;
@@ -1801,8 +1692,6 @@ static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page,
1801 } 1692 }
1802 if (bh->b_page != fs_page) { 1693 if (bh->b_page != fs_page) {
1803 nilfs_end_page_io(fs_page, err); 1694 nilfs_end_page_io(fs_page, err);
1804 if (fs_page && fs_page == failed_page)
1805 return;
1806 fs_page = bh->b_page; 1695 fs_page = bh->b_page;
1807 } 1696 }
1808 } 1697 }
@@ -1821,12 +1710,11 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
1821 1710
1822 list_splice_tail_init(&sci->sc_write_logs, &logs); 1711 list_splice_tail_init(&sci->sc_write_logs, &logs);
1823 ret = nilfs_wait_on_logs(&logs); 1712 ret = nilfs_wait_on_logs(&logs);
1824 nilfs_abort_logs(&logs, NULL, ret ? : err); 1713 nilfs_abort_logs(&logs, ret ? : err);
1825 1714
1826 list_splice_tail_init(&sci->sc_segbufs, &logs); 1715 list_splice_tail_init(&sci->sc_segbufs, &logs);
1827 nilfs_cancel_segusage(&logs, nilfs->ns_sufile); 1716 nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
1828 nilfs_free_incomplete_logs(&logs, nilfs); 1717 nilfs_free_incomplete_logs(&logs, nilfs);
1829 nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err);
1830 1718
1831 if (sci->sc_stage.flags & NILFS_CF_SUFREED) { 1719 if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
1832 ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile, 1720 ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
@@ -1920,8 +1808,6 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1920 1808
1921 nilfs_end_page_io(fs_page, 0); 1809 nilfs_end_page_io(fs_page, 0);
1922 1810
1923 nilfs_clear_copied_buffers(&sci->sc_copied_buffers, 0);
1924
1925 nilfs_drop_collected_inodes(&sci->sc_dirty_files); 1811 nilfs_drop_collected_inodes(&sci->sc_dirty_files);
1926 1812
1927 if (nilfs_doing_gc()) 1813 if (nilfs_doing_gc())
@@ -1979,7 +1865,7 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
1979 "failed to get inode block.\n"); 1865 "failed to get inode block.\n");
1980 return err; 1866 return err;
1981 } 1867 }
1982 nilfs_mdt_mark_buffer_dirty(ibh); 1868 mark_buffer_dirty(ibh);
1983 nilfs_mdt_mark_dirty(ifile); 1869 nilfs_mdt_mark_dirty(ifile);
1984 spin_lock(&nilfs->ns_inode_lock); 1870 spin_lock(&nilfs->ns_inode_lock);
1985 if (likely(!ii->i_bh)) 1871 if (likely(!ii->i_bh))
@@ -1991,8 +1877,7 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
1991 1877
1992 clear_bit(NILFS_I_QUEUED, &ii->i_state); 1878 clear_bit(NILFS_I_QUEUED, &ii->i_state);
1993 set_bit(NILFS_I_BUSY, &ii->i_state); 1879 set_bit(NILFS_I_BUSY, &ii->i_state);
1994 list_del(&ii->i_dirty); 1880 list_move_tail(&ii->i_dirty, &sci->sc_dirty_files);
1995 list_add_tail(&ii->i_dirty, &sci->sc_dirty_files);
1996 } 1881 }
1997 spin_unlock(&nilfs->ns_inode_lock); 1882 spin_unlock(&nilfs->ns_inode_lock);
1998 1883
@@ -2014,8 +1899,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
2014 clear_bit(NILFS_I_BUSY, &ii->i_state); 1899 clear_bit(NILFS_I_BUSY, &ii->i_state);
2015 brelse(ii->i_bh); 1900 brelse(ii->i_bh);
2016 ii->i_bh = NULL; 1901 ii->i_bh = NULL;
2017 list_del(&ii->i_dirty); 1902 list_move_tail(&ii->i_dirty, &ti->ti_garbage);
2018 list_add_tail(&ii->i_dirty, &ti->ti_garbage);
2019 } 1903 }
2020 spin_unlock(&nilfs->ns_inode_lock); 1904 spin_unlock(&nilfs->ns_inode_lock);
2021} 1905}
@@ -2026,7 +1910,6 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
2026static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) 1910static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2027{ 1911{
2028 struct the_nilfs *nilfs = sci->sc_super->s_fs_info; 1912 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2029 struct page *failed_page;
2030 int err; 1913 int err;
2031 1914
2032 sci->sc_stage.scnt = NILFS_ST_INIT; 1915 sci->sc_stage.scnt = NILFS_ST_INIT;
@@ -2081,11 +1964,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2081 nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); 1964 nilfs_segctor_update_segusage(sci, nilfs->ns_sufile);
2082 1965
2083 /* Write partial segments */ 1966 /* Write partial segments */
2084 err = nilfs_segctor_prepare_write(sci, &failed_page); 1967 nilfs_segctor_prepare_write(sci);
2085 if (err) {
2086 nilfs_abort_logs(&sci->sc_segbufs, failed_page, err);
2087 goto failed_to_write;
2088 }
2089 1968
2090 nilfs_add_checksums_on_logs(&sci->sc_segbufs, 1969 nilfs_add_checksums_on_logs(&sci->sc_segbufs,
2091 nilfs->ns_crc_seed); 1970 nilfs->ns_crc_seed);
@@ -2687,7 +2566,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
2687 INIT_LIST_HEAD(&sci->sc_segbufs); 2566 INIT_LIST_HEAD(&sci->sc_segbufs);
2688 INIT_LIST_HEAD(&sci->sc_write_logs); 2567 INIT_LIST_HEAD(&sci->sc_write_logs);
2689 INIT_LIST_HEAD(&sci->sc_gc_inodes); 2568 INIT_LIST_HEAD(&sci->sc_gc_inodes);
2690 INIT_LIST_HEAD(&sci->sc_copied_buffers);
2691 init_timer(&sci->sc_timer); 2569 init_timer(&sci->sc_timer);
2692 2570
2693 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; 2571 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
@@ -2741,8 +2619,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2741 if (flag || !nilfs_segctor_confirm(sci)) 2619 if (flag || !nilfs_segctor_confirm(sci))
2742 nilfs_segctor_write_out(sci); 2620 nilfs_segctor_write_out(sci);
2743 2621
2744 WARN_ON(!list_empty(&sci->sc_copied_buffers));
2745
2746 if (!list_empty(&sci->sc_dirty_files)) { 2622 if (!list_empty(&sci->sc_dirty_files)) {
2747 nilfs_warning(sci->sc_super, __func__, 2623 nilfs_warning(sci->sc_super, __func__,
2748 "dirty file(s) after the final construction\n"); 2624 "dirty file(s) after the final construction\n");
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 6c02a86745fb..38a1d0013314 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -92,7 +92,6 @@ struct nilfs_segsum_pointer {
92 * @sc_nblk_inc: Block count of current generation 92 * @sc_nblk_inc: Block count of current generation
93 * @sc_dirty_files: List of files to be written 93 * @sc_dirty_files: List of files to be written
94 * @sc_gc_inodes: List of GC inodes having blocks to be written 94 * @sc_gc_inodes: List of GC inodes having blocks to be written
95 * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data
96 * @sc_freesegs: array of segment numbers to be freed 95 * @sc_freesegs: array of segment numbers to be freed
97 * @sc_nfreesegs: number of segments on @sc_freesegs 96 * @sc_nfreesegs: number of segments on @sc_freesegs
98 * @sc_dsync_inode: inode whose data pages are written for a sync operation 97 * @sc_dsync_inode: inode whose data pages are written for a sync operation
@@ -136,7 +135,6 @@ struct nilfs_sc_info {
136 135
137 struct list_head sc_dirty_files; 136 struct list_head sc_dirty_files;
138 struct list_head sc_gc_inodes; 137 struct list_head sc_gc_inodes;
139 struct list_head sc_copied_buffers;
140 138
141 __u64 *sc_freesegs; 139 __u64 *sc_freesegs;
142 size_t sc_nfreesegs; 140 size_t sc_nfreesegs;
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 1d6f488ccae8..0a0aba617d8a 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -33,7 +33,9 @@
33 33
34struct nilfs_sufile_info { 34struct nilfs_sufile_info {
35 struct nilfs_mdt_info mi; 35 struct nilfs_mdt_info mi;
36 unsigned long ncleansegs; 36 unsigned long ncleansegs;/* number of clean segments */
37 __u64 allocmin; /* lower limit of allocatable segment range */
38 __u64 allocmax; /* upper limit of allocatable segment range */
37}; 39};
38 40
39static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile) 41static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile)
@@ -96,6 +98,13 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum,
96 create, NULL, bhp); 98 create, NULL, bhp);
97} 99}
98 100
101static int nilfs_sufile_delete_segment_usage_block(struct inode *sufile,
102 __u64 segnum)
103{
104 return nilfs_mdt_delete_block(sufile,
105 nilfs_sufile_get_blkoff(sufile, segnum));
106}
107
99static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, 108static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
100 u64 ncleanadd, u64 ndirtyadd) 109 u64 ncleanadd, u64 ndirtyadd)
101{ 110{
@@ -108,7 +117,7 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
108 le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); 117 le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd);
109 kunmap_atomic(kaddr, KM_USER0); 118 kunmap_atomic(kaddr, KM_USER0);
110 119
111 nilfs_mdt_mark_buffer_dirty(header_bh); 120 mark_buffer_dirty(header_bh);
112} 121}
113 122
114/** 123/**
@@ -248,6 +257,35 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create,
248} 257}
249 258
250/** 259/**
260 * nilfs_sufile_set_alloc_range - limit range of segment to be allocated
261 * @sufile: inode of segment usage file
262 * @start: minimum segment number of allocatable region (inclusive)
263 * @end: maximum segment number of allocatable region (inclusive)
264 *
265 * Return Value: On success, 0 is returned. On error, one of the
266 * following negative error codes is returned.
267 *
268 * %-ERANGE - invalid segment region
269 */
270int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end)
271{
272 struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
273 __u64 nsegs;
274 int ret = -ERANGE;
275
276 down_write(&NILFS_MDT(sufile)->mi_sem);
277 nsegs = nilfs_sufile_get_nsegments(sufile);
278
279 if (start <= end && end < nsegs) {
280 sui->allocmin = start;
281 sui->allocmax = end;
282 ret = 0;
283 }
284 up_write(&NILFS_MDT(sufile)->mi_sem);
285 return ret;
286}
287
288/**
251 * nilfs_sufile_alloc - allocate a segment 289 * nilfs_sufile_alloc - allocate a segment
252 * @sufile: inode of segment usage file 290 * @sufile: inode of segment usage file
253 * @segnump: pointer to segment number 291 * @segnump: pointer to segment number
@@ -269,11 +307,12 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
269 struct buffer_head *header_bh, *su_bh; 307 struct buffer_head *header_bh, *su_bh;
270 struct nilfs_sufile_header *header; 308 struct nilfs_sufile_header *header;
271 struct nilfs_segment_usage *su; 309 struct nilfs_segment_usage *su;
310 struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
272 size_t susz = NILFS_MDT(sufile)->mi_entry_size; 311 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
273 __u64 segnum, maxsegnum, last_alloc; 312 __u64 segnum, maxsegnum, last_alloc;
274 void *kaddr; 313 void *kaddr;
275 unsigned long nsegments, ncleansegs, nsus; 314 unsigned long nsegments, ncleansegs, nsus, cnt;
276 int ret, i, j; 315 int ret, j;
277 316
278 down_write(&NILFS_MDT(sufile)->mi_sem); 317 down_write(&NILFS_MDT(sufile)->mi_sem);
279 318
@@ -287,13 +326,31 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
287 kunmap_atomic(kaddr, KM_USER0); 326 kunmap_atomic(kaddr, KM_USER0);
288 327
289 nsegments = nilfs_sufile_get_nsegments(sufile); 328 nsegments = nilfs_sufile_get_nsegments(sufile);
329 maxsegnum = sui->allocmax;
290 segnum = last_alloc + 1; 330 segnum = last_alloc + 1;
291 maxsegnum = nsegments - 1; 331 if (segnum < sui->allocmin || segnum > sui->allocmax)
292 for (i = 0; i < nsegments; i += nsus) { 332 segnum = sui->allocmin;
293 if (segnum >= nsegments) { 333
294 /* wrap around */ 334 for (cnt = 0; cnt < nsegments; cnt += nsus) {
295 segnum = 0; 335 if (segnum > maxsegnum) {
296 maxsegnum = last_alloc; 336 if (cnt < sui->allocmax - sui->allocmin + 1) {
337 /*
338 * wrap around in the limited region.
339 * if allocation started from
340 * sui->allocmin, this never happens.
341 */
342 segnum = sui->allocmin;
343 maxsegnum = last_alloc;
344 } else if (segnum > sui->allocmin &&
345 sui->allocmax + 1 < nsegments) {
346 segnum = sui->allocmax + 1;
347 maxsegnum = nsegments - 1;
348 } else if (sui->allocmin > 0) {
349 segnum = 0;
350 maxsegnum = sui->allocmin - 1;
351 } else {
352 break; /* never happens */
353 }
297 } 354 }
298 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, 355 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1,
299 &su_bh); 356 &su_bh);
@@ -319,9 +376,9 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
319 header->sh_last_alloc = cpu_to_le64(segnum); 376 header->sh_last_alloc = cpu_to_le64(segnum);
320 kunmap_atomic(kaddr, KM_USER0); 377 kunmap_atomic(kaddr, KM_USER0);
321 378
322 NILFS_SUI(sufile)->ncleansegs--; 379 sui->ncleansegs--;
323 nilfs_mdt_mark_buffer_dirty(header_bh); 380 mark_buffer_dirty(header_bh);
324 nilfs_mdt_mark_buffer_dirty(su_bh); 381 mark_buffer_dirty(su_bh);
325 nilfs_mdt_mark_dirty(sufile); 382 nilfs_mdt_mark_dirty(sufile);
326 brelse(su_bh); 383 brelse(su_bh);
327 *segnump = segnum; 384 *segnump = segnum;
@@ -364,7 +421,7 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum,
364 nilfs_sufile_mod_counter(header_bh, -1, 1); 421 nilfs_sufile_mod_counter(header_bh, -1, 1);
365 NILFS_SUI(sufile)->ncleansegs--; 422 NILFS_SUI(sufile)->ncleansegs--;
366 423
367 nilfs_mdt_mark_buffer_dirty(su_bh); 424 mark_buffer_dirty(su_bh);
368 nilfs_mdt_mark_dirty(sufile); 425 nilfs_mdt_mark_dirty(sufile);
369} 426}
370 427
@@ -395,7 +452,7 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
395 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); 452 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
396 NILFS_SUI(sufile)->ncleansegs -= clean; 453 NILFS_SUI(sufile)->ncleansegs -= clean;
397 454
398 nilfs_mdt_mark_buffer_dirty(su_bh); 455 mark_buffer_dirty(su_bh);
399 nilfs_mdt_mark_dirty(sufile); 456 nilfs_mdt_mark_dirty(sufile);
400} 457}
401 458
@@ -421,7 +478,7 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
421 sudirty = nilfs_segment_usage_dirty(su); 478 sudirty = nilfs_segment_usage_dirty(su);
422 nilfs_segment_usage_set_clean(su); 479 nilfs_segment_usage_set_clean(su);
423 kunmap_atomic(kaddr, KM_USER0); 480 kunmap_atomic(kaddr, KM_USER0);
424 nilfs_mdt_mark_buffer_dirty(su_bh); 481 mark_buffer_dirty(su_bh);
425 482
426 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); 483 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0);
427 NILFS_SUI(sufile)->ncleansegs++; 484 NILFS_SUI(sufile)->ncleansegs++;
@@ -441,7 +498,7 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
441 498
442 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); 499 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
443 if (!ret) { 500 if (!ret) {
444 nilfs_mdt_mark_buffer_dirty(bh); 501 mark_buffer_dirty(bh);
445 nilfs_mdt_mark_dirty(sufile); 502 nilfs_mdt_mark_dirty(sufile);
446 brelse(bh); 503 brelse(bh);
447 } 504 }
@@ -476,7 +533,7 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
476 su->su_nblocks = cpu_to_le32(nblocks); 533 su->su_nblocks = cpu_to_le32(nblocks);
477 kunmap_atomic(kaddr, KM_USER0); 534 kunmap_atomic(kaddr, KM_USER0);
478 535
479 nilfs_mdt_mark_buffer_dirty(bh); 536 mark_buffer_dirty(bh);
480 nilfs_mdt_mark_dirty(sufile); 537 nilfs_mdt_mark_dirty(sufile);
481 brelse(bh); 538 brelse(bh);
482 539
@@ -505,7 +562,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
505{ 562{
506 struct buffer_head *header_bh; 563 struct buffer_head *header_bh;
507 struct nilfs_sufile_header *header; 564 struct nilfs_sufile_header *header;
508 struct the_nilfs *nilfs = NILFS_I_NILFS(sufile); 565 struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
509 void *kaddr; 566 void *kaddr;
510 int ret; 567 int ret;
511 568
@@ -555,11 +612,183 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
555 nilfs_sufile_mod_counter(header_bh, -1, 0); 612 nilfs_sufile_mod_counter(header_bh, -1, 0);
556 NILFS_SUI(sufile)->ncleansegs--; 613 NILFS_SUI(sufile)->ncleansegs--;
557 } 614 }
558 nilfs_mdt_mark_buffer_dirty(su_bh); 615 mark_buffer_dirty(su_bh);
559 nilfs_mdt_mark_dirty(sufile); 616 nilfs_mdt_mark_dirty(sufile);
560} 617}
561 618
562/** 619/**
620 * nilfs_sufile_truncate_range - truncate range of segment array
621 * @sufile: inode of segment usage file
622 * @start: start segment number (inclusive)
623 * @end: end segment number (inclusive)
624 *
625 * Return Value: On success, 0 is returned. On error, one of the
626 * following negative error codes is returned.
627 *
628 * %-EIO - I/O error.
629 *
630 * %-ENOMEM - Insufficient amount of memory available.
631 *
632 * %-EINVAL - Invalid number of segments specified
633 *
634 * %-EBUSY - Dirty or active segments are present in the range
635 */
636static int nilfs_sufile_truncate_range(struct inode *sufile,
637 __u64 start, __u64 end)
638{
639 struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
640 struct buffer_head *header_bh;
641 struct buffer_head *su_bh;
642 struct nilfs_segment_usage *su, *su2;
643 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
644 unsigned long segusages_per_block;
645 unsigned long nsegs, ncleaned;
646 __u64 segnum;
647 void *kaddr;
648 ssize_t n, nc;
649 int ret;
650 int j;
651
652 nsegs = nilfs_sufile_get_nsegments(sufile);
653
654 ret = -EINVAL;
655 if (start > end || start >= nsegs)
656 goto out;
657
658 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
659 if (ret < 0)
660 goto out;
661
662 segusages_per_block = nilfs_sufile_segment_usages_per_block(sufile);
663 ncleaned = 0;
664
665 for (segnum = start; segnum <= end; segnum += n) {
666 n = min_t(unsigned long,
667 segusages_per_block -
668 nilfs_sufile_get_offset(sufile, segnum),
669 end - segnum + 1);
670 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0,
671 &su_bh);
672 if (ret < 0) {
673 if (ret != -ENOENT)
674 goto out_header;
675 /* hole */
676 continue;
677 }
678 kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
679 su = nilfs_sufile_block_get_segment_usage(
680 sufile, segnum, su_bh, kaddr);
681 su2 = su;
682 for (j = 0; j < n; j++, su = (void *)su + susz) {
683 if ((le32_to_cpu(su->su_flags) &
684 ~(1UL << NILFS_SEGMENT_USAGE_ERROR)) ||
685 nilfs_segment_is_active(nilfs, segnum + j)) {
686 ret = -EBUSY;
687 kunmap_atomic(kaddr, KM_USER0);
688 brelse(su_bh);
689 goto out_header;
690 }
691 }
692 nc = 0;
693 for (su = su2, j = 0; j < n; j++, su = (void *)su + susz) {
694 if (nilfs_segment_usage_error(su)) {
695 nilfs_segment_usage_set_clean(su);
696 nc++;
697 }
698 }
699 kunmap_atomic(kaddr, KM_USER0);
700 if (nc > 0) {
701 mark_buffer_dirty(su_bh);
702 ncleaned += nc;
703 }
704 brelse(su_bh);
705
706 if (n == segusages_per_block) {
707 /* make hole */
708 nilfs_sufile_delete_segment_usage_block(sufile, segnum);
709 }
710 }
711 ret = 0;
712
713out_header:
714 if (ncleaned > 0) {
715 NILFS_SUI(sufile)->ncleansegs += ncleaned;
716 nilfs_sufile_mod_counter(header_bh, ncleaned, 0);
717 nilfs_mdt_mark_dirty(sufile);
718 }
719 brelse(header_bh);
720out:
721 return ret;
722}
723
724/**
725 * nilfs_sufile_resize - resize segment array
726 * @sufile: inode of segment usage file
727 * @newnsegs: new number of segments
728 *
729 * Return Value: On success, 0 is returned. On error, one of the
730 * following negative error codes is returned.
731 *
732 * %-EIO - I/O error.
733 *
734 * %-ENOMEM - Insufficient amount of memory available.
735 *
736 * %-ENOSPC - Enough free space is not left for shrinking
737 *
738 * %-EBUSY - Dirty or active segments exist in the region to be truncated
739 */
740int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
741{
742 struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
743 struct buffer_head *header_bh;
744 struct nilfs_sufile_header *header;
745 struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
746 void *kaddr;
747 unsigned long nsegs, nrsvsegs;
748 int ret = 0;
749
750 down_write(&NILFS_MDT(sufile)->mi_sem);
751
752 nsegs = nilfs_sufile_get_nsegments(sufile);
753 if (nsegs == newnsegs)
754 goto out;
755
756 ret = -ENOSPC;
757 nrsvsegs = nilfs_nrsvsegs(nilfs, newnsegs);
758 if (newnsegs < nsegs && nsegs - newnsegs + nrsvsegs > sui->ncleansegs)
759 goto out;
760
761 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
762 if (ret < 0)
763 goto out;
764
765 if (newnsegs > nsegs) {
766 sui->ncleansegs += newnsegs - nsegs;
767 } else /* newnsegs < nsegs */ {
768 ret = nilfs_sufile_truncate_range(sufile, newnsegs, nsegs - 1);
769 if (ret < 0)
770 goto out_header;
771
772 sui->ncleansegs -= nsegs - newnsegs;
773 }
774
775 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
776 header = kaddr + bh_offset(header_bh);
777 header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs);
778 kunmap_atomic(kaddr, KM_USER0);
779
780 mark_buffer_dirty(header_bh);
781 nilfs_mdt_mark_dirty(sufile);
782 nilfs_set_nsegments(nilfs, newnsegs);
783
784out_header:
785 brelse(header_bh);
786out:
787 up_write(&NILFS_MDT(sufile)->mi_sem);
788 return ret;
789}
790
791/**
563 * nilfs_sufile_get_suinfo - 792 * nilfs_sufile_get_suinfo -
564 * @sufile: inode of segment usage file 793 * @sufile: inode of segment usage file
565 * @segnum: segment number to start looking 794 * @segnum: segment number to start looking
@@ -583,7 +812,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
583 struct nilfs_segment_usage *su; 812 struct nilfs_segment_usage *su;
584 struct nilfs_suinfo *si = buf; 813 struct nilfs_suinfo *si = buf;
585 size_t susz = NILFS_MDT(sufile)->mi_entry_size; 814 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
586 struct the_nilfs *nilfs = NILFS_I_NILFS(sufile); 815 struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
587 void *kaddr; 816 void *kaddr;
588 unsigned long nsegs, segusages_per_block; 817 unsigned long nsegs, segusages_per_block;
589 ssize_t n; 818 ssize_t n;
@@ -679,6 +908,9 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize,
679 kunmap_atomic(kaddr, KM_USER0); 908 kunmap_atomic(kaddr, KM_USER0);
680 brelse(header_bh); 909 brelse(header_bh);
681 910
911 sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1;
912 sui->allocmin = 0;
913
682 unlock_new_inode(sufile); 914 unlock_new_inode(sufile);
683 out: 915 out:
684 *inodep = sufile; 916 *inodep = sufile;
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index a943fbacb45b..e84bc5b51fc1 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -31,11 +31,12 @@
31 31
32static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) 32static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
33{ 33{
34 return NILFS_I_NILFS(sufile)->ns_nsegments; 34 return ((struct the_nilfs *)sufile->i_sb->s_fs_info)->ns_nsegments;
35} 35}
36 36
37unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile); 37unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile);
38 38
39int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end);
39int nilfs_sufile_alloc(struct inode *, __u64 *); 40int nilfs_sufile_alloc(struct inode *, __u64 *);
40int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum); 41int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum);
41int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, 42int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
@@ -61,6 +62,7 @@ void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *,
61void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, 62void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *,
62 struct buffer_head *); 63 struct buffer_head *);
63 64
65int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs);
64int nilfs_sufile_read(struct super_block *sb, size_t susize, 66int nilfs_sufile_read(struct super_block *sb, size_t susize,
65 struct nilfs_inode *raw_inode, struct inode **inodep); 67 struct nilfs_inode *raw_inode, struct inode **inodep);
66 68
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 062cca065195..8351c44a7320 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -56,6 +56,7 @@
56#include "btnode.h" 56#include "btnode.h"
57#include "page.h" 57#include "page.h"
58#include "cpfile.h" 58#include "cpfile.h"
59#include "sufile.h" /* nilfs_sufile_resize(), nilfs_sufile_set_alloc_range() */
59#include "ifile.h" 60#include "ifile.h"
60#include "dat.h" 61#include "dat.h"
61#include "segment.h" 62#include "segment.h"
@@ -165,7 +166,7 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
165 ii->i_state = 0; 166 ii->i_state = 0;
166 ii->i_cno = 0; 167 ii->i_cno = 0;
167 ii->vfs_inode.i_version = 1; 168 ii->vfs_inode.i_version = 1;
168 nilfs_btnode_cache_init(&ii->i_btnode_cache, sb->s_bdi); 169 nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode, sb->s_bdi);
169 return &ii->vfs_inode; 170 return &ii->vfs_inode;
170} 171}
171 172
@@ -347,6 +348,134 @@ int nilfs_cleanup_super(struct super_block *sb)
347 return ret; 348 return ret;
348} 349}
349 350
351/**
352 * nilfs_move_2nd_super - relocate secondary super block
353 * @sb: super block instance
354 * @sb2off: new offset of the secondary super block (in bytes)
355 */
356static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
357{
358 struct the_nilfs *nilfs = sb->s_fs_info;
359 struct buffer_head *nsbh;
360 struct nilfs_super_block *nsbp;
361 sector_t blocknr, newblocknr;
362 unsigned long offset;
363 int sb2i = -1; /* array index of the secondary superblock */
364 int ret = 0;
365
366 /* nilfs->ns_sem must be locked by the caller. */
367 if (nilfs->ns_sbh[1] &&
368 nilfs->ns_sbh[1]->b_blocknr > nilfs->ns_first_data_block) {
369 sb2i = 1;
370 blocknr = nilfs->ns_sbh[1]->b_blocknr;
371 } else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) {
372 sb2i = 0;
373 blocknr = nilfs->ns_sbh[0]->b_blocknr;
374 }
375 if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off)
376 goto out; /* super block location is unchanged */
377
378 /* Get new super block buffer */
379 newblocknr = sb2off >> nilfs->ns_blocksize_bits;
380 offset = sb2off & (nilfs->ns_blocksize - 1);
381 nsbh = sb_getblk(sb, newblocknr);
382 if (!nsbh) {
383 printk(KERN_WARNING
384 "NILFS warning: unable to move secondary superblock "
385 "to block %llu\n", (unsigned long long)newblocknr);
386 ret = -EIO;
387 goto out;
388 }
389 nsbp = (void *)nsbh->b_data + offset;
390 memset(nsbp, 0, nilfs->ns_blocksize);
391
392 if (sb2i >= 0) {
393 memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize);
394 brelse(nilfs->ns_sbh[sb2i]);
395 nilfs->ns_sbh[sb2i] = nsbh;
396 nilfs->ns_sbp[sb2i] = nsbp;
397 } else if (nilfs->ns_sbh[0]->b_blocknr < nilfs->ns_first_data_block) {
398 /* secondary super block will be restored to index 1 */
399 nilfs->ns_sbh[1] = nsbh;
400 nilfs->ns_sbp[1] = nsbp;
401 } else {
402 brelse(nsbh);
403 }
404out:
405 return ret;
406}
407
408/**
409 * nilfs_resize_fs - resize the filesystem
410 * @sb: super block instance
411 * @newsize: new size of the filesystem (in bytes)
412 */
413int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
414{
415 struct the_nilfs *nilfs = sb->s_fs_info;
416 struct nilfs_super_block **sbp;
417 __u64 devsize, newnsegs;
418 loff_t sb2off;
419 int ret;
420
421 ret = -ERANGE;
422 devsize = i_size_read(sb->s_bdev->bd_inode);
423 if (newsize > devsize)
424 goto out;
425
426 /*
427 * Write lock is required to protect some functions depending
428 * on the number of segments, the number of reserved segments,
429 * and so forth.
430 */
431 down_write(&nilfs->ns_segctor_sem);
432
433 sb2off = NILFS_SB2_OFFSET_BYTES(newsize);
434 newnsegs = sb2off >> nilfs->ns_blocksize_bits;
435 do_div(newnsegs, nilfs->ns_blocks_per_segment);
436
437 ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs);
438 up_write(&nilfs->ns_segctor_sem);
439 if (ret < 0)
440 goto out;
441
442 ret = nilfs_construct_segment(sb);
443 if (ret < 0)
444 goto out;
445
446 down_write(&nilfs->ns_sem);
447 nilfs_move_2nd_super(sb, sb2off);
448 ret = -EIO;
449 sbp = nilfs_prepare_super(sb, 0);
450 if (likely(sbp)) {
451 nilfs_set_log_cursor(sbp[0], nilfs);
452 /*
453 * Drop NILFS_RESIZE_FS flag for compatibility with
454 * mount-time resize which may be implemented in a
455 * future release.
456 */
457 sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) &
458 ~NILFS_RESIZE_FS);
459 sbp[0]->s_dev_size = cpu_to_le64(newsize);
460 sbp[0]->s_nsegments = cpu_to_le64(nilfs->ns_nsegments);
461 if (sbp[1])
462 memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
463 ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
464 }
465 up_write(&nilfs->ns_sem);
466
467 /*
468 * Reset the range of allocatable segments last. This order
469 * is important in the case of expansion because the secondary
470 * superblock must be protected from log write until migration
471 * completes.
472 */
473 if (!ret)
474 nilfs_sufile_set_alloc_range(nilfs->ns_sufile, 0, newnsegs - 1);
475out:
476 return ret;
477}
478
350static void nilfs_put_super(struct super_block *sb) 479static void nilfs_put_super(struct super_block *sb)
351{ 480{
352 struct the_nilfs *nilfs = sb->s_fs_info; 481 struct the_nilfs *nilfs = sb->s_fs_info;
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index d2acd1a651f3..d32714094375 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -363,6 +363,24 @@ static unsigned long long nilfs_max_size(unsigned int blkbits)
363 return res; 363 return res;
364} 364}
365 365
366/**
367 * nilfs_nrsvsegs - calculate the number of reserved segments
368 * @nilfs: nilfs object
369 * @nsegs: total number of segments
370 */
371unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs)
372{
373 return max_t(unsigned long, NILFS_MIN_NRSVSEGS,
374 DIV_ROUND_UP(nsegs * nilfs->ns_r_segments_percentage,
375 100));
376}
377
378void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs)
379{
380 nilfs->ns_nsegments = nsegs;
381 nilfs->ns_nrsvsegs = nilfs_nrsvsegs(nilfs, nsegs);
382}
383
366static int nilfs_store_disk_layout(struct the_nilfs *nilfs, 384static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
367 struct nilfs_super_block *sbp) 385 struct nilfs_super_block *sbp)
368{ 386{
@@ -389,13 +407,9 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
389 } 407 }
390 408
391 nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block); 409 nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block);
392 nilfs->ns_nsegments = le64_to_cpu(sbp->s_nsegments);
393 nilfs->ns_r_segments_percentage = 410 nilfs->ns_r_segments_percentage =
394 le32_to_cpu(sbp->s_r_segments_percentage); 411 le32_to_cpu(sbp->s_r_segments_percentage);
395 nilfs->ns_nrsvsegs = 412 nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments));
396 max_t(unsigned long, NILFS_MIN_NRSVSEGS,
397 DIV_ROUND_UP(nilfs->ns_nsegments *
398 nilfs->ns_r_segments_percentage, 100));
399 nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed); 413 nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
400 return 0; 414 return 0;
401} 415}
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index f4968145c2a3..9992b11312ff 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -268,6 +268,8 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev);
268void destroy_nilfs(struct the_nilfs *nilfs); 268void destroy_nilfs(struct the_nilfs *nilfs);
269int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data); 269int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data);
270int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb); 270int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb);
271unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs);
272void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs);
271int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t); 273int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t);
272int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); 274int nilfs_count_free_blocks(struct the_nilfs *, sector_t *);
273struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno); 275struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 643720209a98..9a3e6bbff27b 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -539,25 +539,41 @@ static int o2hb_verify_crc(struct o2hb_region *reg,
539 539
540/* We want to make sure that nobody is heartbeating on top of us -- 540/* We want to make sure that nobody is heartbeating on top of us --
541 * this will help detect an invalid configuration. */ 541 * this will help detect an invalid configuration. */
542static int o2hb_check_last_timestamp(struct o2hb_region *reg) 542static void o2hb_check_last_timestamp(struct o2hb_region *reg)
543{ 543{
544 int node_num, ret;
545 struct o2hb_disk_slot *slot; 544 struct o2hb_disk_slot *slot;
546 struct o2hb_disk_heartbeat_block *hb_block; 545 struct o2hb_disk_heartbeat_block *hb_block;
546 char *errstr;
547 547
548 node_num = o2nm_this_node(); 548 slot = &reg->hr_slots[o2nm_this_node()];
549
550 ret = 1;
551 slot = &reg->hr_slots[node_num];
552 /* Don't check on our 1st timestamp */ 549 /* Don't check on our 1st timestamp */
553 if (slot->ds_last_time) { 550 if (!slot->ds_last_time)
554 hb_block = slot->ds_raw_block; 551 return;
555 552
556 if (le64_to_cpu(hb_block->hb_seq) != slot->ds_last_time) 553 hb_block = slot->ds_raw_block;
557 ret = 0; 554 if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time &&
558 } 555 le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation &&
556 hb_block->hb_node == slot->ds_node_num)
557 return;
559 558
560 return ret; 559#define ERRSTR1 "Another node is heartbeating on device"
560#define ERRSTR2 "Heartbeat generation mismatch on device"
561#define ERRSTR3 "Heartbeat sequence mismatch on device"
562
563 if (hb_block->hb_node != slot->ds_node_num)
564 errstr = ERRSTR1;
565 else if (le64_to_cpu(hb_block->hb_generation) !=
566 slot->ds_last_generation)
567 errstr = ERRSTR2;
568 else
569 errstr = ERRSTR3;
570
571 mlog(ML_ERROR, "%s (%s): expected(%u:0x%llx, 0x%llx), "
572 "ondisk(%u:0x%llx, 0x%llx)\n", errstr, reg->hr_dev_name,
573 slot->ds_node_num, (unsigned long long)slot->ds_last_generation,
574 (unsigned long long)slot->ds_last_time, hb_block->hb_node,
575 (unsigned long long)le64_to_cpu(hb_block->hb_generation),
576 (unsigned long long)le64_to_cpu(hb_block->hb_seq));
561} 577}
562 578
563static inline void o2hb_prepare_block(struct o2hb_region *reg, 579static inline void o2hb_prepare_block(struct o2hb_region *reg,
@@ -983,9 +999,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
983 /* With an up to date view of the slots, we can check that no 999 /* With an up to date view of the slots, we can check that no
984 * other node has been improperly configured to heartbeat in 1000 * other node has been improperly configured to heartbeat in
985 * our slot. */ 1001 * our slot. */
986 if (!o2hb_check_last_timestamp(reg)) 1002 o2hb_check_last_timestamp(reg);
987 mlog(ML_ERROR, "Device \"%s\": another node is heartbeating "
988 "in our slot!\n", reg->hr_dev_name);
989 1003
990 /* fill in the proper info for our next heartbeat */ 1004 /* fill in the proper info for our next heartbeat */
991 o2hb_prepare_block(reg, reg->hr_generation); 1005 o2hb_prepare_block(reg, reg->hr_generation);
@@ -999,8 +1013,8 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
999 } 1013 }
1000 1014
1001 i = -1; 1015 i = -1;
1002 while((i = find_next_bit(configured_nodes, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { 1016 while((i = find_next_bit(configured_nodes,
1003 1017 O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
1004 change |= o2hb_check_slot(reg, &reg->hr_slots[i]); 1018 change |= o2hb_check_slot(reg, &reg->hr_slots[i]);
1005 } 1019 }
1006 1020
@@ -1690,6 +1704,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1690 struct file *filp = NULL; 1704 struct file *filp = NULL;
1691 struct inode *inode = NULL; 1705 struct inode *inode = NULL;
1692 ssize_t ret = -EINVAL; 1706 ssize_t ret = -EINVAL;
1707 int live_threshold;
1693 1708
1694 if (reg->hr_bdev) 1709 if (reg->hr_bdev)
1695 goto out; 1710 goto out;
@@ -1766,8 +1781,18 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1766 * A node is considered live after it has beat LIVE_THRESHOLD 1781 * A node is considered live after it has beat LIVE_THRESHOLD
1767 * times. We're not steady until we've given them a chance 1782 * times. We're not steady until we've given them a chance
1768 * _after_ our first read. 1783 * _after_ our first read.
1784 * The default threshold is bare minimum so as to limit the delay
1785 * during mounts. For global heartbeat, the threshold doubled for the
1786 * first region.
1769 */ 1787 */
1770 atomic_set(&reg->hr_steady_iterations, O2HB_LIVE_THRESHOLD + 1); 1788 live_threshold = O2HB_LIVE_THRESHOLD;
1789 if (o2hb_global_heartbeat_active()) {
1790 spin_lock(&o2hb_live_lock);
1791 if (o2hb_pop_count(&o2hb_region_bitmap, O2NM_MAX_REGIONS) == 1)
1792 live_threshold <<= 1;
1793 spin_unlock(&o2hb_live_lock);
1794 }
1795 atomic_set(&reg->hr_steady_iterations, live_threshold + 1);
1771 1796
1772 hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s", 1797 hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s",
1773 reg->hr_item.ci_name); 1798 reg->hr_item.ci_name);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 9fe5b8fd658f..8582e3f4f120 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2868,7 +2868,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
2868 bytes = blocks_wanted << sb->s_blocksize_bits; 2868 bytes = blocks_wanted << sb->s_blocksize_bits;
2869 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 2869 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
2870 struct ocfs2_inode_info *oi = OCFS2_I(dir); 2870 struct ocfs2_inode_info *oi = OCFS2_I(dir);
2871 struct ocfs2_alloc_context *data_ac; 2871 struct ocfs2_alloc_context *data_ac = NULL;
2872 struct ocfs2_alloc_context *meta_ac = NULL; 2872 struct ocfs2_alloc_context *meta_ac = NULL;
2873 struct buffer_head *dirdata_bh = NULL; 2873 struct buffer_head *dirdata_bh = NULL;
2874 struct buffer_head *dx_root_bh = NULL; 2874 struct buffer_head *dx_root_bh = NULL;
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 7540a492eaba..3b179d6cbde0 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1614,7 +1614,8 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm)
1614 spin_unlock(&dlm->spinlock); 1614 spin_unlock(&dlm->spinlock);
1615 1615
1616 /* Support for global heartbeat and node info was added in 1.1 */ 1616 /* Support for global heartbeat and node info was added in 1.1 */
1617 if (dlm_protocol.pv_major > 1 || dlm_protocol.pv_minor > 0) { 1617 if (dlm->dlm_locking_proto.pv_major > 1 ||
1618 dlm->dlm_locking_proto.pv_minor > 0) {
1618 status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map); 1619 status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map);
1619 if (status) { 1620 if (status) {
1620 mlog_errno(status); 1621 mlog_errno(status);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index fede57ed005f..84d166328cf7 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2574,6 +2574,9 @@ fail:
2574 res->state &= ~DLM_LOCK_RES_MIGRATING; 2574 res->state &= ~DLM_LOCK_RES_MIGRATING;
2575 wake = 1; 2575 wake = 1;
2576 spin_unlock(&res->spinlock); 2576 spin_unlock(&res->spinlock);
2577 if (dlm_is_host_down(ret))
2578 dlm_wait_for_node_death(dlm, target,
2579 DLM_NODE_DEATH_WAIT_MAX);
2577 goto leave; 2580 goto leave;
2578 } 2581 }
2579 2582
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 41565ae52856..89659d6dc206 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1607,6 +1607,9 @@ static void ocfs2_calc_trunc_pos(struct inode *inode,
1607 range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); 1607 range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
1608 1608
1609 if (le32_to_cpu(rec->e_cpos) >= trunc_start) { 1609 if (le32_to_cpu(rec->e_cpos) >= trunc_start) {
1610 /*
1611 * remove an entire extent record.
1612 */
1610 *trunc_cpos = le32_to_cpu(rec->e_cpos); 1613 *trunc_cpos = le32_to_cpu(rec->e_cpos);
1611 /* 1614 /*
1612 * Skip holes if any. 1615 * Skip holes if any.
@@ -1617,7 +1620,16 @@ static void ocfs2_calc_trunc_pos(struct inode *inode,
1617 *blkno = le64_to_cpu(rec->e_blkno); 1620 *blkno = le64_to_cpu(rec->e_blkno);
1618 *trunc_end = le32_to_cpu(rec->e_cpos); 1621 *trunc_end = le32_to_cpu(rec->e_cpos);
1619 } else if (range > trunc_start) { 1622 } else if (range > trunc_start) {
1623 /*
1624 * remove a partial extent record, which means we're
1625 * removing the last extent record.
1626 */
1620 *trunc_cpos = trunc_start; 1627 *trunc_cpos = trunc_start;
1628 /*
1629 * skip hole if any.
1630 */
1631 if (range < *trunc_end)
1632 *trunc_end = range;
1621 *trunc_len = *trunc_end - trunc_start; 1633 *trunc_len = *trunc_end - trunc_start;
1622 coff = trunc_start - le32_to_cpu(rec->e_cpos); 1634 coff = trunc_start - le32_to_cpu(rec->e_cpos);
1623 *blkno = le64_to_cpu(rec->e_blkno) + 1635 *blkno = le64_to_cpu(rec->e_blkno) +
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index b141a44605ca..295d56454e8b 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1260,6 +1260,9 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb)
1260{ 1260{
1261 struct ocfs2_journal *journal = osb->journal; 1261 struct ocfs2_journal *journal = osb->journal;
1262 1262
1263 if (ocfs2_is_hard_readonly(osb))
1264 return;
1265
1263 /* No need to queue up our truncate_log as regular cleanup will catch 1266 /* No need to queue up our truncate_log as regular cleanup will catch
1264 * that */ 1267 * that */
1265 ocfs2_queue_recovery_completion(journal, osb->slot_num, 1268 ocfs2_queue_recovery_completion(journal, osb->slot_num,
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 5d32749c896d..3c7606cff1ab 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3706,7 +3706,7 @@ int ocfs2_refcount_cow_xattr(struct inode *inode,
3706 context->cow_start = cow_start; 3706 context->cow_start = cow_start;
3707 context->cow_len = cow_len; 3707 context->cow_len = cow_len;
3708 context->ref_tree = ref_tree; 3708 context->ref_tree = ref_tree;
3709 context->ref_root_bh = ref_root_bh;; 3709 context->ref_root_bh = ref_root_bh;
3710 context->cow_object = xv; 3710 context->cow_object = xv;
3711 3711
3712 context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_jbd; 3712 context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_jbd;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index d545e97d99c3..8ed4d3433199 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -255,7 +255,11 @@ ssize_t part_discard_alignment_show(struct device *dev,
255 struct device_attribute *attr, char *buf) 255 struct device_attribute *attr, char *buf)
256{ 256{
257 struct hd_struct *p = dev_to_part(dev); 257 struct hd_struct *p = dev_to_part(dev);
258 return sprintf(buf, "%u\n", p->discard_alignment); 258 struct gendisk *disk = dev_to_disk(dev);
259
260 return sprintf(buf, "%u\n",
261 queue_limit_discard_alignment(&disk->queue->limits,
262 p->start_sect));
259} 263}
260 264
261ssize_t part_stat_show(struct device *dev, 265ssize_t part_stat_show(struct device *dev,
@@ -449,8 +453,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
449 p->start_sect = start; 453 p->start_sect = start;
450 p->alignment_offset = 454 p->alignment_offset =
451 queue_limit_alignment_offset(&disk->queue->limits, start); 455 queue_limit_alignment_offset(&disk->queue->limits, start);
452 p->discard_alignment =
453 queue_limit_discard_alignment(&disk->queue->limits, start);
454 p->nr_sects = len; 456 p->nr_sects = len;
455 p->partno = partno; 457 p->partno = partno;
456 p->policy = get_disk_ro(disk); 458 p->policy = get_disk_ro(disk);
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index ac0ccb5026a2..19d6750d1d6c 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -348,6 +348,12 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
348 goto fail; 348 goto fail;
349 } 349 }
350 350
351 /* Check that sizeof_partition_entry has the correct value */
352 if (le32_to_cpu((*gpt)->sizeof_partition_entry) != sizeof(gpt_entry)) {
353 pr_debug("GUID Partitition Entry Size check failed.\n");
354 goto fail;
355 }
356
351 if (!(*ptes = alloc_read_gpt_entries(state, *gpt))) 357 if (!(*ptes = alloc_read_gpt_entries(state, *gpt)))
352 goto fail; 358 goto fail;
353 359
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index ce4f62440425..af9fdf046769 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -565,7 +565,7 @@ static bool ldm_validate_partition_table(struct parsed_partitions *state)
565 565
566 data = read_part_sector(state, 0, &sect); 566 data = read_part_sector(state, 0, &sect);
567 if (!data) { 567 if (!data) {
568 ldm_crit ("Disk read failed."); 568 ldm_info ("Disk read failed.");
569 return false; 569 return false;
570 } 570 }
571 571
@@ -1335,6 +1335,11 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
1335 1335
1336 list_add_tail (&f->list, frags); 1336 list_add_tail (&f->list, frags);
1337found: 1337found:
1338 if (rec >= f->num) {
1339 ldm_error("REC value (%d) exceeds NUM value (%d)", rec, f->num);
1340 return false;
1341 }
1342
1338 if (f->map & (1 << rec)) { 1343 if (f->map & (1 << rec)) {
1339 ldm_error ("Duplicate VBLK, part %d.", rec); 1344 ldm_error ("Duplicate VBLK, part %d.", rec);
1340 f->map &= 0x7F; /* Mark the group as broken */ 1345 f->map &= 0x7F; /* Mark the group as broken */
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index df434c5f28fb..c1c729335924 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -20,6 +20,7 @@ proc-y += stat.o
20proc-y += uptime.o 20proc-y += uptime.o
21proc-y += version.o 21proc-y += version.o
22proc-y += softirqs.o 22proc-y += softirqs.o
23proc-y += namespaces.o
23proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o 24proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
24proc-$(CONFIG_NET) += proc_net.o 25proc-$(CONFIG_NET) += proc_net.o
25proc-$(CONFIG_PROC_KCORE) += kcore.o 26proc-$(CONFIG_PROC_KCORE) += kcore.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dfa532730e55..dc8bca72b002 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -600,7 +600,7 @@ static int proc_fd_access_allowed(struct inode *inode)
600 return allowed; 600 return allowed;
601} 601}
602 602
603static int proc_setattr(struct dentry *dentry, struct iattr *attr) 603int proc_setattr(struct dentry *dentry, struct iattr *attr)
604{ 604{
605 int error; 605 int error;
606 struct inode *inode = dentry->d_inode; 606 struct inode *inode = dentry->d_inode;
@@ -1736,8 +1736,7 @@ static int task_dumpable(struct task_struct *task)
1736 return 0; 1736 return 0;
1737} 1737}
1738 1738
1739 1739struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
1740static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
1741{ 1740{
1742 struct inode * inode; 1741 struct inode * inode;
1743 struct proc_inode *ei; 1742 struct proc_inode *ei;
@@ -1779,7 +1778,7 @@ out_unlock:
1779 return NULL; 1778 return NULL;
1780} 1779}
1781 1780
1782static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 1781int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1783{ 1782{
1784 struct inode *inode = dentry->d_inode; 1783 struct inode *inode = dentry->d_inode;
1785 struct task_struct *task; 1784 struct task_struct *task;
@@ -1820,7 +1819,7 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
1820 * made this apply to all per process world readable and executable 1819 * made this apply to all per process world readable and executable
1821 * directories. 1820 * directories.
1822 */ 1821 */
1823static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1822int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1824{ 1823{
1825 struct inode *inode; 1824 struct inode *inode;
1826 struct task_struct *task; 1825 struct task_struct *task;
@@ -1862,7 +1861,7 @@ static int pid_delete_dentry(const struct dentry * dentry)
1862 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; 1861 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
1863} 1862}
1864 1863
1865static const struct dentry_operations pid_dentry_operations = 1864const struct dentry_operations pid_dentry_operations =
1866{ 1865{
1867 .d_revalidate = pid_revalidate, 1866 .d_revalidate = pid_revalidate,
1868 .d_delete = pid_delete_dentry, 1867 .d_delete = pid_delete_dentry,
@@ -1870,9 +1869,6 @@ static const struct dentry_operations pid_dentry_operations =
1870 1869
1871/* Lookups */ 1870/* Lookups */
1872 1871
1873typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
1874 struct task_struct *, const void *);
1875
1876/* 1872/*
1877 * Fill a directory entry. 1873 * Fill a directory entry.
1878 * 1874 *
@@ -1885,8 +1881,8 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
1885 * reported by readdir in sync with the inode numbers reported 1881 * reported by readdir in sync with the inode numbers reported
1886 * by stat. 1882 * by stat.
1887 */ 1883 */
1888static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 1884int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
1889 char *name, int len, 1885 const char *name, int len,
1890 instantiate_t instantiate, struct task_struct *task, const void *ptr) 1886 instantiate_t instantiate, struct task_struct *task, const void *ptr)
1891{ 1887{
1892 struct dentry *child, *dir = filp->f_path.dentry; 1888 struct dentry *child, *dir = filp->f_path.dentry;
@@ -2820,6 +2816,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2820 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), 2816 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
2821 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 2817 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
2822 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 2818 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
2819 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
2823#ifdef CONFIG_NET 2820#ifdef CONFIG_NET
2824 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), 2821 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
2825#endif 2822#endif
@@ -3168,6 +3165,7 @@ out_no_task:
3168static const struct pid_entry tid_base_stuff[] = { 3165static const struct pid_entry tid_base_stuff[] = {
3169 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 3166 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
3170 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 3167 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
3168 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
3171 REG("environ", S_IRUSR, proc_environ_operations), 3169 REG("environ", S_IRUSR, proc_environ_operations),
3172 INF("auxv", S_IRUSR, proc_pid_auxv), 3170 INF("auxv", S_IRUSR, proc_pid_auxv),
3173 ONE("status", S_IRUGO, proc_pid_status), 3171 ONE("status", S_IRUGO, proc_pid_status),
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index f1281339b6fa..f1637f17c37c 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -674,6 +674,7 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
674 } 674 }
675 return ent; 675 return ent;
676} 676}
677EXPORT_SYMBOL(proc_mkdir_mode);
677 678
678struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, 679struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
679 struct proc_dir_entry *parent) 680 struct proc_dir_entry *parent)
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d15aa1b1cc8f..74b48cfa1bb2 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -28,6 +28,7 @@ static void proc_evict_inode(struct inode *inode)
28{ 28{
29 struct proc_dir_entry *de; 29 struct proc_dir_entry *de;
30 struct ctl_table_header *head; 30 struct ctl_table_header *head;
31 const struct proc_ns_operations *ns_ops;
31 32
32 truncate_inode_pages(&inode->i_data, 0); 33 truncate_inode_pages(&inode->i_data, 0);
33 end_writeback(inode); 34 end_writeback(inode);
@@ -44,6 +45,10 @@ static void proc_evict_inode(struct inode *inode)
44 rcu_assign_pointer(PROC_I(inode)->sysctl, NULL); 45 rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
45 sysctl_head_put(head); 46 sysctl_head_put(head);
46 } 47 }
48 /* Release any associated namespace */
49 ns_ops = PROC_I(inode)->ns_ops;
50 if (ns_ops && ns_ops->put)
51 ns_ops->put(PROC_I(inode)->ns);
47} 52}
48 53
49static struct kmem_cache * proc_inode_cachep; 54static struct kmem_cache * proc_inode_cachep;
@@ -62,6 +67,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
62 ei->pde = NULL; 67 ei->pde = NULL;
63 ei->sysctl = NULL; 68 ei->sysctl = NULL;
64 ei->sysctl_entry = NULL; 69 ei->sysctl_entry = NULL;
70 ei->ns = NULL;
71 ei->ns_ops = NULL;
65 inode = &ei->vfs_inode; 72 inode = &ei->vfs_inode;
66 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 73 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
67 return inode; 74 return inode;
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c03e8d3a3a5b..7838e5cfec14 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -61,6 +61,14 @@ extern const struct file_operations proc_pagemap_operations;
61extern const struct file_operations proc_net_operations; 61extern const struct file_operations proc_net_operations;
62extern const struct inode_operations proc_net_inode_operations; 62extern const struct inode_operations proc_net_inode_operations;
63 63
64struct proc_maps_private {
65 struct pid *pid;
66 struct task_struct *task;
67#ifdef CONFIG_MMU
68 struct vm_area_struct *tail_vma;
69#endif
70};
71
64void proc_init_inodecache(void); 72void proc_init_inodecache(void);
65 73
66static inline struct pid *proc_pid(struct inode *inode) 74static inline struct pid *proc_pid(struct inode *inode)
@@ -119,3 +127,21 @@ struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
119 */ 127 */
120int proc_readdir(struct file *, void *, filldir_t); 128int proc_readdir(struct file *, void *, filldir_t);
121struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); 129struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
130
131
132
133/* Lookups */
134typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
135 struct task_struct *, const void *);
136int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
137 const char *name, int len,
138 instantiate_t instantiate, struct task_struct *task, const void *ptr);
139int pid_revalidate(struct dentry *dentry, struct nameidata *nd);
140struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task);
141extern const struct dentry_operations pid_dentry_operations;
142int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
143int proc_setattr(struct dentry *dentry, struct iattr *attr);
144
145extern const struct inode_operations proc_ns_dir_inode_operations;
146extern const struct file_operations proc_ns_dir_operations;
147
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
new file mode 100644
index 000000000000..781dec5bd682
--- /dev/null
+++ b/fs/proc/namespaces.c
@@ -0,0 +1,198 @@
1#include <linux/proc_fs.h>
2#include <linux/nsproxy.h>
3#include <linux/sched.h>
4#include <linux/ptrace.h>
5#include <linux/fs_struct.h>
6#include <linux/mount.h>
7#include <linux/path.h>
8#include <linux/namei.h>
9#include <linux/file.h>
10#include <linux/utsname.h>
11#include <net/net_namespace.h>
12#include <linux/mnt_namespace.h>
13#include <linux/ipc_namespace.h>
14#include <linux/pid_namespace.h>
15#include "internal.h"
16
17
18static const struct proc_ns_operations *ns_entries[] = {
19#ifdef CONFIG_NET_NS
20 &netns_operations,
21#endif
22#ifdef CONFIG_UTS_NS
23 &utsns_operations,
24#endif
25#ifdef CONFIG_IPC_NS
26 &ipcns_operations,
27#endif
28};
29
30static const struct file_operations ns_file_operations = {
31 .llseek = no_llseek,
32};
33
34static struct dentry *proc_ns_instantiate(struct inode *dir,
35 struct dentry *dentry, struct task_struct *task, const void *ptr)
36{
37 const struct proc_ns_operations *ns_ops = ptr;
38 struct inode *inode;
39 struct proc_inode *ei;
40 struct dentry *error = ERR_PTR(-ENOENT);
41
42 inode = proc_pid_make_inode(dir->i_sb, task);
43 if (!inode)
44 goto out;
45
46 ei = PROC_I(inode);
47 inode->i_mode = S_IFREG|S_IRUSR;
48 inode->i_fop = &ns_file_operations;
49 ei->ns_ops = ns_ops;
50 ei->ns = ns_ops->get(task);
51 if (!ei->ns)
52 goto out_iput;
53
54 dentry->d_op = &pid_dentry_operations;
55 d_add(dentry, inode);
56 /* Close the race of the process dying before we return the dentry */
57 if (pid_revalidate(dentry, NULL))
58 error = NULL;
59out:
60 return error;
61out_iput:
62 iput(inode);
63 goto out;
64}
65
66static int proc_ns_fill_cache(struct file *filp, void *dirent,
67 filldir_t filldir, struct task_struct *task,
68 const struct proc_ns_operations *ops)
69{
70 return proc_fill_cache(filp, dirent, filldir,
71 ops->name, strlen(ops->name),
72 proc_ns_instantiate, task, ops);
73}
74
75static int proc_ns_dir_readdir(struct file *filp, void *dirent,
76 filldir_t filldir)
77{
78 int i;
79 struct dentry *dentry = filp->f_path.dentry;
80 struct inode *inode = dentry->d_inode;
81 struct task_struct *task = get_proc_task(inode);
82 const struct proc_ns_operations **entry, **last;
83 ino_t ino;
84 int ret;
85
86 ret = -ENOENT;
87 if (!task)
88 goto out_no_task;
89
90 ret = -EPERM;
91 if (!ptrace_may_access(task, PTRACE_MODE_READ))
92 goto out;
93
94 ret = 0;
95 i = filp->f_pos;
96 switch (i) {
97 case 0:
98 ino = inode->i_ino;
99 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
100 goto out;
101 i++;
102 filp->f_pos++;
103 /* fall through */
104 case 1:
105 ino = parent_ino(dentry);
106 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
107 goto out;
108 i++;
109 filp->f_pos++;
110 /* fall through */
111 default:
112 i -= 2;
113 if (i >= ARRAY_SIZE(ns_entries)) {
114 ret = 1;
115 goto out;
116 }
117 entry = ns_entries + i;
118 last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
119 while (entry <= last) {
120 if (proc_ns_fill_cache(filp, dirent, filldir,
121 task, *entry) < 0)
122 goto out;
123 filp->f_pos++;
124 entry++;
125 }
126 }
127
128 ret = 1;
129out:
130 put_task_struct(task);
131out_no_task:
132 return ret;
133}
134
135const struct file_operations proc_ns_dir_operations = {
136 .read = generic_read_dir,
137 .readdir = proc_ns_dir_readdir,
138};
139
140static struct dentry *proc_ns_dir_lookup(struct inode *dir,
141 struct dentry *dentry, struct nameidata *nd)
142{
143 struct dentry *error;
144 struct task_struct *task = get_proc_task(dir);
145 const struct proc_ns_operations **entry, **last;
146 unsigned int len = dentry->d_name.len;
147
148 error = ERR_PTR(-ENOENT);
149
150 if (!task)
151 goto out_no_task;
152
153 error = ERR_PTR(-EPERM);
154 if (!ptrace_may_access(task, PTRACE_MODE_READ))
155 goto out;
156
157 last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
158 for (entry = ns_entries; entry <= last; entry++) {
159 if (strlen((*entry)->name) != len)
160 continue;
161 if (!memcmp(dentry->d_name.name, (*entry)->name, len))
162 break;
163 }
164 error = ERR_PTR(-ENOENT);
165 if (entry > last)
166 goto out;
167
168 error = proc_ns_instantiate(dir, dentry, task, *entry);
169out:
170 put_task_struct(task);
171out_no_task:
172 return error;
173}
174
175const struct inode_operations proc_ns_dir_inode_operations = {
176 .lookup = proc_ns_dir_lookup,
177 .getattr = pid_getattr,
178 .setattr = proc_setattr,
179};
180
181struct file *proc_ns_fget(int fd)
182{
183 struct file *file;
184
185 file = fget(fd);
186 if (!file)
187 return ERR_PTR(-EBADF);
188
189 if (file->f_op != &ns_file_operations)
190 goto out_invalid;
191
192 return file;
193
194out_invalid:
195 fput(file);
196 return ERR_PTR(-EINVAL);
197}
198
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2e7addfd9803..db15935fa757 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -211,10 +211,10 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
211{ 211{
212 struct mm_struct *mm = vma->vm_mm; 212 struct mm_struct *mm = vma->vm_mm;
213 struct file *file = vma->vm_file; 213 struct file *file = vma->vm_file;
214 int flags = vma->vm_flags; 214 vm_flags_t flags = vma->vm_flags;
215 unsigned long ino = 0; 215 unsigned long ino = 0;
216 unsigned long long pgoff = 0; 216 unsigned long long pgoff = 0;
217 unsigned long start; 217 unsigned long start, end;
218 dev_t dev = 0; 218 dev_t dev = 0;
219 int len; 219 int len;
220 220
@@ -227,13 +227,15 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
227 227
228 /* We don't show the stack guard page in /proc/maps */ 228 /* We don't show the stack guard page in /proc/maps */
229 start = vma->vm_start; 229 start = vma->vm_start;
230 if (vma->vm_flags & VM_GROWSDOWN) 230 if (stack_guard_page_start(vma, start))
231 if (!vma_stack_continue(vma->vm_prev, vma->vm_start)) 231 start += PAGE_SIZE;
232 start += PAGE_SIZE; 232 end = vma->vm_end;
233 if (stack_guard_page_end(vma, end))
234 end -= PAGE_SIZE;
233 235
234 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", 236 seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
235 start, 237 start,
236 vma->vm_end, 238 end,
237 flags & VM_READ ? 'r' : '-', 239 flags & VM_READ ? 'r' : '-',
238 flags & VM_WRITE ? 'w' : '-', 240 flags & VM_WRITE ? 'w' : '-',
239 flags & VM_EXEC ? 'x' : '-', 241 flags & VM_EXEC ? 'x' : '-',
@@ -856,7 +858,192 @@ const struct file_operations proc_pagemap_operations = {
856#endif /* CONFIG_PROC_PAGE_MONITOR */ 858#endif /* CONFIG_PROC_PAGE_MONITOR */
857 859
858#ifdef CONFIG_NUMA 860#ifdef CONFIG_NUMA
859extern int show_numa_map(struct seq_file *m, void *v); 861
862struct numa_maps {
863 struct vm_area_struct *vma;
864 unsigned long pages;
865 unsigned long anon;
866 unsigned long active;
867 unsigned long writeback;
868 unsigned long mapcount_max;
869 unsigned long dirty;
870 unsigned long swapcache;
871 unsigned long node[MAX_NUMNODES];
872};
873
874struct numa_maps_private {
875 struct proc_maps_private proc_maps;
876 struct numa_maps md;
877};
878
879static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty)
880{
881 int count = page_mapcount(page);
882
883 md->pages++;
884 if (pte_dirty || PageDirty(page))
885 md->dirty++;
886
887 if (PageSwapCache(page))
888 md->swapcache++;
889
890 if (PageActive(page) || PageUnevictable(page))
891 md->active++;
892
893 if (PageWriteback(page))
894 md->writeback++;
895
896 if (PageAnon(page))
897 md->anon++;
898
899 if (count > md->mapcount_max)
900 md->mapcount_max = count;
901
902 md->node[page_to_nid(page)]++;
903}
904
905static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
906 unsigned long end, struct mm_walk *walk)
907{
908 struct numa_maps *md;
909 spinlock_t *ptl;
910 pte_t *orig_pte;
911 pte_t *pte;
912
913 md = walk->private;
914 orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
915 do {
916 struct page *page;
917 int nid;
918
919 if (!pte_present(*pte))
920 continue;
921
922 page = vm_normal_page(md->vma, addr, *pte);
923 if (!page)
924 continue;
925
926 if (PageReserved(page))
927 continue;
928
929 nid = page_to_nid(page);
930 if (!node_isset(nid, node_states[N_HIGH_MEMORY]))
931 continue;
932
933 gather_stats(page, md, pte_dirty(*pte));
934
935 } while (pte++, addr += PAGE_SIZE, addr != end);
936 pte_unmap_unlock(orig_pte, ptl);
937 return 0;
938}
939#ifdef CONFIG_HUGETLB_PAGE
940static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
941 unsigned long addr, unsigned long end, struct mm_walk *walk)
942{
943 struct numa_maps *md;
944 struct page *page;
945
946 if (pte_none(*pte))
947 return 0;
948
949 page = pte_page(*pte);
950 if (!page)
951 return 0;
952
953 md = walk->private;
954 gather_stats(page, md, pte_dirty(*pte));
955 return 0;
956}
957
958#else
959static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
960 unsigned long addr, unsigned long end, struct mm_walk *walk)
961{
962 return 0;
963}
964#endif
965
966/*
967 * Display pages allocated per node and memory policy via /proc.
968 */
969static int show_numa_map(struct seq_file *m, void *v)
970{
971 struct numa_maps_private *numa_priv = m->private;
972 struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
973 struct vm_area_struct *vma = v;
974 struct numa_maps *md = &numa_priv->md;
975 struct file *file = vma->vm_file;
976 struct mm_struct *mm = vma->vm_mm;
977 struct mm_walk walk = {};
978 struct mempolicy *pol;
979 int n;
980 char buffer[50];
981
982 if (!mm)
983 return 0;
984
985 /* Ensure we start with an empty set of numa_maps statistics. */
986 memset(md, 0, sizeof(*md));
987
988 md->vma = vma;
989
990 walk.hugetlb_entry = gather_hugetbl_stats;
991 walk.pmd_entry = gather_pte_stats;
992 walk.private = md;
993 walk.mm = mm;
994
995 pol = get_vma_policy(proc_priv->task, vma, vma->vm_start);
996 mpol_to_str(buffer, sizeof(buffer), pol, 0);
997 mpol_cond_put(pol);
998
999 seq_printf(m, "%08lx %s", vma->vm_start, buffer);
1000
1001 if (file) {
1002 seq_printf(m, " file=");
1003 seq_path(m, &file->f_path, "\n\t= ");
1004 } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
1005 seq_printf(m, " heap");
1006 } else if (vma->vm_start <= mm->start_stack &&
1007 vma->vm_end >= mm->start_stack) {
1008 seq_printf(m, " stack");
1009 }
1010
1011 walk_page_range(vma->vm_start, vma->vm_end, &walk);
1012
1013 if (!md->pages)
1014 goto out;
1015
1016 if (md->anon)
1017 seq_printf(m, " anon=%lu", md->anon);
1018
1019 if (md->dirty)
1020 seq_printf(m, " dirty=%lu", md->dirty);
1021
1022 if (md->pages != md->anon && md->pages != md->dirty)
1023 seq_printf(m, " mapped=%lu", md->pages);
1024
1025 if (md->mapcount_max > 1)
1026 seq_printf(m, " mapmax=%lu", md->mapcount_max);
1027
1028 if (md->swapcache)
1029 seq_printf(m, " swapcache=%lu", md->swapcache);
1030
1031 if (md->active < md->pages && !is_vm_hugetlb_page(vma))
1032 seq_printf(m, " active=%lu", md->active);
1033
1034 if (md->writeback)
1035 seq_printf(m, " writeback=%lu", md->writeback);
1036
1037 for_each_node_state(n, N_HIGH_MEMORY)
1038 if (md->node[n])
1039 seq_printf(m, " N%d=%lu", n, md->node[n]);
1040out:
1041 seq_putc(m, '\n');
1042
1043 if (m->count < m->size)
1044 m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0;
1045 return 0;
1046}
860 1047
861static const struct seq_operations proc_pid_numa_maps_op = { 1048static const struct seq_operations proc_pid_numa_maps_op = {
862 .start = m_start, 1049 .start = m_start,
@@ -867,7 +1054,20 @@ static const struct seq_operations proc_pid_numa_maps_op = {
867 1054
868static int numa_maps_open(struct inode *inode, struct file *file) 1055static int numa_maps_open(struct inode *inode, struct file *file)
869{ 1056{
870 return do_maps_open(inode, file, &proc_pid_numa_maps_op); 1057 struct numa_maps_private *priv;
1058 int ret = -ENOMEM;
1059 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
1060 if (priv) {
1061 priv->proc_maps.pid = proc_pid(inode);
1062 ret = seq_open(file, &proc_pid_numa_maps_op);
1063 if (!ret) {
1064 struct seq_file *m = file->private_data;
1065 m->private = priv;
1066 } else {
1067 kfree(priv);
1068 }
1069 }
1070 return ret;
871} 1071}
872 1072
873const struct file_operations proc_numa_maps_operations = { 1073const struct file_operations proc_numa_maps_operations = {
@@ -876,4 +1076,4 @@ const struct file_operations proc_numa_maps_operations = {
876 .llseek = seq_lseek, 1076 .llseek = seq_lseek,
877 .release = seq_release_private, 1077 .release = seq_release_private,
878}; 1078};
879#endif 1079#endif /* CONFIG_NUMA */
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index f835a25625ff..f2c3ff20ea68 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -152,21 +152,27 @@ EXPORT_SYMBOL_GPL(pstore_register);
152void pstore_get_records(void) 152void pstore_get_records(void)
153{ 153{
154 struct pstore_info *psi = psinfo; 154 struct pstore_info *psi = psinfo;
155 size_t size; 155 ssize_t size;
156 u64 id; 156 u64 id;
157 enum pstore_type_id type; 157 enum pstore_type_id type;
158 struct timespec time; 158 struct timespec time;
159 int failed = 0; 159 int failed = 0, rc;
160 160
161 if (!psi) 161 if (!psi)
162 return; 162 return;
163 163
164 mutex_lock(&psinfo->buf_mutex); 164 mutex_lock(&psinfo->buf_mutex);
165 rc = psi->open(psi);
166 if (rc)
167 goto out;
168
165 while ((size = psi->read(&id, &type, &time)) > 0) { 169 while ((size = psi->read(&id, &type, &time)) > 0) {
166 if (pstore_mkfile(type, psi->name, id, psi->buf, size, 170 if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size,
167 time, psi->erase)) 171 time, psi->erase))
168 failed++; 172 failed++;
169 } 173 }
174 psi->close(psi);
175out:
170 mutex_unlock(&psinfo->buf_mutex); 176 mutex_unlock(&psinfo->buf_mutex);
171 177
172 if (failed) 178 if (failed)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index d3c032f5fa0a..5b572c89e6c4 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -691,8 +691,11 @@ static void prune_dqcache(int count)
691 * This is called from kswapd when we think we need some 691 * This is called from kswapd when we think we need some
692 * more memory 692 * more memory
693 */ 693 */
694static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 694static int shrink_dqcache_memory(struct shrinker *shrink,
695 struct shrink_control *sc)
695{ 696{
697 int nr = sc->nr_to_scan;
698
696 if (nr) { 699 if (nr) {
697 spin_lock(&dq_list_lock); 700 spin_lock(&dq_list_lock);
698 prune_dqcache(nr); 701 prune_dqcache(nr);
diff --git a/fs/splice.c b/fs/splice.c
index 50a5d978da16..aa866d309695 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -162,6 +162,14 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = {
162 .get = generic_pipe_buf_get, 162 .get = generic_pipe_buf_get,
163}; 163};
164 164
165static void wakeup_pipe_readers(struct pipe_inode_info *pipe)
166{
167 smp_mb();
168 if (waitqueue_active(&pipe->wait))
169 wake_up_interruptible(&pipe->wait);
170 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
171}
172
165/** 173/**
166 * splice_to_pipe - fill passed data into a pipe 174 * splice_to_pipe - fill passed data into a pipe
167 * @pipe: pipe to fill 175 * @pipe: pipe to fill
@@ -247,12 +255,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
247 255
248 pipe_unlock(pipe); 256 pipe_unlock(pipe);
249 257
250 if (do_wakeup) { 258 if (do_wakeup)
251 smp_mb(); 259 wakeup_pipe_readers(pipe);
252 if (waitqueue_active(&pipe->wait))
253 wake_up_interruptible(&pipe->wait);
254 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
255 }
256 260
257 while (page_nr < spd_pages) 261 while (page_nr < spd_pages)
258 spd->spd_release(spd, page_nr++); 262 spd->spd_release(spd, page_nr++);
@@ -1892,12 +1896,9 @@ retry:
1892 /* 1896 /*
1893 * If we put data in the output pipe, wakeup any potential readers. 1897 * If we put data in the output pipe, wakeup any potential readers.
1894 */ 1898 */
1895 if (ret > 0) { 1899 if (ret > 0)
1896 smp_mb(); 1900 wakeup_pipe_readers(opipe);
1897 if (waitqueue_active(&opipe->wait)) 1901
1898 wake_up_interruptible(&opipe->wait);
1899 kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
1900 }
1901 if (input_wakeup) 1902 if (input_wakeup)
1902 wakeup_pipe_writers(ipipe); 1903 wakeup_pipe_writers(ipipe);
1903 1904
@@ -1976,12 +1977,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1976 /* 1977 /*
1977 * If we put data in the output pipe, wakeup any potential readers. 1978 * If we put data in the output pipe, wakeup any potential readers.
1978 */ 1979 */
1979 if (ret > 0) { 1980 if (ret > 0)
1980 smp_mb(); 1981 wakeup_pipe_readers(opipe);
1981 if (waitqueue_active(&opipe->wait))
1982 wake_up_interruptible(&opipe->wait);
1983 kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
1984 }
1985 1982
1986 return ret; 1983 return ret;
1987} 1984}
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index efc309fa3035..7797218d0b30 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -42,7 +42,7 @@ config SQUASHFS_LZO
42 select LZO_DECOMPRESS 42 select LZO_DECOMPRESS
43 help 43 help
44 Saying Y here includes support for reading Squashfs file systems 44 Saying Y here includes support for reading Squashfs file systems
45 compressed with LZO compresssion. LZO compression is mainly 45 compressed with LZO compression. LZO compression is mainly
46 aimed at embedded systems with slower CPUs where the overheads 46 aimed at embedded systems with slower CPUs where the overheads
47 of zlib are too high. 47 of zlib are too high.
48 48
@@ -57,7 +57,7 @@ config SQUASHFS_XZ
57 select XZ_DEC 57 select XZ_DEC
58 help 58 help
59 Saying Y here includes support for reading Squashfs file systems 59 Saying Y here includes support for reading Squashfs file systems
60 compressed with XZ compresssion. XZ gives better compression than 60 compressed with XZ compression. XZ gives better compression than
61 the default zlib compression, at the expense of greater CPU and 61 the default zlib compression, at the expense of greater CPU and
62 memory overhead. 62 memory overhead.
63 63
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index c37b520132ff..4b5a3fbb1f1f 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -29,7 +29,7 @@
29 * plus functions layered ontop of the generic cache implementation to 29 * plus functions layered ontop of the generic cache implementation to
30 * access the metadata and fragment caches. 30 * access the metadata and fragment caches.
31 * 31 *
32 * To avoid out of memory and fragmentation isssues with vmalloc the cache 32 * To avoid out of memory and fragmentation issues with vmalloc the cache
33 * uses sequences of kmalloced PAGE_CACHE_SIZE buffers. 33 * uses sequences of kmalloced PAGE_CACHE_SIZE buffers.
34 * 34 *
35 * It should be noted that the cache is not used for file datablocks, these 35 * It should be noted that the cache is not used for file datablocks, these
diff --git a/fs/super.c b/fs/super.c
index 8a06881b1920..c04f7e0b7ed2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -948,8 +948,7 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
948 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE 948 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
949 * but s_maxbytes was an unsigned long long for many releases. Throw 949 * but s_maxbytes was an unsigned long long for many releases. Throw
950 * this warning for a little while to try and catch filesystems that 950 * this warning for a little while to try and catch filesystems that
951 * violate this rule. This warning should be either removed or 951 * violate this rule.
952 * converted to a BUG() in 2.6.34.
953 */ 952 */
954 WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " 953 WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
955 "negative value (%lld)\n", type->name, sb->s_maxbytes); 954 "negative value (%lld)\n", type->name, sb->s_maxbytes);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index da3fefe91a8f..1ad8c93c1b85 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -24,13 +24,6 @@
24 24
25#include "sysfs.h" 25#include "sysfs.h"
26 26
27/* used in crash dumps to help with debugging */
28static char last_sysfs_file[PATH_MAX];
29void sysfs_printk_last_file(void)
30{
31 printk(KERN_EMERG "last sysfs file: %s\n", last_sysfs_file);
32}
33
34/* 27/*
35 * There's one sysfs_buffer for each open file and one 28 * There's one sysfs_buffer for each open file and one
36 * sysfs_open_dirent for each sysfs_dirent with one or more open 29 * sysfs_open_dirent for each sysfs_dirent with one or more open
@@ -337,11 +330,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
337 struct sysfs_buffer *buffer; 330 struct sysfs_buffer *buffer;
338 const struct sysfs_ops *ops; 331 const struct sysfs_ops *ops;
339 int error = -EACCES; 332 int error = -EACCES;
340 char *p;
341
342 p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file));
343 if (!IS_ERR(p))
344 memmove(last_sysfs_file, p, strlen(p) + 1);
345 333
346 /* need attr_sd for attr and ops, its parent for kobj */ 334 /* need attr_sd for attr and ops, its parent for kobj */
347 if (!sysfs_get_active(attr_sd)) 335 if (!sysfs_get_active(attr_sd))
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index c8769dc222d8..194414f8298c 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -101,9 +101,9 @@ int sysfs_create_group(struct kobject *kobj,
101} 101}
102 102
103/** 103/**
104 * sysfs_update_group - given a directory kobject, create an attribute group 104 * sysfs_update_group - given a directory kobject, update an attribute group
105 * @kobj: The kobject to create the group on 105 * @kobj: The kobject to update the group on
106 * @grp: The attribute group to create 106 * @grp: The attribute group to update
107 * 107 *
108 * This function updates an attribute group. Unlike 108 * This function updates an attribute group. Unlike
109 * sysfs_create_group(), it will explicitly not warn or error if any 109 * sysfs_create_group(), it will explicitly not warn or error if any
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 8c4fc1425b3e..f67acbdda5e8 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -22,16 +22,24 @@
22#include <linux/anon_inodes.h> 22#include <linux/anon_inodes.h>
23#include <linux/timerfd.h> 23#include <linux/timerfd.h>
24#include <linux/syscalls.h> 24#include <linux/syscalls.h>
25#include <linux/rcupdate.h>
25 26
26struct timerfd_ctx { 27struct timerfd_ctx {
27 struct hrtimer tmr; 28 struct hrtimer tmr;
28 ktime_t tintv; 29 ktime_t tintv;
30 ktime_t moffs;
29 wait_queue_head_t wqh; 31 wait_queue_head_t wqh;
30 u64 ticks; 32 u64 ticks;
31 int expired; 33 int expired;
32 int clockid; 34 int clockid;
35 struct rcu_head rcu;
36 struct list_head clist;
37 bool might_cancel;
33}; 38};
34 39
40static LIST_HEAD(cancel_list);
41static DEFINE_SPINLOCK(cancel_lock);
42
35/* 43/*
36 * This gets called when the timer event triggers. We set the "expired" 44 * This gets called when the timer event triggers. We set the "expired"
37 * flag, but we do not re-arm the timer (in case it's necessary, 45 * flag, but we do not re-arm the timer (in case it's necessary,
@@ -51,6 +59,63 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
51 return HRTIMER_NORESTART; 59 return HRTIMER_NORESTART;
52} 60}
53 61
62/*
63 * Called when the clock was set to cancel the timers in the cancel
64 * list.
65 */
66void timerfd_clock_was_set(void)
67{
68 ktime_t moffs = ktime_get_monotonic_offset();
69 struct timerfd_ctx *ctx;
70 unsigned long flags;
71
72 rcu_read_lock();
73 list_for_each_entry_rcu(ctx, &cancel_list, clist) {
74 if (!ctx->might_cancel)
75 continue;
76 spin_lock_irqsave(&ctx->wqh.lock, flags);
77 if (ctx->moffs.tv64 != moffs.tv64) {
78 ctx->moffs.tv64 = KTIME_MAX;
79 wake_up_locked(&ctx->wqh);
80 }
81 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
82 }
83 rcu_read_unlock();
84}
85
86static void timerfd_remove_cancel(struct timerfd_ctx *ctx)
87{
88 if (ctx->might_cancel) {
89 ctx->might_cancel = false;
90 spin_lock(&cancel_lock);
91 list_del_rcu(&ctx->clist);
92 spin_unlock(&cancel_lock);
93 }
94}
95
96static bool timerfd_canceled(struct timerfd_ctx *ctx)
97{
98 if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX)
99 return false;
100 ctx->moffs = ktime_get_monotonic_offset();
101 return true;
102}
103
104static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
105{
106 if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) &&
107 (flags & TFD_TIMER_CANCEL_ON_SET)) {
108 if (!ctx->might_cancel) {
109 ctx->might_cancel = true;
110 spin_lock(&cancel_lock);
111 list_add_rcu(&ctx->clist, &cancel_list);
112 spin_unlock(&cancel_lock);
113 }
114 } else if (ctx->might_cancel) {
115 timerfd_remove_cancel(ctx);
116 }
117}
118
54static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) 119static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
55{ 120{
56 ktime_t remaining; 121 ktime_t remaining;
@@ -59,11 +124,12 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
59 return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; 124 return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
60} 125}
61 126
62static void timerfd_setup(struct timerfd_ctx *ctx, int flags, 127static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
63 const struct itimerspec *ktmr) 128 const struct itimerspec *ktmr)
64{ 129{
65 enum hrtimer_mode htmode; 130 enum hrtimer_mode htmode;
66 ktime_t texp; 131 ktime_t texp;
132 int clockid = ctx->clockid;
67 133
68 htmode = (flags & TFD_TIMER_ABSTIME) ? 134 htmode = (flags & TFD_TIMER_ABSTIME) ?
69 HRTIMER_MODE_ABS: HRTIMER_MODE_REL; 135 HRTIMER_MODE_ABS: HRTIMER_MODE_REL;
@@ -72,19 +138,24 @@ static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
72 ctx->expired = 0; 138 ctx->expired = 0;
73 ctx->ticks = 0; 139 ctx->ticks = 0;
74 ctx->tintv = timespec_to_ktime(ktmr->it_interval); 140 ctx->tintv = timespec_to_ktime(ktmr->it_interval);
75 hrtimer_init(&ctx->tmr, ctx->clockid, htmode); 141 hrtimer_init(&ctx->tmr, clockid, htmode);
76 hrtimer_set_expires(&ctx->tmr, texp); 142 hrtimer_set_expires(&ctx->tmr, texp);
77 ctx->tmr.function = timerfd_tmrproc; 143 ctx->tmr.function = timerfd_tmrproc;
78 if (texp.tv64 != 0) 144 if (texp.tv64 != 0) {
79 hrtimer_start(&ctx->tmr, texp, htmode); 145 hrtimer_start(&ctx->tmr, texp, htmode);
146 if (timerfd_canceled(ctx))
147 return -ECANCELED;
148 }
149 return 0;
80} 150}
81 151
82static int timerfd_release(struct inode *inode, struct file *file) 152static int timerfd_release(struct inode *inode, struct file *file)
83{ 153{
84 struct timerfd_ctx *ctx = file->private_data; 154 struct timerfd_ctx *ctx = file->private_data;
85 155
156 timerfd_remove_cancel(ctx);
86 hrtimer_cancel(&ctx->tmr); 157 hrtimer_cancel(&ctx->tmr);
87 kfree(ctx); 158 kfree_rcu(ctx, rcu);
88 return 0; 159 return 0;
89} 160}
90 161
@@ -118,8 +189,21 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
118 res = -EAGAIN; 189 res = -EAGAIN;
119 else 190 else
120 res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); 191 res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
192
193 /*
194 * If clock has changed, we do not care about the
195 * ticks and we do not rearm the timer. Userspace must
196 * reevaluate anyway.
197 */
198 if (timerfd_canceled(ctx)) {
199 ctx->ticks = 0;
200 ctx->expired = 0;
201 res = -ECANCELED;
202 }
203
121 if (ctx->ticks) { 204 if (ctx->ticks) {
122 ticks = ctx->ticks; 205 ticks = ctx->ticks;
206
123 if (ctx->expired && ctx->tintv.tv64) { 207 if (ctx->expired && ctx->tintv.tv64) {
124 /* 208 /*
125 * If tintv.tv64 != 0, this is a periodic timer that 209 * If tintv.tv64 != 0, this is a periodic timer that
@@ -183,6 +267,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
183 init_waitqueue_head(&ctx->wqh); 267 init_waitqueue_head(&ctx->wqh);
184 ctx->clockid = clockid; 268 ctx->clockid = clockid;
185 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); 269 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
270 ctx->moffs = ktime_get_monotonic_offset();
186 271
187 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, 272 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
188 O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); 273 O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
@@ -199,6 +284,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
199 struct file *file; 284 struct file *file;
200 struct timerfd_ctx *ctx; 285 struct timerfd_ctx *ctx;
201 struct itimerspec ktmr, kotmr; 286 struct itimerspec ktmr, kotmr;
287 int ret;
202 288
203 if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) 289 if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
204 return -EFAULT; 290 return -EFAULT;
@@ -213,6 +299,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
213 return PTR_ERR(file); 299 return PTR_ERR(file);
214 ctx = file->private_data; 300 ctx = file->private_data;
215 301
302 timerfd_setup_cancel(ctx, flags);
303
216 /* 304 /*
217 * We need to stop the existing timer before reprogramming 305 * We need to stop the existing timer before reprogramming
218 * it to the new values. 306 * it to the new values.
@@ -240,14 +328,14 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
240 /* 328 /*
241 * Re-program the timer to the new value ... 329 * Re-program the timer to the new value ...
242 */ 330 */
243 timerfd_setup(ctx, flags, &ktmr); 331 ret = timerfd_setup(ctx, flags, &ktmr);
244 332
245 spin_unlock_irq(&ctx->wqh.lock); 333 spin_unlock_irq(&ctx->wqh.lock);
246 fput(file); 334 fput(file);
247 if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) 335 if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
248 return -EFAULT; 336 return -EFAULT;
249 337
250 return 0; 338 return ret;
251} 339}
252 340
253SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) 341SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 8b3a7da531eb..315de66e52b2 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -106,7 +106,7 @@ static long long get_liability(struct ubifs_info *c)
106 long long liab; 106 long long liab;
107 107
108 spin_lock(&c->space_lock); 108 spin_lock(&c->space_lock);
109 liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth; 109 liab = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth;
110 spin_unlock(&c->space_lock); 110 spin_unlock(&c->space_lock);
111 return liab; 111 return liab;
112} 112}
@@ -180,7 +180,7 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
180 int idx_lebs; 180 int idx_lebs;
181 long long idx_size; 181 long long idx_size;
182 182
183 idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; 183 idx_size = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx;
184 /* And make sure we have thrice the index size of space reserved */ 184 /* And make sure we have thrice the index size of space reserved */
185 idx_size += idx_size << 1; 185 idx_size += idx_size << 1;
186 /* 186 /*
@@ -292,13 +292,13 @@ static int can_use_rp(struct ubifs_info *c)
292 * budgeted index space to the size of the current index, multiplies this by 3, 292 * budgeted index space to the size of the current index, multiplies this by 3,
293 * and makes sure this does not exceed the amount of free LEBs. 293 * and makes sure this does not exceed the amount of free LEBs.
294 * 294 *
295 * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: 295 * Notes about @c->bi.min_idx_lebs and @c->lst.idx_lebs variables:
296 * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might 296 * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might
297 * be large, because UBIFS does not do any index consolidation as long as 297 * be large, because UBIFS does not do any index consolidation as long as
298 * there is free space. IOW, the index may take a lot of LEBs, but the LEBs 298 * there is free space. IOW, the index may take a lot of LEBs, but the LEBs
299 * will contain a lot of dirt. 299 * will contain a lot of dirt.
300 * o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW, 300 * o @c->bi.min_idx_lebs is the number of LEBS the index presumably takes. IOW,
301 * the index may be consolidated to take up to @c->min_idx_lebs LEBs. 301 * the index may be consolidated to take up to @c->bi.min_idx_lebs LEBs.
302 * 302 *
303 * This function returns zero in case of success, and %-ENOSPC in case of 303 * This function returns zero in case of success, and %-ENOSPC in case of
304 * failure. 304 * failure.
@@ -343,13 +343,13 @@ static int do_budget_space(struct ubifs_info *c)
343 c->lst.taken_empty_lebs; 343 c->lst.taken_empty_lebs;
344 if (unlikely(rsvd_idx_lebs > lebs)) { 344 if (unlikely(rsvd_idx_lebs > lebs)) {
345 dbg_budg("out of indexing space: min_idx_lebs %d (old %d), " 345 dbg_budg("out of indexing space: min_idx_lebs %d (old %d), "
346 "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs, 346 "rsvd_idx_lebs %d", min_idx_lebs, c->bi.min_idx_lebs,
347 rsvd_idx_lebs); 347 rsvd_idx_lebs);
348 return -ENOSPC; 348 return -ENOSPC;
349 } 349 }
350 350
351 available = ubifs_calc_available(c, min_idx_lebs); 351 available = ubifs_calc_available(c, min_idx_lebs);
352 outstanding = c->budg_data_growth + c->budg_dd_growth; 352 outstanding = c->bi.data_growth + c->bi.dd_growth;
353 353
354 if (unlikely(available < outstanding)) { 354 if (unlikely(available < outstanding)) {
355 dbg_budg("out of data space: available %lld, outstanding %lld", 355 dbg_budg("out of data space: available %lld, outstanding %lld",
@@ -360,7 +360,7 @@ static int do_budget_space(struct ubifs_info *c)
360 if (available - outstanding <= c->rp_size && !can_use_rp(c)) 360 if (available - outstanding <= c->rp_size && !can_use_rp(c))
361 return -ENOSPC; 361 return -ENOSPC;
362 362
363 c->min_idx_lebs = min_idx_lebs; 363 c->bi.min_idx_lebs = min_idx_lebs;
364 return 0; 364 return 0;
365} 365}
366 366
@@ -393,11 +393,11 @@ static int calc_data_growth(const struct ubifs_info *c,
393{ 393{
394 int data_growth; 394 int data_growth;
395 395
396 data_growth = req->new_ino ? c->inode_budget : 0; 396 data_growth = req->new_ino ? c->bi.inode_budget : 0;
397 if (req->new_page) 397 if (req->new_page)
398 data_growth += c->page_budget; 398 data_growth += c->bi.page_budget;
399 if (req->new_dent) 399 if (req->new_dent)
400 data_growth += c->dent_budget; 400 data_growth += c->bi.dent_budget;
401 data_growth += req->new_ino_d; 401 data_growth += req->new_ino_d;
402 return data_growth; 402 return data_growth;
403} 403}
@@ -413,12 +413,12 @@ static int calc_dd_growth(const struct ubifs_info *c,
413{ 413{
414 int dd_growth; 414 int dd_growth;
415 415
416 dd_growth = req->dirtied_page ? c->page_budget : 0; 416 dd_growth = req->dirtied_page ? c->bi.page_budget : 0;
417 417
418 if (req->dirtied_ino) 418 if (req->dirtied_ino)
419 dd_growth += c->inode_budget << (req->dirtied_ino - 1); 419 dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1);
420 if (req->mod_dent) 420 if (req->mod_dent)
421 dd_growth += c->dent_budget; 421 dd_growth += c->bi.dent_budget;
422 dd_growth += req->dirtied_ino_d; 422 dd_growth += req->dirtied_ino_d;
423 return dd_growth; 423 return dd_growth;
424} 424}
@@ -460,19 +460,19 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
460 460
461again: 461again:
462 spin_lock(&c->space_lock); 462 spin_lock(&c->space_lock);
463 ubifs_assert(c->budg_idx_growth >= 0); 463 ubifs_assert(c->bi.idx_growth >= 0);
464 ubifs_assert(c->budg_data_growth >= 0); 464 ubifs_assert(c->bi.data_growth >= 0);
465 ubifs_assert(c->budg_dd_growth >= 0); 465 ubifs_assert(c->bi.dd_growth >= 0);
466 466
467 if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) { 467 if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) {
468 dbg_budg("no space"); 468 dbg_budg("no space");
469 spin_unlock(&c->space_lock); 469 spin_unlock(&c->space_lock);
470 return -ENOSPC; 470 return -ENOSPC;
471 } 471 }
472 472
473 c->budg_idx_growth += idx_growth; 473 c->bi.idx_growth += idx_growth;
474 c->budg_data_growth += data_growth; 474 c->bi.data_growth += data_growth;
475 c->budg_dd_growth += dd_growth; 475 c->bi.dd_growth += dd_growth;
476 476
477 err = do_budget_space(c); 477 err = do_budget_space(c);
478 if (likely(!err)) { 478 if (likely(!err)) {
@@ -484,9 +484,9 @@ again:
484 } 484 }
485 485
486 /* Restore the old values */ 486 /* Restore the old values */
487 c->budg_idx_growth -= idx_growth; 487 c->bi.idx_growth -= idx_growth;
488 c->budg_data_growth -= data_growth; 488 c->bi.data_growth -= data_growth;
489 c->budg_dd_growth -= dd_growth; 489 c->bi.dd_growth -= dd_growth;
490 spin_unlock(&c->space_lock); 490 spin_unlock(&c->space_lock);
491 491
492 if (req->fast) { 492 if (req->fast) {
@@ -506,9 +506,9 @@ again:
506 goto again; 506 goto again;
507 } 507 }
508 dbg_budg("FS is full, -ENOSPC"); 508 dbg_budg("FS is full, -ENOSPC");
509 c->nospace = 1; 509 c->bi.nospace = 1;
510 if (can_use_rp(c) || c->rp_size == 0) 510 if (can_use_rp(c) || c->rp_size == 0)
511 c->nospace_rp = 1; 511 c->bi.nospace_rp = 1;
512 smp_wmb(); 512 smp_wmb();
513 } else 513 } else
514 ubifs_err("cannot budget space, error %d", err); 514 ubifs_err("cannot budget space, error %d", err);
@@ -523,8 +523,8 @@ again:
523 * This function releases the space budgeted by 'ubifs_budget_space()'. Note, 523 * This function releases the space budgeted by 'ubifs_budget_space()'. Note,
524 * since the index changes (which were budgeted for in @req->idx_growth) will 524 * since the index changes (which were budgeted for in @req->idx_growth) will
525 * only be written to the media on commit, this function moves the index budget 525 * only be written to the media on commit, this function moves the index budget
526 * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be 526 * from @c->bi.idx_growth to @c->bi.uncommitted_idx. The latter will be zeroed
527 * zeroed by the commit operation. 527 * by the commit operation.
528 */ 528 */
529void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) 529void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
530{ 530{
@@ -553,23 +553,23 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
553 if (!req->data_growth && !req->dd_growth) 553 if (!req->data_growth && !req->dd_growth)
554 return; 554 return;
555 555
556 c->nospace = c->nospace_rp = 0; 556 c->bi.nospace = c->bi.nospace_rp = 0;
557 smp_wmb(); 557 smp_wmb();
558 558
559 spin_lock(&c->space_lock); 559 spin_lock(&c->space_lock);
560 c->budg_idx_growth -= req->idx_growth; 560 c->bi.idx_growth -= req->idx_growth;
561 c->budg_uncommitted_idx += req->idx_growth; 561 c->bi.uncommitted_idx += req->idx_growth;
562 c->budg_data_growth -= req->data_growth; 562 c->bi.data_growth -= req->data_growth;
563 c->budg_dd_growth -= req->dd_growth; 563 c->bi.dd_growth -= req->dd_growth;
564 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 564 c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
565 565
566 ubifs_assert(c->budg_idx_growth >= 0); 566 ubifs_assert(c->bi.idx_growth >= 0);
567 ubifs_assert(c->budg_data_growth >= 0); 567 ubifs_assert(c->bi.data_growth >= 0);
568 ubifs_assert(c->budg_dd_growth >= 0); 568 ubifs_assert(c->bi.dd_growth >= 0);
569 ubifs_assert(c->min_idx_lebs < c->main_lebs); 569 ubifs_assert(c->bi.min_idx_lebs < c->main_lebs);
570 ubifs_assert(!(c->budg_idx_growth & 7)); 570 ubifs_assert(!(c->bi.idx_growth & 7));
571 ubifs_assert(!(c->budg_data_growth & 7)); 571 ubifs_assert(!(c->bi.data_growth & 7));
572 ubifs_assert(!(c->budg_dd_growth & 7)); 572 ubifs_assert(!(c->bi.dd_growth & 7));
573 spin_unlock(&c->space_lock); 573 spin_unlock(&c->space_lock);
574} 574}
575 575
@@ -586,13 +586,13 @@ void ubifs_convert_page_budget(struct ubifs_info *c)
586{ 586{
587 spin_lock(&c->space_lock); 587 spin_lock(&c->space_lock);
588 /* Release the index growth reservation */ 588 /* Release the index growth reservation */
589 c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; 589 c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
590 /* Release the data growth reservation */ 590 /* Release the data growth reservation */
591 c->budg_data_growth -= c->page_budget; 591 c->bi.data_growth -= c->bi.page_budget;
592 /* Increase the dirty data growth reservation instead */ 592 /* Increase the dirty data growth reservation instead */
593 c->budg_dd_growth += c->page_budget; 593 c->bi.dd_growth += c->bi.page_budget;
594 /* And re-calculate the indexing space reservation */ 594 /* And re-calculate the indexing space reservation */
595 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 595 c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
596 spin_unlock(&c->space_lock); 596 spin_unlock(&c->space_lock);
597} 597}
598 598
@@ -612,7 +612,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
612 612
613 memset(&req, 0, sizeof(struct ubifs_budget_req)); 613 memset(&req, 0, sizeof(struct ubifs_budget_req));
614 /* The "no space" flags will be cleared because dd_growth is > 0 */ 614 /* The "no space" flags will be cleared because dd_growth is > 0 */
615 req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8); 615 req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8);
616 ubifs_release_budget(c, &req); 616 ubifs_release_budget(c, &req);
617} 617}
618 618
@@ -682,9 +682,9 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c)
682 int rsvd_idx_lebs, lebs; 682 int rsvd_idx_lebs, lebs;
683 long long available, outstanding, free; 683 long long available, outstanding, free;
684 684
685 ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); 685 ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
686 outstanding = c->budg_data_growth + c->budg_dd_growth; 686 outstanding = c->bi.data_growth + c->bi.dd_growth;
687 available = ubifs_calc_available(c, c->min_idx_lebs); 687 available = ubifs_calc_available(c, c->bi.min_idx_lebs);
688 688
689 /* 689 /*
690 * When reporting free space to user-space, UBIFS guarantees that it is 690 * When reporting free space to user-space, UBIFS guarantees that it is
@@ -697,8 +697,8 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c)
697 * Note, the calculations below are similar to what we have in 697 * Note, the calculations below are similar to what we have in
698 * 'do_budget_space()', so refer there for comments. 698 * 'do_budget_space()', so refer there for comments.
699 */ 699 */
700 if (c->min_idx_lebs > c->lst.idx_lebs) 700 if (c->bi.min_idx_lebs > c->lst.idx_lebs)
701 rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; 701 rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
702 else 702 else
703 rsvd_idx_lebs = 0; 703 rsvd_idx_lebs = 0;
704 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - 704 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 1bd01ded7123..87cd0ead8633 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -182,7 +182,7 @@ static int do_commit(struct ubifs_info *c)
182 c->mst_node->root_len = cpu_to_le32(zroot.len); 182 c->mst_node->root_len = cpu_to_le32(zroot.len);
183 c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum); 183 c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum);
184 c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs); 184 c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs);
185 c->mst_node->index_size = cpu_to_le64(c->old_idx_sz); 185 c->mst_node->index_size = cpu_to_le64(c->bi.old_idx_sz);
186 c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum); 186 c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum);
187 c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs); 187 c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs);
188 c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum); 188 c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum);
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 004d3745dc45..0bb2bcef0de9 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -34,7 +34,6 @@
34#include <linux/moduleparam.h> 34#include <linux/moduleparam.h>
35#include <linux/debugfs.h> 35#include <linux/debugfs.h>
36#include <linux/math64.h> 36#include <linux/math64.h>
37#include <linux/slab.h>
38 37
39#ifdef CONFIG_UBIFS_FS_DEBUG 38#ifdef CONFIG_UBIFS_FS_DEBUG
40 39
@@ -43,15 +42,12 @@ DEFINE_SPINLOCK(dbg_lock);
43static char dbg_key_buf0[128]; 42static char dbg_key_buf0[128];
44static char dbg_key_buf1[128]; 43static char dbg_key_buf1[128];
45 44
46unsigned int ubifs_msg_flags;
47unsigned int ubifs_chk_flags; 45unsigned int ubifs_chk_flags;
48unsigned int ubifs_tst_flags; 46unsigned int ubifs_tst_flags;
49 47
50module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR);
51module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); 48module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR);
52module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); 49module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR);
53 50
54MODULE_PARM_DESC(debug_msgs, "Debug message type flags");
55MODULE_PARM_DESC(debug_chks, "Debug check flags"); 51MODULE_PARM_DESC(debug_chks, "Debug check flags");
56MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); 52MODULE_PARM_DESC(debug_tsts, "Debug special test flags");
57 53
@@ -317,6 +313,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
317 printk(KERN_DEBUG "\tflags %#x\n", sup_flags); 313 printk(KERN_DEBUG "\tflags %#x\n", sup_flags);
318 printk(KERN_DEBUG "\t big_lpt %u\n", 314 printk(KERN_DEBUG "\t big_lpt %u\n",
319 !!(sup_flags & UBIFS_FLG_BIGLPT)); 315 !!(sup_flags & UBIFS_FLG_BIGLPT));
316 printk(KERN_DEBUG "\t space_fixup %u\n",
317 !!(sup_flags & UBIFS_FLG_SPACE_FIXUP));
320 printk(KERN_DEBUG "\tmin_io_size %u\n", 318 printk(KERN_DEBUG "\tmin_io_size %u\n",
321 le32_to_cpu(sup->min_io_size)); 319 le32_to_cpu(sup->min_io_size));
322 printk(KERN_DEBUG "\tleb_size %u\n", 320 printk(KERN_DEBUG "\tleb_size %u\n",
@@ -602,7 +600,7 @@ void dbg_dump_lstats(const struct ubifs_lp_stats *lst)
602 spin_unlock(&dbg_lock); 600 spin_unlock(&dbg_lock);
603} 601}
604 602
605void dbg_dump_budg(struct ubifs_info *c) 603void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
606{ 604{
607 int i; 605 int i;
608 struct rb_node *rb; 606 struct rb_node *rb;
@@ -610,26 +608,42 @@ void dbg_dump_budg(struct ubifs_info *c)
610 struct ubifs_gced_idx_leb *idx_gc; 608 struct ubifs_gced_idx_leb *idx_gc;
611 long long available, outstanding, free; 609 long long available, outstanding, free;
612 610
613 ubifs_assert(spin_is_locked(&c->space_lock)); 611 spin_lock(&c->space_lock);
614 spin_lock(&dbg_lock); 612 spin_lock(&dbg_lock);
615 printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, " 613 printk(KERN_DEBUG "(pid %d) Budgeting info: data budget sum %lld, "
616 "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid, 614 "total budget sum %lld\n", current->pid,
617 c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth); 615 bi->data_growth + bi->dd_growth,
618 printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, " 616 bi->data_growth + bi->dd_growth + bi->idx_growth);
619 "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth, 617 printk(KERN_DEBUG "\tbudg_data_growth %lld, budg_dd_growth %lld, "
620 c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth, 618 "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth,
621 c->freeable_cnt); 619 bi->idx_growth);
622 printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, " 620 printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %llu, "
623 "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs, 621 "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz,
624 c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt); 622 bi->uncommitted_idx);
623 printk(KERN_DEBUG "\tpage_budget %d, inode_budget %d, dent_budget %d\n",
624 bi->page_budget, bi->inode_budget, bi->dent_budget);
625 printk(KERN_DEBUG "\tnospace %u, nospace_rp %u\n",
626 bi->nospace, bi->nospace_rp);
627 printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
628 c->dark_wm, c->dead_wm, c->max_idx_node_sz);
629
630 if (bi != &c->bi)
631 /*
632 * If we are dumping saved budgeting data, do not print
633 * additional information which is about the current state, not
634 * the old one which corresponded to the saved budgeting data.
635 */
636 goto out_unlock;
637
638 printk(KERN_DEBUG "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n",
639 c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt);
625 printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " 640 printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, "
626 "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), 641 "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt),
627 atomic_long_read(&c->dirty_zn_cnt), 642 atomic_long_read(&c->dirty_zn_cnt),
628 atomic_long_read(&c->clean_zn_cnt)); 643 atomic_long_read(&c->clean_zn_cnt));
629 printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
630 c->dark_wm, c->dead_wm, c->max_idx_node_sz);
631 printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", 644 printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n",
632 c->gc_lnum, c->ihead_lnum); 645 c->gc_lnum, c->ihead_lnum);
646
633 /* If we are in R/O mode, journal heads do not exist */ 647 /* If we are in R/O mode, journal heads do not exist */
634 if (c->jheads) 648 if (c->jheads)
635 for (i = 0; i < c->jhead_cnt; i++) 649 for (i = 0; i < c->jhead_cnt; i++)
@@ -648,13 +662,15 @@ void dbg_dump_budg(struct ubifs_info *c)
648 printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); 662 printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state);
649 663
650 /* Print budgeting predictions */ 664 /* Print budgeting predictions */
651 available = ubifs_calc_available(c, c->min_idx_lebs); 665 available = ubifs_calc_available(c, c->bi.min_idx_lebs);
652 outstanding = c->budg_data_growth + c->budg_dd_growth; 666 outstanding = c->bi.data_growth + c->bi.dd_growth;
653 free = ubifs_get_free_space_nolock(c); 667 free = ubifs_get_free_space_nolock(c);
654 printk(KERN_DEBUG "Budgeting predictions:\n"); 668 printk(KERN_DEBUG "Budgeting predictions:\n");
655 printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", 669 printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n",
656 available, outstanding, free); 670 available, outstanding, free);
671out_unlock:
657 spin_unlock(&dbg_lock); 672 spin_unlock(&dbg_lock);
673 spin_unlock(&c->space_lock);
658} 674}
659 675
660void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) 676void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
@@ -729,7 +745,13 @@ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
729 if (bud->lnum == lp->lnum) { 745 if (bud->lnum == lp->lnum) {
730 int head = 0; 746 int head = 0;
731 for (i = 0; i < c->jhead_cnt; i++) { 747 for (i = 0; i < c->jhead_cnt; i++) {
732 if (lp->lnum == c->jheads[i].wbuf.lnum) { 748 /*
749 * Note, if we are in R/O mode or in the middle
750 * of mounting/re-mounting, the write-buffers do
751 * not exist.
752 */
753 if (c->jheads &&
754 lp->lnum == c->jheads[i].wbuf.lnum) {
733 printk(KERN_CONT ", jhead %s", 755 printk(KERN_CONT ", jhead %s",
734 dbg_jhead(i)); 756 dbg_jhead(i));
735 head = 1; 757 head = 1;
@@ -976,6 +998,8 @@ void dbg_save_space_info(struct ubifs_info *c)
976 998
977 spin_lock(&c->space_lock); 999 spin_lock(&c->space_lock);
978 memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats)); 1000 memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats));
1001 memcpy(&d->saved_bi, &c->bi, sizeof(struct ubifs_budg_info));
1002 d->saved_idx_gc_cnt = c->idx_gc_cnt;
979 1003
980 /* 1004 /*
981 * We use a dirty hack here and zero out @c->freeable_cnt, because it 1005 * We use a dirty hack here and zero out @c->freeable_cnt, because it
@@ -1042,14 +1066,14 @@ int dbg_check_space_info(struct ubifs_info *c)
1042out: 1066out:
1043 ubifs_msg("saved lprops statistics dump"); 1067 ubifs_msg("saved lprops statistics dump");
1044 dbg_dump_lstats(&d->saved_lst); 1068 dbg_dump_lstats(&d->saved_lst);
1045 ubifs_get_lp_stats(c, &lst); 1069 ubifs_msg("saved budgeting info dump");
1046 1070 dbg_dump_budg(c, &d->saved_bi);
1071 ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt);
1047 ubifs_msg("current lprops statistics dump"); 1072 ubifs_msg("current lprops statistics dump");
1073 ubifs_get_lp_stats(c, &lst);
1048 dbg_dump_lstats(&lst); 1074 dbg_dump_lstats(&lst);
1049 1075 ubifs_msg("current budgeting info dump");
1050 spin_lock(&c->space_lock); 1076 dbg_dump_budg(c, &c->bi);
1051 dbg_dump_budg(c);
1052 spin_unlock(&c->space_lock);
1053 dump_stack(); 1077 dump_stack();
1054 return -EINVAL; 1078 return -EINVAL;
1055} 1079}
@@ -1793,6 +1817,8 @@ static struct fsck_inode *add_inode(struct ubifs_info *c,
1793 struct rb_node **p, *parent = NULL; 1817 struct rb_node **p, *parent = NULL;
1794 struct fsck_inode *fscki; 1818 struct fsck_inode *fscki;
1795 ino_t inum = key_inum_flash(c, &ino->key); 1819 ino_t inum = key_inum_flash(c, &ino->key);
1820 struct inode *inode;
1821 struct ubifs_inode *ui;
1796 1822
1797 p = &fsckd->inodes.rb_node; 1823 p = &fsckd->inodes.rb_node;
1798 while (*p) { 1824 while (*p) {
@@ -1816,19 +1842,46 @@ static struct fsck_inode *add_inode(struct ubifs_info *c,
1816 if (!fscki) 1842 if (!fscki)
1817 return ERR_PTR(-ENOMEM); 1843 return ERR_PTR(-ENOMEM);
1818 1844
1845 inode = ilookup(c->vfs_sb, inum);
1846
1819 fscki->inum = inum; 1847 fscki->inum = inum;
1820 fscki->nlink = le32_to_cpu(ino->nlink); 1848 /*
1821 fscki->size = le64_to_cpu(ino->size); 1849 * If the inode is present in the VFS inode cache, use it instead of
1822 fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); 1850 * the on-flash inode which might be out-of-date. E.g., the size might
1823 fscki->xattr_sz = le32_to_cpu(ino->xattr_size); 1851 * be out-of-date. If we do not do this, the following may happen, for
1824 fscki->xattr_nms = le32_to_cpu(ino->xattr_names); 1852 * example:
1825 fscki->mode = le32_to_cpu(ino->mode); 1853 * 1. A power cut happens
1854 * 2. We mount the file-system R/O, the replay process fixes up the
1855 * inode size in the VFS cache, but on on-flash.
1856 * 3. 'check_leaf()' fails because it hits a data node beyond inode
1857 * size.
1858 */
1859 if (!inode) {
1860 fscki->nlink = le32_to_cpu(ino->nlink);
1861 fscki->size = le64_to_cpu(ino->size);
1862 fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
1863 fscki->xattr_sz = le32_to_cpu(ino->xattr_size);
1864 fscki->xattr_nms = le32_to_cpu(ino->xattr_names);
1865 fscki->mode = le32_to_cpu(ino->mode);
1866 } else {
1867 ui = ubifs_inode(inode);
1868 fscki->nlink = inode->i_nlink;
1869 fscki->size = inode->i_size;
1870 fscki->xattr_cnt = ui->xattr_cnt;
1871 fscki->xattr_sz = ui->xattr_size;
1872 fscki->xattr_nms = ui->xattr_names;
1873 fscki->mode = inode->i_mode;
1874 iput(inode);
1875 }
1876
1826 if (S_ISDIR(fscki->mode)) { 1877 if (S_ISDIR(fscki->mode)) {
1827 fscki->calc_sz = UBIFS_INO_NODE_SZ; 1878 fscki->calc_sz = UBIFS_INO_NODE_SZ;
1828 fscki->calc_cnt = 2; 1879 fscki->calc_cnt = 2;
1829 } 1880 }
1881
1830 rb_link_node(&fscki->rb, parent, p); 1882 rb_link_node(&fscki->rb, parent, p);
1831 rb_insert_color(&fscki->rb, &fsckd->inodes); 1883 rb_insert_color(&fscki->rb, &fsckd->inodes);
1884
1832 return fscki; 1885 return fscki;
1833} 1886}
1834 1887
@@ -2421,7 +2474,8 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
2421 hashb = key_block(c, &sb->key); 2474 hashb = key_block(c, &sb->key);
2422 2475
2423 if (hasha > hashb) { 2476 if (hasha > hashb) {
2424 ubifs_err("larger hash %u goes before %u", hasha, hashb); 2477 ubifs_err("larger hash %u goes before %u",
2478 hasha, hashb);
2425 goto error_dump; 2479 goto error_dump;
2426 } 2480 }
2427 } 2481 }
@@ -2437,14 +2491,12 @@ error_dump:
2437 return 0; 2491 return 0;
2438} 2492}
2439 2493
2440static int invocation_cnt;
2441
2442int dbg_force_in_the_gaps(void) 2494int dbg_force_in_the_gaps(void)
2443{ 2495{
2444 if (!dbg_force_in_the_gaps_enabled) 2496 if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
2445 return 0; 2497 return 0;
2446 /* Force in-the-gaps every 8th commit */ 2498
2447 return !((invocation_cnt++) & 0x7); 2499 return !(random32() & 7);
2448} 2500}
2449 2501
2450/* Failure mode for recovery testing */ 2502/* Failure mode for recovery testing */
@@ -2632,7 +2684,7 @@ int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
2632 int len, int check) 2684 int len, int check)
2633{ 2685{
2634 if (in_failure_mode(desc)) 2686 if (in_failure_mode(desc))
2635 return -EIO; 2687 return -EROFS;
2636 return ubi_leb_read(desc, lnum, buf, offset, len, check); 2688 return ubi_leb_read(desc, lnum, buf, offset, len, check);
2637} 2689}
2638 2690
@@ -2642,7 +2694,7 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
2642 int err, failing; 2694 int err, failing;
2643 2695
2644 if (in_failure_mode(desc)) 2696 if (in_failure_mode(desc))
2645 return -EIO; 2697 return -EROFS;
2646 failing = do_fail(desc, lnum, 1); 2698 failing = do_fail(desc, lnum, 1);
2647 if (failing) 2699 if (failing)
2648 cut_data(buf, len); 2700 cut_data(buf, len);
@@ -2650,7 +2702,7 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
2650 if (err) 2702 if (err)
2651 return err; 2703 return err;
2652 if (failing) 2704 if (failing)
2653 return -EIO; 2705 return -EROFS;
2654 return 0; 2706 return 0;
2655} 2707}
2656 2708
@@ -2660,12 +2712,12 @@ int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
2660 int err; 2712 int err;
2661 2713
2662 if (do_fail(desc, lnum, 1)) 2714 if (do_fail(desc, lnum, 1))
2663 return -EIO; 2715 return -EROFS;
2664 err = ubi_leb_change(desc, lnum, buf, len, dtype); 2716 err = ubi_leb_change(desc, lnum, buf, len, dtype);
2665 if (err) 2717 if (err)
2666 return err; 2718 return err;
2667 if (do_fail(desc, lnum, 1)) 2719 if (do_fail(desc, lnum, 1))
2668 return -EIO; 2720 return -EROFS;
2669 return 0; 2721 return 0;
2670} 2722}
2671 2723
@@ -2674,12 +2726,12 @@ int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum)
2674 int err; 2726 int err;
2675 2727
2676 if (do_fail(desc, lnum, 0)) 2728 if (do_fail(desc, lnum, 0))
2677 return -EIO; 2729 return -EROFS;
2678 err = ubi_leb_erase(desc, lnum); 2730 err = ubi_leb_erase(desc, lnum);
2679 if (err) 2731 if (err)
2680 return err; 2732 return err;
2681 if (do_fail(desc, lnum, 0)) 2733 if (do_fail(desc, lnum, 0))
2682 return -EIO; 2734 return -EROFS;
2683 return 0; 2735 return 0;
2684} 2736}
2685 2737
@@ -2688,19 +2740,19 @@ int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum)
2688 int err; 2740 int err;
2689 2741
2690 if (do_fail(desc, lnum, 0)) 2742 if (do_fail(desc, lnum, 0))
2691 return -EIO; 2743 return -EROFS;
2692 err = ubi_leb_unmap(desc, lnum); 2744 err = ubi_leb_unmap(desc, lnum);
2693 if (err) 2745 if (err)
2694 return err; 2746 return err;
2695 if (do_fail(desc, lnum, 0)) 2747 if (do_fail(desc, lnum, 0))
2696 return -EIO; 2748 return -EROFS;
2697 return 0; 2749 return 0;
2698} 2750}
2699 2751
2700int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) 2752int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum)
2701{ 2753{
2702 if (in_failure_mode(desc)) 2754 if (in_failure_mode(desc))
2703 return -EIO; 2755 return -EROFS;
2704 return ubi_is_mapped(desc, lnum); 2756 return ubi_is_mapped(desc, lnum);
2705} 2757}
2706 2758
@@ -2709,12 +2761,12 @@ int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype)
2709 int err; 2761 int err;
2710 2762
2711 if (do_fail(desc, lnum, 0)) 2763 if (do_fail(desc, lnum, 0))
2712 return -EIO; 2764 return -EROFS;
2713 err = ubi_leb_map(desc, lnum, dtype); 2765 err = ubi_leb_map(desc, lnum, dtype);
2714 if (err) 2766 if (err)
2715 return err; 2767 return err;
2716 if (do_fail(desc, lnum, 0)) 2768 if (do_fail(desc, lnum, 0))
2717 return -EIO; 2769 return -EROFS;
2718 return 0; 2770 return 0;
2719} 2771}
2720 2772
@@ -2784,7 +2836,7 @@ void dbg_debugfs_exit(void)
2784static int open_debugfs_file(struct inode *inode, struct file *file) 2836static int open_debugfs_file(struct inode *inode, struct file *file)
2785{ 2837{
2786 file->private_data = inode->i_private; 2838 file->private_data = inode->i_private;
2787 return 0; 2839 return nonseekable_open(inode, file);
2788} 2840}
2789 2841
2790static ssize_t write_debugfs_file(struct file *file, const char __user *buf, 2842static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
@@ -2795,18 +2847,15 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
2795 2847
2796 if (file->f_path.dentry == d->dfs_dump_lprops) 2848 if (file->f_path.dentry == d->dfs_dump_lprops)
2797 dbg_dump_lprops(c); 2849 dbg_dump_lprops(c);
2798 else if (file->f_path.dentry == d->dfs_dump_budg) { 2850 else if (file->f_path.dentry == d->dfs_dump_budg)
2799 spin_lock(&c->space_lock); 2851 dbg_dump_budg(c, &c->bi);
2800 dbg_dump_budg(c); 2852 else if (file->f_path.dentry == d->dfs_dump_tnc) {
2801 spin_unlock(&c->space_lock);
2802 } else if (file->f_path.dentry == d->dfs_dump_tnc) {
2803 mutex_lock(&c->tnc_mutex); 2853 mutex_lock(&c->tnc_mutex);
2804 dbg_dump_tnc(c); 2854 dbg_dump_tnc(c);
2805 mutex_unlock(&c->tnc_mutex); 2855 mutex_unlock(&c->tnc_mutex);
2806 } else 2856 } else
2807 return -EINVAL; 2857 return -EINVAL;
2808 2858
2809 *ppos += count;
2810 return count; 2859 return count;
2811} 2860}
2812 2861
@@ -2814,7 +2863,7 @@ static const struct file_operations dfs_fops = {
2814 .open = open_debugfs_file, 2863 .open = open_debugfs_file,
2815 .write = write_debugfs_file, 2864 .write = write_debugfs_file,
2816 .owner = THIS_MODULE, 2865 .owner = THIS_MODULE,
2817 .llseek = default_llseek, 2866 .llseek = no_llseek,
2818}; 2867};
2819 2868
2820/** 2869/**
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index e6493cac193d..a811ac4a26bb 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -31,6 +31,8 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
31 31
32#ifdef CONFIG_UBIFS_FS_DEBUG 32#ifdef CONFIG_UBIFS_FS_DEBUG
33 33
34#include <linux/random.h>
35
34/** 36/**
35 * ubifs_debug_info - per-FS debugging information. 37 * ubifs_debug_info - per-FS debugging information.
36 * @old_zroot: old index root - used by 'dbg_check_old_index()' 38 * @old_zroot: old index root - used by 'dbg_check_old_index()'
@@ -50,13 +52,15 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
50 * @new_ihead_offs: used by debugging to check @c->ihead_offs 52 * @new_ihead_offs: used by debugging to check @c->ihead_offs
51 * 53 *
52 * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()') 54 * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()')
53 * @saved_free: saved free space (used by 'dbg_save_space_info()') 55 * @saved_bi: saved budgeting information
56 * @saved_free: saved amount of free space
57 * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt
54 * 58 *
55 * dfs_dir_name: name of debugfs directory containing this file-system's files 59 * @dfs_dir_name: name of debugfs directory containing this file-system's files
56 * dfs_dir: direntry object of the file-system debugfs directory 60 * @dfs_dir: direntry object of the file-system debugfs directory
57 * dfs_dump_lprops: "dump lprops" debugfs knob 61 * @dfs_dump_lprops: "dump lprops" debugfs knob
58 * dfs_dump_budg: "dump budgeting information" debugfs knob 62 * @dfs_dump_budg: "dump budgeting information" debugfs knob
59 * dfs_dump_tnc: "dump TNC" debugfs knob 63 * @dfs_dump_tnc: "dump TNC" debugfs knob
60 */ 64 */
61struct ubifs_debug_info { 65struct ubifs_debug_info {
62 struct ubifs_zbranch old_zroot; 66 struct ubifs_zbranch old_zroot;
@@ -76,7 +80,9 @@ struct ubifs_debug_info {
76 int new_ihead_offs; 80 int new_ihead_offs;
77 81
78 struct ubifs_lp_stats saved_lst; 82 struct ubifs_lp_stats saved_lst;
83 struct ubifs_budg_info saved_bi;
79 long long saved_free; 84 long long saved_free;
85 int saved_idx_gc_cnt;
80 86
81 char dfs_dir_name[100]; 87 char dfs_dir_name[100];
82 struct dentry *dfs_dir; 88 struct dentry *dfs_dir;
@@ -101,23 +107,7 @@ struct ubifs_debug_info {
101 } \ 107 } \
102} while (0) 108} while (0)
103 109
104#define dbg_dump_stack() do { \ 110#define dbg_dump_stack() dump_stack()
105 if (!dbg_failure_mode) \
106 dump_stack(); \
107} while (0)
108
109/* Generic debugging messages */
110#define dbg_msg(fmt, ...) do { \
111 spin_lock(&dbg_lock); \
112 printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \
113 __func__, ##__VA_ARGS__); \
114 spin_unlock(&dbg_lock); \
115} while (0)
116
117#define dbg_do_msg(typ, fmt, ...) do { \
118 if (ubifs_msg_flags & typ) \
119 dbg_msg(fmt, ##__VA_ARGS__); \
120} while (0)
121 111
122#define dbg_err(fmt, ...) do { \ 112#define dbg_err(fmt, ...) do { \
123 spin_lock(&dbg_lock); \ 113 spin_lock(&dbg_lock); \
@@ -137,77 +127,40 @@ const char *dbg_key_str1(const struct ubifs_info *c,
137#define DBGKEY(key) dbg_key_str0(c, (key)) 127#define DBGKEY(key) dbg_key_str0(c, (key))
138#define DBGKEY1(key) dbg_key_str1(c, (key)) 128#define DBGKEY1(key) dbg_key_str1(c, (key))
139 129
140/* General messages */ 130#define ubifs_dbg_msg(type, fmt, ...) do { \
141#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) 131 spin_lock(&dbg_lock); \
132 pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \
133 spin_unlock(&dbg_lock); \
134} while (0)
142 135
136/* Just debugging messages not related to any specific UBIFS subsystem */
137#define dbg_msg(fmt, ...) ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__)
138/* General messages */
139#define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__)
143/* Additional journal messages */ 140/* Additional journal messages */
144#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) 141#define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__)
145
146/* Additional TNC messages */ 142/* Additional TNC messages */
147#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) 143#define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__)
148
149/* Additional lprops messages */ 144/* Additional lprops messages */
150#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) 145#define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__)
151
152/* Additional LEB find messages */ 146/* Additional LEB find messages */
153#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) 147#define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__)
154
155/* Additional mount messages */ 148/* Additional mount messages */
156#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) 149#define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__)
157
158/* Additional I/O messages */ 150/* Additional I/O messages */
159#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) 151#define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__)
160
161/* Additional commit messages */ 152/* Additional commit messages */
162#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) 153#define dbg_cmt(fmt, ...) ubifs_dbg_msg("cmt", fmt, ##__VA_ARGS__)
163
164/* Additional budgeting messages */ 154/* Additional budgeting messages */
165#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) 155#define dbg_budg(fmt, ...) ubifs_dbg_msg("budg", fmt, ##__VA_ARGS__)
166
167/* Additional log messages */ 156/* Additional log messages */
168#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) 157#define dbg_log(fmt, ...) ubifs_dbg_msg("log", fmt, ##__VA_ARGS__)
169
170/* Additional gc messages */ 158/* Additional gc messages */
171#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) 159#define dbg_gc(fmt, ...) ubifs_dbg_msg("gc", fmt, ##__VA_ARGS__)
172
173/* Additional scan messages */ 160/* Additional scan messages */
174#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) 161#define dbg_scan(fmt, ...) ubifs_dbg_msg("scan", fmt, ##__VA_ARGS__)
175
176/* Additional recovery messages */ 162/* Additional recovery messages */
177#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) 163#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__)
178
179/*
180 * Debugging message type flags.
181 *
182 * UBIFS_MSG_GEN: general messages
183 * UBIFS_MSG_JNL: journal messages
184 * UBIFS_MSG_MNT: mount messages
185 * UBIFS_MSG_CMT: commit messages
186 * UBIFS_MSG_FIND: LEB find messages
187 * UBIFS_MSG_BUDG: budgeting messages
188 * UBIFS_MSG_GC: garbage collection messages
189 * UBIFS_MSG_TNC: TNC messages
190 * UBIFS_MSG_LP: lprops messages
191 * UBIFS_MSG_IO: I/O messages
192 * UBIFS_MSG_LOG: log messages
193 * UBIFS_MSG_SCAN: scan messages
194 * UBIFS_MSG_RCVRY: recovery messages
195 */
196enum {
197 UBIFS_MSG_GEN = 0x1,
198 UBIFS_MSG_JNL = 0x2,
199 UBIFS_MSG_MNT = 0x4,
200 UBIFS_MSG_CMT = 0x8,
201 UBIFS_MSG_FIND = 0x10,
202 UBIFS_MSG_BUDG = 0x20,
203 UBIFS_MSG_GC = 0x40,
204 UBIFS_MSG_TNC = 0x80,
205 UBIFS_MSG_LP = 0x100,
206 UBIFS_MSG_IO = 0x200,
207 UBIFS_MSG_LOG = 0x400,
208 UBIFS_MSG_SCAN = 0x800,
209 UBIFS_MSG_RCVRY = 0x1000,
210};
211 164
212/* 165/*
213 * Debugging check flags. 166 * Debugging check flags.
@@ -233,11 +186,9 @@ enum {
233/* 186/*
234 * Special testing flags. 187 * Special testing flags.
235 * 188 *
236 * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method
237 * UBIFS_TST_RCVRY: failure mode for recovery testing 189 * UBIFS_TST_RCVRY: failure mode for recovery testing
238 */ 190 */
239enum { 191enum {
240 UBIFS_TST_FORCE_IN_THE_GAPS = 0x2,
241 UBIFS_TST_RCVRY = 0x4, 192 UBIFS_TST_RCVRY = 0x4,
242}; 193};
243 194
@@ -262,7 +213,7 @@ void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum,
262 int offs); 213 int offs);
263void dbg_dump_budget_req(const struct ubifs_budget_req *req); 214void dbg_dump_budget_req(const struct ubifs_budget_req *req);
264void dbg_dump_lstats(const struct ubifs_lp_stats *lst); 215void dbg_dump_lstats(const struct ubifs_lp_stats *lst);
265void dbg_dump_budg(struct ubifs_info *c); 216void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi);
266void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); 217void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp);
267void dbg_dump_lprops(struct ubifs_info *c); 218void dbg_dump_lprops(struct ubifs_info *c);
268void dbg_dump_lpt_info(struct ubifs_info *c); 219void dbg_dump_lpt_info(struct ubifs_info *c);
@@ -304,18 +255,16 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head);
304int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); 255int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head);
305 256
306/* Force the use of in-the-gaps method for testing */ 257/* Force the use of in-the-gaps method for testing */
307 258static inline int dbg_force_in_the_gaps_enabled(void)
308#define dbg_force_in_the_gaps_enabled \ 259{
309 (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS) 260 return ubifs_chk_flags & UBIFS_CHK_GEN;
310 261}
311int dbg_force_in_the_gaps(void); 262int dbg_force_in_the_gaps(void);
312 263
313/* Failure mode for recovery testing */ 264/* Failure mode for recovery testing */
314
315#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) 265#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY)
316 266
317#ifndef UBIFS_DBG_PRESERVE_UBI 267#ifndef UBIFS_DBG_PRESERVE_UBI
318
319#define ubi_leb_read dbg_leb_read 268#define ubi_leb_read dbg_leb_read
320#define ubi_leb_write dbg_leb_write 269#define ubi_leb_write dbg_leb_write
321#define ubi_leb_change dbg_leb_change 270#define ubi_leb_change dbg_leb_change
@@ -323,7 +272,6 @@ int dbg_force_in_the_gaps(void);
323#define ubi_leb_unmap dbg_leb_unmap 272#define ubi_leb_unmap dbg_leb_unmap
324#define ubi_is_mapped dbg_is_mapped 273#define ubi_is_mapped dbg_is_mapped
325#define ubi_leb_map dbg_leb_map 274#define ubi_leb_map dbg_leb_map
326
327#endif 275#endif
328 276
329int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, 277int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
@@ -370,33 +318,33 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
370 __func__, __LINE__, current->pid); \ 318 __func__, __LINE__, current->pid); \
371} while (0) 319} while (0)
372 320
373#define dbg_err(fmt, ...) do { \ 321#define dbg_err(fmt, ...) do { \
374 if (0) \ 322 if (0) \
375 ubifs_err(fmt, ##__VA_ARGS__); \ 323 ubifs_err(fmt, ##__VA_ARGS__); \
376} while (0) 324} while (0)
377 325
378#define dbg_msg(fmt, ...) do { \ 326#define ubifs_dbg_msg(fmt, ...) do { \
379 if (0) \ 327 if (0) \
380 printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \ 328 pr_debug(fmt "\n", ##__VA_ARGS__); \
381 current->pid, __func__, ##__VA_ARGS__); \
382} while (0) 329} while (0)
383 330
384#define dbg_dump_stack() 331#define dbg_dump_stack()
385#define ubifs_assert_cmt_locked(c) 332#define ubifs_assert_cmt_locked(c)
386 333
387#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 334#define dbg_msg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
388#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 335#define dbg_gen(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
389#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 336#define dbg_jnl(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
390#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 337#define dbg_tnc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
391#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 338#define dbg_lp(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
392#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 339#define dbg_find(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
393#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 340#define dbg_mnt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
394#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 341#define dbg_io(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
395#define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 342#define dbg_cmt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
396#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 343#define dbg_budg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
397#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 344#define dbg_log(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
398#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 345#define dbg_gc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
399#define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 346#define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
347#define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
400 348
401#define DBGKEY(key) ((char *)(key)) 349#define DBGKEY(key) ((char *)(key))
402#define DBGKEY1(key) ((char *)(key)) 350#define DBGKEY1(key) ((char *)(key))
@@ -420,7 +368,9 @@ static inline void
420dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; } 368dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; }
421static inline void 369static inline void
422dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; } 370dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; }
423static inline void dbg_dump_budg(struct ubifs_info *c) { return; } 371static inline void
372dbg_dump_budg(struct ubifs_info *c,
373 const struct ubifs_budg_info *bi) { return; }
424static inline void dbg_dump_lprop(const struct ubifs_info *c, 374static inline void dbg_dump_lprop(const struct ubifs_info *c,
425 const struct ubifs_lprops *lp) { return; } 375 const struct ubifs_lprops *lp) { return; }
426static inline void dbg_dump_lprops(struct ubifs_info *c) { return; } 376static inline void dbg_dump_lprops(struct ubifs_info *c) { return; }
@@ -482,8 +432,8 @@ dbg_check_nondata_nodes_order(struct ubifs_info *c,
482 struct list_head *head) { return 0; } 432 struct list_head *head) { return 0; }
483 433
484static inline int dbg_force_in_the_gaps(void) { return 0; } 434static inline int dbg_force_in_the_gaps(void) { return 0; }
485#define dbg_force_in_the_gaps_enabled 0 435#define dbg_force_in_the_gaps_enabled() 0
486#define dbg_failure_mode 0 436#define dbg_failure_mode 0
487 437
488static inline int dbg_debugfs_init(void) { return 0; } 438static inline int dbg_debugfs_init(void) { return 0; }
489static inline void dbg_debugfs_exit(void) { return; } 439static inline void dbg_debugfs_exit(void) { return; }
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index d80810bb4c37..c2b80943560d 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -603,7 +603,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
603 ubifs_release_budget(c, &req); 603 ubifs_release_budget(c, &req);
604 else { 604 else {
605 /* We've deleted something - clean the "no space" flags */ 605 /* We've deleted something - clean the "no space" flags */
606 c->nospace = c->nospace_rp = 0; 606 c->bi.nospace = c->bi.nospace_rp = 0;
607 smp_wmb(); 607 smp_wmb();
608 } 608 }
609 return 0; 609 return 0;
@@ -695,7 +695,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
695 ubifs_release_budget(c, &req); 695 ubifs_release_budget(c, &req);
696 else { 696 else {
697 /* We've deleted something - clean the "no space" flags */ 697 /* We've deleted something - clean the "no space" flags */
698 c->nospace = c->nospace_rp = 0; 698 c->bi.nospace = c->bi.nospace_rp = 0;
699 smp_wmb(); 699 smp_wmb();
700 } 700 }
701 return 0; 701 return 0;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index b286db79c686..5e7fccfc4b29 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -212,7 +212,7 @@ static void release_new_page_budget(struct ubifs_info *c)
212 */ 212 */
213static void release_existing_page_budget(struct ubifs_info *c) 213static void release_existing_page_budget(struct ubifs_info *c)
214{ 214{
215 struct ubifs_budget_req req = { .dd_growth = c->page_budget}; 215 struct ubifs_budget_req req = { .dd_growth = c->bi.page_budget};
216 216
217 ubifs_release_budget(c, &req); 217 ubifs_release_budget(c, &req);
218} 218}
@@ -971,11 +971,11 @@ static int do_writepage(struct page *page, int len)
971 * the page locked, and it locks @ui_mutex. However, write-back does take inode 971 * the page locked, and it locks @ui_mutex. However, write-back does take inode
972 * @i_mutex, which means other VFS operations may be run on this inode at the 972 * @i_mutex, which means other VFS operations may be run on this inode at the
973 * same time. And the problematic one is truncation to smaller size, from where 973 * same time. And the problematic one is truncation to smaller size, from where
974 * we have to call 'truncate_setsize()', which first changes @inode->i_size, then 974 * we have to call 'truncate_setsize()', which first changes @inode->i_size,
975 * drops the truncated pages. And while dropping the pages, it takes the page 975 * then drops the truncated pages. And while dropping the pages, it takes the
976 * lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with 976 * page lock. This means that 'do_truncation()' cannot call 'truncate_setsize()'
977 * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This 977 * with @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'.
978 * means that @inode->i_size is changed while @ui_mutex is unlocked. 978 * This means that @inode->i_size is changed while @ui_mutex is unlocked.
979 * 979 *
980 * XXX(truncate): with the new truncate sequence this is not true anymore, 980 * XXX(truncate): with the new truncate sequence this is not true anymore,
981 * and the calls to truncate_setsize can be moved around freely. They should 981 * and the calls to truncate_setsize can be moved around freely. They should
@@ -1189,7 +1189,7 @@ out_budg:
1189 if (budgeted) 1189 if (budgeted)
1190 ubifs_release_budget(c, &req); 1190 ubifs_release_budget(c, &req);
1191 else { 1191 else {
1192 c->nospace = c->nospace_rp = 0; 1192 c->bi.nospace = c->bi.nospace_rp = 0;
1193 smp_wmb(); 1193 smp_wmb();
1194 } 1194 }
1195 return err; 1195 return err;
@@ -1312,7 +1312,11 @@ int ubifs_fsync(struct file *file, int datasync)
1312 1312
1313 dbg_gen("syncing inode %lu", inode->i_ino); 1313 dbg_gen("syncing inode %lu", inode->i_ino);
1314 1314
1315 if (inode->i_sb->s_flags & MS_RDONLY) 1315 if (c->ro_mount)
1316 /*
1317 * For some really strange reasons VFS does not filter out
1318 * 'fsync()' for R/O mounted file-systems as per 2.6.39.
1319 */
1316 return 0; 1320 return 0;
1317 1321
1318 /* 1322 /*
@@ -1432,10 +1436,11 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
1432} 1436}
1433 1437
1434/* 1438/*
1435 * mmap()d file has taken write protection fault and is being made 1439 * mmap()d file has taken write protection fault and is being made writable.
1436 * writable. UBIFS must ensure page is budgeted for. 1440 * UBIFS must ensure page is budgeted for.
1437 */ 1441 */
1438static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 1442static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
1443 struct vm_fault *vmf)
1439{ 1444{
1440 struct page *page = vmf->page; 1445 struct page *page = vmf->page;
1441 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 1446 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
@@ -1536,7 +1541,6 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1536{ 1541{
1537 int err; 1542 int err;
1538 1543
1539 /* 'generic_file_mmap()' takes care of NOMMU case */
1540 err = generic_file_mmap(file, vma); 1544 err = generic_file_mmap(file, vma);
1541 if (err) 1545 if (err)
1542 return err; 1546 return err;
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 1d54383d1269..2559d174e004 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -252,8 +252,8 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
252 * But if the index takes fewer LEBs than it is reserved for it, 252 * But if the index takes fewer LEBs than it is reserved for it,
253 * this function must avoid picking those reserved LEBs. 253 * this function must avoid picking those reserved LEBs.
254 */ 254 */
255 if (c->min_idx_lebs >= c->lst.idx_lebs) { 255 if (c->bi.min_idx_lebs >= c->lst.idx_lebs) {
256 rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; 256 rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
257 exclude_index = 1; 257 exclude_index = 1;
258 } 258 }
259 spin_unlock(&c->space_lock); 259 spin_unlock(&c->space_lock);
@@ -276,7 +276,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
276 pick_free = 0; 276 pick_free = 0;
277 } else { 277 } else {
278 spin_lock(&c->space_lock); 278 spin_lock(&c->space_lock);
279 exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs); 279 exclude_index = (c->bi.min_idx_lebs >= c->lst.idx_lebs);
280 spin_unlock(&c->space_lock); 280 spin_unlock(&c->space_lock);
281 } 281 }
282 282
@@ -501,8 +501,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
501 501
502 /* Check if there are enough empty LEBs for commit */ 502 /* Check if there are enough empty LEBs for commit */
503 spin_lock(&c->space_lock); 503 spin_lock(&c->space_lock);
504 if (c->min_idx_lebs > c->lst.idx_lebs) 504 if (c->bi.min_idx_lebs > c->lst.idx_lebs)
505 rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; 505 rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
506 else 506 else
507 rsvd_idx_lebs = 0; 507 rsvd_idx_lebs = 0;
508 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - 508 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 151f10882820..ded29f6224c2 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -100,6 +100,10 @@ static int switch_gc_head(struct ubifs_info *c)
100 if (err) 100 if (err)
101 return err; 101 return err;
102 102
103 err = ubifs_wbuf_sync_nolock(wbuf);
104 if (err)
105 return err;
106
103 err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0); 107 err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0);
104 if (err) 108 if (err)
105 return err; 109 return err;
@@ -118,7 +122,7 @@ static int switch_gc_head(struct ubifs_info *c)
118 * This function compares data nodes @a and @b. Returns %1 if @a has greater 122 * This function compares data nodes @a and @b. Returns %1 if @a has greater
119 * inode or block number, and %-1 otherwise. 123 * inode or block number, and %-1 otherwise.
120 */ 124 */
121int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) 125static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
122{ 126{
123 ino_t inuma, inumb; 127 ino_t inuma, inumb;
124 struct ubifs_info *c = priv; 128 struct ubifs_info *c = priv;
@@ -161,7 +165,8 @@ int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
161 * first and sorted by length in descending order. Directory entry nodes go 165 * first and sorted by length in descending order. Directory entry nodes go
162 * after inode nodes and are sorted in ascending hash value order. 166 * after inode nodes and are sorted in ascending hash value order.
163 */ 167 */
164int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) 168static int nondata_nodes_cmp(void *priv, struct list_head *a,
169 struct list_head *b)
165{ 170{
166 ino_t inuma, inumb; 171 ino_t inuma, inumb;
167 struct ubifs_info *c = priv; 172 struct ubifs_info *c = priv;
@@ -473,6 +478,37 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
473 ubifs_assert(c->gc_lnum != lnum); 478 ubifs_assert(c->gc_lnum != lnum);
474 ubifs_assert(wbuf->lnum != lnum); 479 ubifs_assert(wbuf->lnum != lnum);
475 480
481 if (lp->free + lp->dirty == c->leb_size) {
482 /* Special case - a free LEB */
483 dbg_gc("LEB %d is free, return it", lp->lnum);
484 ubifs_assert(!(lp->flags & LPROPS_INDEX));
485
486 if (lp->free != c->leb_size) {
487 /*
488 * Write buffers must be sync'd before unmapping
489 * freeable LEBs, because one of them may contain data
490 * which obsoletes something in 'lp->pnum'.
491 */
492 err = gc_sync_wbufs(c);
493 if (err)
494 return err;
495 err = ubifs_change_one_lp(c, lp->lnum, c->leb_size,
496 0, 0, 0, 0);
497 if (err)
498 return err;
499 }
500 err = ubifs_leb_unmap(c, lp->lnum);
501 if (err)
502 return err;
503
504 if (c->gc_lnum == -1) {
505 c->gc_lnum = lnum;
506 return LEB_RETAINED;
507 }
508
509 return LEB_FREED;
510 }
511
476 /* 512 /*
477 * We scan the entire LEB even though we only really need to scan up to 513 * We scan the entire LEB even though we only really need to scan up to
478 * (c->leb_size - lp->free). 514 * (c->leb_size - lp->free).
@@ -682,37 +718,6 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
682 "(min. space %d)", lp.lnum, lp.free, lp.dirty, 718 "(min. space %d)", lp.lnum, lp.free, lp.dirty,
683 lp.free + lp.dirty, min_space); 719 lp.free + lp.dirty, min_space);
684 720
685 if (lp.free + lp.dirty == c->leb_size) {
686 /* An empty LEB was returned */
687 dbg_gc("LEB %d is free, return it", lp.lnum);
688 /*
689 * ubifs_find_dirty_leb() doesn't return freeable index
690 * LEBs.
691 */
692 ubifs_assert(!(lp.flags & LPROPS_INDEX));
693 if (lp.free != c->leb_size) {
694 /*
695 * Write buffers must be sync'd before
696 * unmapping freeable LEBs, because one of them
697 * may contain data which obsoletes something
698 * in 'lp.pnum'.
699 */
700 ret = gc_sync_wbufs(c);
701 if (ret)
702 goto out;
703 ret = ubifs_change_one_lp(c, lp.lnum,
704 c->leb_size, 0, 0, 0,
705 0);
706 if (ret)
707 goto out;
708 }
709 ret = ubifs_leb_unmap(c, lp.lnum);
710 if (ret)
711 goto out;
712 ret = lp.lnum;
713 break;
714 }
715
716 space_before = c->leb_size - wbuf->offs - wbuf->used; 721 space_before = c->leb_size - wbuf->offs - wbuf->used;
717 if (wbuf->lnum == -1) 722 if (wbuf->lnum == -1)
718 space_before = 0; 723 space_before = 0;
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index dfd168b7807e..166951e0dcd3 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -393,7 +393,7 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
393 ubifs_assert(wbuf->size % c->min_io_size == 0); 393 ubifs_assert(wbuf->size % c->min_io_size == 0);
394 ubifs_assert(!c->ro_media && !c->ro_mount); 394 ubifs_assert(!c->ro_media && !c->ro_mount);
395 if (c->leb_size - wbuf->offs >= c->max_write_size) 395 if (c->leb_size - wbuf->offs >= c->max_write_size)
396 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size )); 396 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
397 397
398 if (c->ro_error) 398 if (c->ro_error)
399 return -EROFS; 399 return -EROFS;
@@ -452,8 +452,8 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
452 * @dtype: data type 452 * @dtype: data type
453 * 453 *
454 * This function targets the write-buffer to logical eraseblock @lnum:@offs. 454 * This function targets the write-buffer to logical eraseblock @lnum:@offs.
455 * The write-buffer is synchronized if it is not empty. Returns zero in case of 455 * The write-buffer has to be empty. Returns zero in case of success and a
456 * success and a negative error code in case of failure. 456 * negative error code in case of failure.
457 */ 457 */
458int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, 458int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
459 int dtype) 459 int dtype)
@@ -465,13 +465,7 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
465 ubifs_assert(offs >= 0 && offs <= c->leb_size); 465 ubifs_assert(offs >= 0 && offs <= c->leb_size);
466 ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); 466 ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
467 ubifs_assert(lnum != wbuf->lnum); 467 ubifs_assert(lnum != wbuf->lnum);
468 468 ubifs_assert(wbuf->used == 0);
469 if (wbuf->used > 0) {
470 int err = ubifs_wbuf_sync_nolock(wbuf);
471
472 if (err)
473 return err;
474 }
475 469
476 spin_lock(&wbuf->lock); 470 spin_lock(&wbuf->lock);
477 wbuf->lnum = lnum; 471 wbuf->lnum = lnum;
@@ -573,7 +567,7 @@ out_timers:
573int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) 567int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
574{ 568{
575 struct ubifs_info *c = wbuf->c; 569 struct ubifs_info *c = wbuf->c;
576 int err, written, n, aligned_len = ALIGN(len, 8), offs; 570 int err, written, n, aligned_len = ALIGN(len, 8);
577 571
578 dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, 572 dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len,
579 dbg_ntype(((struct ubifs_ch *)buf)->node_type), 573 dbg_ntype(((struct ubifs_ch *)buf)->node_type),
@@ -588,7 +582,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
588 ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); 582 ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
589 ubifs_assert(!c->ro_media && !c->ro_mount); 583 ubifs_assert(!c->ro_media && !c->ro_mount);
590 if (c->leb_size - wbuf->offs >= c->max_write_size) 584 if (c->leb_size - wbuf->offs >= c->max_write_size)
591 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size )); 585 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
592 586
593 if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { 587 if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
594 err = -ENOSPC; 588 err = -ENOSPC;
@@ -636,7 +630,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
636 goto exit; 630 goto exit;
637 } 631 }
638 632
639 offs = wbuf->offs;
640 written = 0; 633 written = 0;
641 634
642 if (wbuf->used) { 635 if (wbuf->used) {
@@ -653,7 +646,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
653 if (err) 646 if (err)
654 goto out; 647 goto out;
655 648
656 offs += wbuf->size; 649 wbuf->offs += wbuf->size;
657 len -= wbuf->avail; 650 len -= wbuf->avail;
658 aligned_len -= wbuf->avail; 651 aligned_len -= wbuf->avail;
659 written += wbuf->avail; 652 written += wbuf->avail;
@@ -672,7 +665,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
672 if (err) 665 if (err)
673 goto out; 666 goto out;
674 667
675 offs += wbuf->size; 668 wbuf->offs += wbuf->size;
676 len -= wbuf->size; 669 len -= wbuf->size;
677 aligned_len -= wbuf->size; 670 aligned_len -= wbuf->size;
678 written += wbuf->size; 671 written += wbuf->size;
@@ -687,12 +680,13 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
687 n = aligned_len >> c->max_write_shift; 680 n = aligned_len >> c->max_write_shift;
688 if (n) { 681 if (n) {
689 n <<= c->max_write_shift; 682 n <<= c->max_write_shift;
690 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); 683 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum,
691 err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, 684 wbuf->offs);
692 wbuf->dtype); 685 err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written,
686 wbuf->offs, n, wbuf->dtype);
693 if (err) 687 if (err)
694 goto out; 688 goto out;
695 offs += n; 689 wbuf->offs += n;
696 aligned_len -= n; 690 aligned_len -= n;
697 len -= n; 691 len -= n;
698 written += n; 692 written += n;
@@ -707,7 +701,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
707 */ 701 */
708 memcpy(wbuf->buf, buf + written, len); 702 memcpy(wbuf->buf, buf + written, len);
709 703
710 wbuf->offs = offs;
711 if (c->leb_size - wbuf->offs >= c->max_write_size) 704 if (c->leb_size - wbuf->offs >= c->max_write_size)
712 wbuf->size = c->max_write_size; 705 wbuf->size = c->max_write_size;
713 else 706 else
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index aed25e864227..34b1679e6e3a 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -141,14 +141,8 @@ again:
141 * LEB with some empty space. 141 * LEB with some empty space.
142 */ 142 */
143 lnum = ubifs_find_free_space(c, len, &offs, squeeze); 143 lnum = ubifs_find_free_space(c, len, &offs, squeeze);
144 if (lnum >= 0) { 144 if (lnum >= 0)
145 /* Found an LEB, add it to the journal head */
146 err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
147 if (err)
148 goto out_return;
149 /* A new bud was successfully allocated and added to the log */
150 goto out; 145 goto out;
151 }
152 146
153 err = lnum; 147 err = lnum;
154 if (err != -ENOSPC) 148 if (err != -ENOSPC)
@@ -203,12 +197,23 @@ again:
203 return 0; 197 return 0;
204 } 198 }
205 199
206 err = ubifs_add_bud_to_log(c, jhead, lnum, 0);
207 if (err)
208 goto out_return;
209 offs = 0; 200 offs = 0;
210 201
211out: 202out:
203 /*
204 * Make sure we synchronize the write-buffer before we add the new bud
205 * to the log. Otherwise we may have a power cut after the log
206 * reference node for the last bud (@lnum) is written but before the
207 * write-buffer data are written to the next-to-last bud
208 * (@wbuf->lnum). And the effect would be that the recovery would see
209 * that there is corruption in the next-to-last bud.
210 */
211 err = ubifs_wbuf_sync_nolock(wbuf);
212 if (err)
213 goto out_return;
214 err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
215 if (err)
216 goto out_return;
212 err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype); 217 err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype);
213 if (err) 218 if (err)
214 goto out_unlock; 219 goto out_unlock;
@@ -380,10 +385,8 @@ out:
380 if (err == -ENOSPC) { 385 if (err == -ENOSPC) {
381 /* These are some budgeting problems, print useful information */ 386 /* These are some budgeting problems, print useful information */
382 down_write(&c->commit_sem); 387 down_write(&c->commit_sem);
383 spin_lock(&c->space_lock);
384 dbg_dump_stack(); 388 dbg_dump_stack();
385 dbg_dump_budg(c); 389 dbg_dump_budg(c, &c->bi);
386 spin_unlock(&c->space_lock);
387 dbg_dump_lprops(c); 390 dbg_dump_lprops(c);
388 cmt_retries = dbg_check_lprops(c); 391 cmt_retries = dbg_check_lprops(c);
389 up_write(&c->commit_sem); 392 up_write(&c->commit_sem);
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 4d0cb1241460..affea9494ae2 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -100,20 +100,6 @@ struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum)
100} 100}
101 101
102/** 102/**
103 * next_log_lnum - switch to the next log LEB.
104 * @c: UBIFS file-system description object
105 * @lnum: current log LEB
106 */
107static inline int next_log_lnum(const struct ubifs_info *c, int lnum)
108{
109 lnum += 1;
110 if (lnum > c->log_last)
111 lnum = UBIFS_LOG_LNUM;
112
113 return lnum;
114}
115
116/**
117 * empty_log_bytes - calculate amount of empty space in the log. 103 * empty_log_bytes - calculate amount of empty space in the log.
118 * @c: UBIFS file-system description object 104 * @c: UBIFS file-system description object
119 */ 105 */
@@ -175,26 +161,6 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
175} 161}
176 162
177/** 163/**
178 * ubifs_create_buds_lists - create journal head buds lists for remount rw.
179 * @c: UBIFS file-system description object
180 */
181void ubifs_create_buds_lists(struct ubifs_info *c)
182{
183 struct rb_node *p;
184
185 spin_lock(&c->buds_lock);
186 p = rb_first(&c->buds);
187 while (p) {
188 struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb);
189 struct ubifs_jhead *jhead = &c->jheads[bud->jhead];
190
191 list_add_tail(&bud->list, &jhead->buds_list);
192 p = rb_next(p);
193 }
194 spin_unlock(&c->buds_lock);
195}
196
197/**
198 * ubifs_add_bud_to_log - add a new bud to the log. 164 * ubifs_add_bud_to_log - add a new bud to the log.
199 * @c: UBIFS file-system description object 165 * @c: UBIFS file-system description object
200 * @jhead: journal head the bud belongs to 166 * @jhead: journal head the bud belongs to
@@ -277,7 +243,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
277 ref->jhead = cpu_to_le32(jhead); 243 ref->jhead = cpu_to_le32(jhead);
278 244
279 if (c->lhead_offs > c->leb_size - c->ref_node_alsz) { 245 if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
280 c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); 246 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
281 c->lhead_offs = 0; 247 c->lhead_offs = 0;
282 } 248 }
283 249
@@ -445,7 +411,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
445 411
446 /* Switch to the next log LEB */ 412 /* Switch to the next log LEB */
447 if (c->lhead_offs) { 413 if (c->lhead_offs) {
448 c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); 414 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
449 c->lhead_offs = 0; 415 c->lhead_offs = 0;
450 } 416 }
451 417
@@ -466,7 +432,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
466 432
467 c->lhead_offs += len; 433 c->lhead_offs += len;
468 if (c->lhead_offs == c->leb_size) { 434 if (c->lhead_offs == c->leb_size) {
469 c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); 435 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
470 c->lhead_offs = 0; 436 c->lhead_offs = 0;
471 } 437 }
472 438
@@ -553,7 +519,7 @@ int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum)
553 } 519 }
554 mutex_lock(&c->log_mutex); 520 mutex_lock(&c->log_mutex);
555 for (lnum = old_ltail_lnum; lnum != c->ltail_lnum; 521 for (lnum = old_ltail_lnum; lnum != c->ltail_lnum;
556 lnum = next_log_lnum(c, lnum)) { 522 lnum = ubifs_next_log_lnum(c, lnum)) {
557 dbg_log("unmap log LEB %d", lnum); 523 dbg_log("unmap log LEB %d", lnum);
558 err = ubifs_leb_unmap(c, lnum); 524 err = ubifs_leb_unmap(c, lnum);
559 if (err) 525 if (err)
@@ -662,7 +628,7 @@ static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs,
662 err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM); 628 err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM);
663 if (err) 629 if (err)
664 return err; 630 return err;
665 *lnum = next_log_lnum(c, *lnum); 631 *lnum = ubifs_next_log_lnum(c, *lnum);
666 *offs = 0; 632 *offs = 0;
667 } 633 }
668 memcpy(buf + *offs, node, len); 634 memcpy(buf + *offs, node, len);
@@ -732,7 +698,7 @@ int ubifs_consolidate_log(struct ubifs_info *c)
732 ubifs_scan_destroy(sleb); 698 ubifs_scan_destroy(sleb);
733 if (lnum == c->lhead_lnum) 699 if (lnum == c->lhead_lnum)
734 break; 700 break;
735 lnum = next_log_lnum(c, lnum); 701 lnum = ubifs_next_log_lnum(c, lnum);
736 } 702 }
737 if (offs) { 703 if (offs) {
738 int sz = ALIGN(offs, c->min_io_size); 704 int sz = ALIGN(offs, c->min_io_size);
@@ -752,7 +718,7 @@ int ubifs_consolidate_log(struct ubifs_info *c)
752 /* Unmap remaining LEBs */ 718 /* Unmap remaining LEBs */
753 lnum = write_lnum; 719 lnum = write_lnum;
754 do { 720 do {
755 lnum = next_log_lnum(c, lnum); 721 lnum = ubifs_next_log_lnum(c, lnum);
756 err = ubifs_leb_unmap(c, lnum); 722 err = ubifs_leb_unmap(c, lnum);
757 if (err) 723 if (err)
758 return err; 724 return err;
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 0ee0847f2421..667884f4a615 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -1007,21 +1007,11 @@ out:
1007} 1007}
1008 1008
1009/** 1009/**
1010 * struct scan_check_data - data provided to scan callback function.
1011 * @lst: LEB properties statistics
1012 * @err: error code
1013 */
1014struct scan_check_data {
1015 struct ubifs_lp_stats lst;
1016 int err;
1017};
1018
1019/**
1020 * scan_check_cb - scan callback. 1010 * scan_check_cb - scan callback.
1021 * @c: the UBIFS file-system description object 1011 * @c: the UBIFS file-system description object
1022 * @lp: LEB properties to scan 1012 * @lp: LEB properties to scan
1023 * @in_tree: whether the LEB properties are in main memory 1013 * @in_tree: whether the LEB properties are in main memory
1024 * @data: information passed to and from the caller of the scan 1014 * @lst: lprops statistics to update
1025 * 1015 *
1026 * This function returns a code that indicates whether the scan should continue 1016 * This function returns a code that indicates whether the scan should continue
1027 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree 1017 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
@@ -1030,11 +1020,10 @@ struct scan_check_data {
1030 */ 1020 */
1031static int scan_check_cb(struct ubifs_info *c, 1021static int scan_check_cb(struct ubifs_info *c,
1032 const struct ubifs_lprops *lp, int in_tree, 1022 const struct ubifs_lprops *lp, int in_tree,
1033 struct scan_check_data *data) 1023 struct ubifs_lp_stats *lst)
1034{ 1024{
1035 struct ubifs_scan_leb *sleb; 1025 struct ubifs_scan_leb *sleb;
1036 struct ubifs_scan_node *snod; 1026 struct ubifs_scan_node *snod;
1037 struct ubifs_lp_stats *lst = &data->lst;
1038 int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret; 1027 int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret;
1039 void *buf = NULL; 1028 void *buf = NULL;
1040 1029
@@ -1044,7 +1033,7 @@ static int scan_check_cb(struct ubifs_info *c,
1044 if (cat != (lp->flags & LPROPS_CAT_MASK)) { 1033 if (cat != (lp->flags & LPROPS_CAT_MASK)) {
1045 ubifs_err("bad LEB category %d expected %d", 1034 ubifs_err("bad LEB category %d expected %d",
1046 (lp->flags & LPROPS_CAT_MASK), cat); 1035 (lp->flags & LPROPS_CAT_MASK), cat);
1047 goto out; 1036 return -EINVAL;
1048 } 1037 }
1049 } 1038 }
1050 1039
@@ -1078,7 +1067,7 @@ static int scan_check_cb(struct ubifs_info *c,
1078 } 1067 }
1079 if (!found) { 1068 if (!found) {
1080 ubifs_err("bad LPT list (category %d)", cat); 1069 ubifs_err("bad LPT list (category %d)", cat);
1081 goto out; 1070 return -EINVAL;
1082 } 1071 }
1083 } 1072 }
1084 } 1073 }
@@ -1090,45 +1079,40 @@ static int scan_check_cb(struct ubifs_info *c,
1090 if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) || 1079 if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) ||
1091 lp != heap->arr[lp->hpos]) { 1080 lp != heap->arr[lp->hpos]) {
1092 ubifs_err("bad LPT heap (category %d)", cat); 1081 ubifs_err("bad LPT heap (category %d)", cat);
1093 goto out; 1082 return -EINVAL;
1094 } 1083 }
1095 } 1084 }
1096 1085
1097 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); 1086 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
1098 if (!buf) { 1087 if (!buf)
1099 ubifs_err("cannot allocate memory to scan LEB %d", lnum); 1088 return -ENOMEM;
1100 goto out; 1089
1090 /*
1091 * After an unclean unmount, empty and freeable LEBs
1092 * may contain garbage - do not scan them.
1093 */
1094 if (lp->free == c->leb_size) {
1095 lst->empty_lebs += 1;
1096 lst->total_free += c->leb_size;
1097 lst->total_dark += ubifs_calc_dark(c, c->leb_size);
1098 return LPT_SCAN_CONTINUE;
1099 }
1100 if (lp->free + lp->dirty == c->leb_size &&
1101 !(lp->flags & LPROPS_INDEX)) {
1102 lst->total_free += lp->free;
1103 lst->total_dirty += lp->dirty;
1104 lst->total_dark += ubifs_calc_dark(c, c->leb_size);
1105 return LPT_SCAN_CONTINUE;
1101 } 1106 }
1102 1107
1103 sleb = ubifs_scan(c, lnum, 0, buf, 0); 1108 sleb = ubifs_scan(c, lnum, 0, buf, 0);
1104 if (IS_ERR(sleb)) { 1109 if (IS_ERR(sleb)) {
1105 /* 1110 ret = PTR_ERR(sleb);
1106 * After an unclean unmount, empty and freeable LEBs 1111 if (ret == -EUCLEAN) {
1107 * may contain garbage. 1112 dbg_dump_lprops(c);
1108 */ 1113 dbg_dump_budg(c, &c->bi);
1109 if (lp->free == c->leb_size) {
1110 ubifs_err("scan errors were in empty LEB "
1111 "- continuing checking");
1112 lst->empty_lebs += 1;
1113 lst->total_free += c->leb_size;
1114 lst->total_dark += ubifs_calc_dark(c, c->leb_size);
1115 ret = LPT_SCAN_CONTINUE;
1116 goto exit;
1117 }
1118
1119 if (lp->free + lp->dirty == c->leb_size &&
1120 !(lp->flags & LPROPS_INDEX)) {
1121 ubifs_err("scan errors were in freeable LEB "
1122 "- continuing checking");
1123 lst->total_free += lp->free;
1124 lst->total_dirty += lp->dirty;
1125 lst->total_dark += ubifs_calc_dark(c, c->leb_size);
1126 ret = LPT_SCAN_CONTINUE;
1127 goto exit;
1128 } 1114 }
1129 data->err = PTR_ERR(sleb); 1115 goto out;
1130 ret = LPT_SCAN_STOP;
1131 goto exit;
1132 } 1116 }
1133 1117
1134 is_idx = -1; 1118 is_idx = -1;
@@ -1246,10 +1230,8 @@ static int scan_check_cb(struct ubifs_info *c,
1246 } 1230 }
1247 1231
1248 ubifs_scan_destroy(sleb); 1232 ubifs_scan_destroy(sleb);
1249 ret = LPT_SCAN_CONTINUE;
1250exit:
1251 vfree(buf); 1233 vfree(buf);
1252 return ret; 1234 return LPT_SCAN_CONTINUE;
1253 1235
1254out_print: 1236out_print:
1255 ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, " 1237 ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, "
@@ -1258,10 +1240,10 @@ out_print:
1258 dbg_dump_leb(c, lnum); 1240 dbg_dump_leb(c, lnum);
1259out_destroy: 1241out_destroy:
1260 ubifs_scan_destroy(sleb); 1242 ubifs_scan_destroy(sleb);
1243 ret = -EINVAL;
1261out: 1244out:
1262 vfree(buf); 1245 vfree(buf);
1263 data->err = -EINVAL; 1246 return ret;
1264 return LPT_SCAN_STOP;
1265} 1247}
1266 1248
1267/** 1249/**
@@ -1278,8 +1260,7 @@ out:
1278int dbg_check_lprops(struct ubifs_info *c) 1260int dbg_check_lprops(struct ubifs_info *c)
1279{ 1261{
1280 int i, err; 1262 int i, err;
1281 struct scan_check_data data; 1263 struct ubifs_lp_stats lst;
1282 struct ubifs_lp_stats *lst = &data.lst;
1283 1264
1284 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 1265 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
1285 return 0; 1266 return 0;
@@ -1294,29 +1275,23 @@ int dbg_check_lprops(struct ubifs_info *c)
1294 return err; 1275 return err;
1295 } 1276 }
1296 1277
1297 memset(lst, 0, sizeof(struct ubifs_lp_stats)); 1278 memset(&lst, 0, sizeof(struct ubifs_lp_stats));
1298
1299 data.err = 0;
1300 err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, 1279 err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1,
1301 (ubifs_lpt_scan_callback)scan_check_cb, 1280 (ubifs_lpt_scan_callback)scan_check_cb,
1302 &data); 1281 &lst);
1303 if (err && err != -ENOSPC) 1282 if (err && err != -ENOSPC)
1304 goto out; 1283 goto out;
1305 if (data.err) {
1306 err = data.err;
1307 goto out;
1308 }
1309 1284
1310 if (lst->empty_lebs != c->lst.empty_lebs || 1285 if (lst.empty_lebs != c->lst.empty_lebs ||
1311 lst->idx_lebs != c->lst.idx_lebs || 1286 lst.idx_lebs != c->lst.idx_lebs ||
1312 lst->total_free != c->lst.total_free || 1287 lst.total_free != c->lst.total_free ||
1313 lst->total_dirty != c->lst.total_dirty || 1288 lst.total_dirty != c->lst.total_dirty ||
1314 lst->total_used != c->lst.total_used) { 1289 lst.total_used != c->lst.total_used) {
1315 ubifs_err("bad overall accounting"); 1290 ubifs_err("bad overall accounting");
1316 ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " 1291 ubifs_err("calculated: empty_lebs %d, idx_lebs %d, "
1317 "total_free %lld, total_dirty %lld, total_used %lld", 1292 "total_free %lld, total_dirty %lld, total_used %lld",
1318 lst->empty_lebs, lst->idx_lebs, lst->total_free, 1293 lst.empty_lebs, lst.idx_lebs, lst.total_free,
1319 lst->total_dirty, lst->total_used); 1294 lst.total_dirty, lst.total_used);
1320 ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, " 1295 ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, "
1321 "total_free %lld, total_dirty %lld, total_used %lld", 1296 "total_free %lld, total_dirty %lld, total_used %lld",
1322 c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, 1297 c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free,
@@ -1325,11 +1300,11 @@ int dbg_check_lprops(struct ubifs_info *c)
1325 goto out; 1300 goto out;
1326 } 1301 }
1327 1302
1328 if (lst->total_dead != c->lst.total_dead || 1303 if (lst.total_dead != c->lst.total_dead ||
1329 lst->total_dark != c->lst.total_dark) { 1304 lst.total_dark != c->lst.total_dark) {
1330 ubifs_err("bad dead/dark space accounting"); 1305 ubifs_err("bad dead/dark space accounting");
1331 ubifs_err("calculated: total_dead %lld, total_dark %lld", 1306 ubifs_err("calculated: total_dead %lld, total_dark %lld",
1332 lst->total_dead, lst->total_dark); 1307 lst.total_dead, lst.total_dark);
1333 ubifs_err("read from lprops: total_dead %lld, total_dark %lld", 1308 ubifs_err("read from lprops: total_dead %lld, total_dark %lld",
1334 c->lst.total_dead, c->lst.total_dark); 1309 c->lst.total_dead, c->lst.total_dark);
1335 err = -EINVAL; 1310 err = -EINVAL;
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 0c9c69bd983a..dfcb5748a7dc 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -29,6 +29,12 @@
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include "ubifs.h" 30#include "ubifs.h"
31 31
32#ifdef CONFIG_UBIFS_FS_DEBUG
33static int dbg_populate_lsave(struct ubifs_info *c);
34#else
35#define dbg_populate_lsave(c) 0
36#endif
37
32/** 38/**
33 * first_dirty_cnode - find first dirty cnode. 39 * first_dirty_cnode - find first dirty cnode.
34 * @c: UBIFS file-system description object 40 * @c: UBIFS file-system description object
@@ -586,7 +592,7 @@ static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c,
586 if (nnode->nbranch[iip].lnum) 592 if (nnode->nbranch[iip].lnum)
587 break; 593 break;
588 } 594 }
589 } while (iip >= UBIFS_LPT_FANOUT); 595 } while (iip >= UBIFS_LPT_FANOUT);
590 596
591 /* Go right */ 597 /* Go right */
592 nnode = ubifs_get_nnode(c, nnode, iip); 598 nnode = ubifs_get_nnode(c, nnode, iip);
@@ -815,6 +821,10 @@ static void populate_lsave(struct ubifs_info *c)
815 c->lpt_drty_flgs |= LSAVE_DIRTY; 821 c->lpt_drty_flgs |= LSAVE_DIRTY;
816 ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); 822 ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz);
817 } 823 }
824
825 if (dbg_populate_lsave(c))
826 return;
827
818 list_for_each_entry(lprops, &c->empty_list, list) { 828 list_for_each_entry(lprops, &c->empty_list, list) {
819 c->lsave[cnt++] = lprops->lnum; 829 c->lsave[cnt++] = lprops->lnum;
820 if (cnt >= c->lsave_cnt) 830 if (cnt >= c->lsave_cnt)
@@ -1994,4 +2004,47 @@ void dbg_dump_lpt_lebs(const struct ubifs_info *c)
1994 current->pid); 2004 current->pid);
1995} 2005}
1996 2006
2007/**
2008 * dbg_populate_lsave - debugging version of 'populate_lsave()'
2009 * @c: UBIFS file-system description object
2010 *
2011 * This is a debugging version for 'populate_lsave()' which populates lsave
2012 * with random LEBs instead of useful LEBs, which is good for test coverage.
2013 * Returns zero if lsave has not been populated (this debugging feature is
2014 * disabled) and non-zero if lsave has been populated.
2015 */
2016static int dbg_populate_lsave(struct ubifs_info *c)
2017{
2018 struct ubifs_lprops *lprops;
2019 struct ubifs_lpt_heap *heap;
2020 int i;
2021
2022 if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
2023 return 0;
2024 if (random32() & 3)
2025 return 0;
2026
2027 for (i = 0; i < c->lsave_cnt; i++)
2028 c->lsave[i] = c->main_first;
2029
2030 list_for_each_entry(lprops, &c->empty_list, list)
2031 c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
2032 list_for_each_entry(lprops, &c->freeable_list, list)
2033 c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
2034 list_for_each_entry(lprops, &c->frdi_idx_list, list)
2035 c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
2036
2037 heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
2038 for (i = 0; i < heap->cnt; i++)
2039 c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
2040 heap = &c->lpt_heap[LPROPS_DIRTY - 1];
2041 for (i = 0; i < heap->cnt; i++)
2042 c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
2043 heap = &c->lpt_heap[LPROPS_FREE - 1];
2044 for (i = 0; i < heap->cnt; i++)
2045 c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
2046
2047 return 1;
2048}
2049
1997#endif /* CONFIG_UBIFS_FS_DEBUG */ 2050#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index 21f47afdacff..278c2382e8c2 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -148,7 +148,7 @@ static int validate_master(const struct ubifs_info *c)
148 } 148 }
149 149
150 main_sz = (long long)c->main_lebs * c->leb_size; 150 main_sz = (long long)c->main_lebs * c->leb_size;
151 if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) { 151 if (c->bi.old_idx_sz & 7 || c->bi.old_idx_sz >= main_sz) {
152 err = 9; 152 err = 9;
153 goto out; 153 goto out;
154 } 154 }
@@ -218,7 +218,7 @@ static int validate_master(const struct ubifs_info *c)
218 } 218 }
219 219
220 if (c->lst.total_dead + c->lst.total_dark + 220 if (c->lst.total_dead + c->lst.total_dark +
221 c->lst.total_used + c->old_idx_sz > main_sz) { 221 c->lst.total_used + c->bi.old_idx_sz > main_sz) {
222 err = 21; 222 err = 21;
223 goto out; 223 goto out;
224 } 224 }
@@ -286,7 +286,7 @@ int ubifs_read_master(struct ubifs_info *c)
286 c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum); 286 c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum);
287 c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum); 287 c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum);
288 c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs); 288 c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs);
289 c->old_idx_sz = le64_to_cpu(c->mst_node->index_size); 289 c->bi.old_idx_sz = le64_to_cpu(c->mst_node->index_size);
290 c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum); 290 c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum);
291 c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs); 291 c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs);
292 c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum); 292 c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum);
@@ -305,7 +305,7 @@ int ubifs_read_master(struct ubifs_info *c)
305 c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead); 305 c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead);
306 c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark); 306 c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark);
307 307
308 c->calc_idx_sz = c->old_idx_sz; 308 c->calc_idx_sz = c->bi.old_idx_sz;
309 309
310 if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS)) 310 if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS))
311 c->no_orphs = 1; 311 c->no_orphs = 1;
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index c3de04dc952a..0b5296a9a4c5 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -340,4 +340,21 @@ static inline void ubifs_release_lprops(struct ubifs_info *c)
340 mutex_unlock(&c->lp_mutex); 340 mutex_unlock(&c->lp_mutex);
341} 341}
342 342
343/**
344 * ubifs_next_log_lnum - switch to the next log LEB.
345 * @c: UBIFS file-system description object
346 * @lnum: current log LEB
347 *
348 * This helper function returns the log LEB number which goes next after LEB
349 * 'lnum'.
350 */
351static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum)
352{
353 lnum += 1;
354 if (lnum > c->log_last)
355 lnum = UBIFS_LOG_LNUM;
356
357 return lnum;
358}
359
343#endif /* __UBIFS_MISC_H__ */ 360#endif /* __UBIFS_MISC_H__ */
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 09df318e368f..bd644bf587a8 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -673,7 +673,8 @@ static int kill_orphans(struct ubifs_info *c)
673 sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); 673 sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
674 if (IS_ERR(sleb)) { 674 if (IS_ERR(sleb)) {
675 if (PTR_ERR(sleb) == -EUCLEAN) 675 if (PTR_ERR(sleb) == -EUCLEAN)
676 sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); 676 sleb = ubifs_recover_leb(c, lnum, 0,
677 c->sbuf, 0);
677 if (IS_ERR(sleb)) { 678 if (IS_ERR(sleb)) {
678 err = PTR_ERR(sleb); 679 err = PTR_ERR(sleb);
679 break; 680 break;
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 3dbad6fbd1eb..731d9e2e7b50 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -564,13 +564,16 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
564} 564}
565 565
566/** 566/**
567 * drop_incomplete_group - drop nodes from an incomplete group. 567 * drop_last_node - drop the last node or group of nodes.
568 * @sleb: scanned LEB information 568 * @sleb: scanned LEB information
569 * @offs: offset of dropped nodes is returned here 569 * @offs: offset of dropped nodes is returned here
570 * @grouped: non-zero if whole group of nodes have to be dropped
570 * 571 *
571 * This function returns %1 if nodes are dropped and %0 otherwise. 572 * This is a helper function for 'ubifs_recover_leb()' which drops the last
573 * node of the scanned LEB or the last group of nodes if @grouped is not zero.
574 * This function returns %1 if a node was dropped and %0 otherwise.
572 */ 575 */
573static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) 576static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped)
574{ 577{
575 int dropped = 0; 578 int dropped = 0;
576 579
@@ -589,6 +592,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
589 kfree(snod); 592 kfree(snod);
590 sleb->nodes_cnt -= 1; 593 sleb->nodes_cnt -= 1;
591 dropped = 1; 594 dropped = 1;
595 if (!grouped)
596 break;
592 } 597 }
593 return dropped; 598 return dropped;
594} 599}
@@ -609,8 +614,7 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
609struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, 614struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
610 int offs, void *sbuf, int grouped) 615 int offs, void *sbuf, int grouped)
611{ 616{
612 int err, len = c->leb_size - offs, need_clean = 0, quiet = 1; 617 int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
613 int empty_chkd = 0, start = offs;
614 struct ubifs_scan_leb *sleb; 618 struct ubifs_scan_leb *sleb;
615 void *buf = sbuf + offs; 619 void *buf = sbuf + offs;
616 620
@@ -620,12 +624,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
620 if (IS_ERR(sleb)) 624 if (IS_ERR(sleb))
621 return sleb; 625 return sleb;
622 626
623 if (sleb->ecc) 627 ubifs_assert(len >= 8);
624 need_clean = 1;
625
626 while (len >= 8) { 628 while (len >= 8) {
627 int ret;
628
629 dbg_scan("look at LEB %d:%d (%d bytes left)", 629 dbg_scan("look at LEB %d:%d (%d bytes left)",
630 lnum, offs, len); 630 lnum, offs, len);
631 631
@@ -635,8 +635,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
635 * Scan quietly until there is an error from which we cannot 635 * Scan quietly until there is an error from which we cannot
636 * recover 636 * recover
637 */ 637 */
638 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); 638 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
639
640 if (ret == SCANNED_A_NODE) { 639 if (ret == SCANNED_A_NODE) {
641 /* A valid node, and not a padding node */ 640 /* A valid node, and not a padding node */
642 struct ubifs_ch *ch = buf; 641 struct ubifs_ch *ch = buf;
@@ -649,70 +648,32 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
649 offs += node_len; 648 offs += node_len;
650 buf += node_len; 649 buf += node_len;
651 len -= node_len; 650 len -= node_len;
652 continue; 651 } else if (ret > 0) {
653 }
654
655 if (ret > 0) {
656 /* Padding bytes or a valid padding node */ 652 /* Padding bytes or a valid padding node */
657 offs += ret; 653 offs += ret;
658 buf += ret; 654 buf += ret;
659 len -= ret; 655 len -= ret;
660 continue; 656 } else if (ret == SCANNED_EMPTY_SPACE ||
661 } 657 ret == SCANNED_GARBAGE ||
662 658 ret == SCANNED_A_BAD_PAD_NODE ||
663 if (ret == SCANNED_EMPTY_SPACE) { 659 ret == SCANNED_A_CORRUPT_NODE) {
664 if (!is_empty(buf, len)) { 660 dbg_rcvry("found corruption - %d", ret);
665 if (!is_last_write(c, buf, offs))
666 break;
667 clean_buf(c, &buf, lnum, &offs, &len);
668 need_clean = 1;
669 }
670 empty_chkd = 1;
671 break; 661 break;
672 } 662 } else {
673 663 dbg_err("unexpected return value %d", ret);
674 if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
675 if (is_last_write(c, buf, offs)) {
676 clean_buf(c, &buf, lnum, &offs, &len);
677 need_clean = 1;
678 empty_chkd = 1;
679 break;
680 }
681
682 if (ret == SCANNED_A_CORRUPT_NODE)
683 if (no_more_nodes(c, buf, len, lnum, offs)) {
684 clean_buf(c, &buf, lnum, &offs, &len);
685 need_clean = 1;
686 empty_chkd = 1;
687 break;
688 }
689
690 if (quiet) {
691 /* Redo the last scan but noisily */
692 quiet = 0;
693 continue;
694 }
695
696 switch (ret) {
697 case SCANNED_GARBAGE:
698 dbg_err("garbage");
699 goto corrupted;
700 case SCANNED_A_CORRUPT_NODE:
701 case SCANNED_A_BAD_PAD_NODE:
702 dbg_err("bad node");
703 goto corrupted;
704 default:
705 dbg_err("unknown");
706 err = -EINVAL; 664 err = -EINVAL;
707 goto error; 665 goto error;
708 } 666 }
709 } 667 }
710 668
711 if (!empty_chkd && !is_empty(buf, len)) { 669 if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) {
712 if (is_last_write(c, buf, offs)) { 670 if (!is_last_write(c, buf, offs))
713 clean_buf(c, &buf, lnum, &offs, &len); 671 goto corrupted_rescan;
714 need_clean = 1; 672 } else if (ret == SCANNED_A_CORRUPT_NODE) {
715 } else { 673 if (!no_more_nodes(c, buf, len, lnum, offs))
674 goto corrupted_rescan;
675 } else if (!is_empty(buf, len)) {
676 if (!is_last_write(c, buf, offs)) {
716 int corruption = first_non_ff(buf, len); 677 int corruption = first_non_ff(buf, len);
717 678
718 /* 679 /*
@@ -728,29 +689,82 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
728 } 689 }
729 } 690 }
730 691
731 /* Drop nodes from incomplete group */ 692 min_io_unit = round_down(offs, c->min_io_size);
732 if (grouped && drop_incomplete_group(sleb, &offs)) { 693 if (grouped)
733 buf = sbuf + offs; 694 /*
734 len = c->leb_size - offs; 695 * If nodes are grouped, always drop the incomplete group at
735 clean_buf(c, &buf, lnum, &offs, &len); 696 * the end.
736 need_clean = 1; 697 */
737 } 698 drop_last_node(sleb, &offs, 1);
738 699
739 if (offs % c->min_io_size) { 700 /*
740 clean_buf(c, &buf, lnum, &offs, &len); 701 * While we are in the middle of the same min. I/O unit keep dropping
741 need_clean = 1; 702 * nodes. So basically, what we want is to make sure that the last min.
742 } 703 * I/O unit where we saw the corruption is dropped completely with all
704 * the uncorrupted nodes which may possibly sit there.
705 *
706 * In other words, let's name the min. I/O unit where the corruption
707 * starts B, and the previous min. I/O unit A. The below code tries to
708 * deal with a situation when half of B contains valid nodes or the end
709 * of a valid node, and the second half of B contains corrupted data or
710 * garbage. This means that UBIFS had been writing to B just before the
711 * power cut happened. I do not know how realistic is this scenario
712 * that half of the min. I/O unit had been written successfully and the
713 * other half not, but this is possible in our 'failure mode emulation'
714 * infrastructure at least.
715 *
716 * So what is the problem, why do we need to drop those nodes? Why can't
717 * we just clean-up the second half of B by putting a padding node
718 * there? We can, and this works fine with one exception which was
719 * reproduced with power cut emulation testing and happens extremely
720 * rarely. The description follows, but it is worth noting that that is
721 * only about the GC head, so we could do this trick only if the bud
722 * belongs to the GC head, but it does not seem to be worth an
723 * additional "if" statement.
724 *
725 * So, imagine the file-system is full, we run GC which is moving valid
726 * nodes from LEB X to LEB Y (obviously, LEB Y is the current GC head
727 * LEB). The @c->gc_lnum is -1, which means that GC will retain LEB X
728 * and will try to continue. Imagine that LEB X is currently the
729 * dirtiest LEB, and the amount of used space in LEB Y is exactly the
730 * same as amount of free space in LEB X.
731 *
732 * And a power cut happens when nodes are moved from LEB X to LEB Y. We
733 * are here trying to recover LEB Y which is the GC head LEB. We find
734 * the min. I/O unit B as described above. Then we clean-up LEB Y by
735 * padding min. I/O unit. And later 'ubifs_rcvry_gc_commit()' function
736 * fails, because it cannot find a dirty LEB which could be GC'd into
737 * LEB Y! Even LEB X does not match because the amount of valid nodes
738 * there does not fit the free space in LEB Y any more! And this is
739 * because of the padding node which we added to LEB Y. The
740 * user-visible effect of this which I once observed and analysed is
741 * that we cannot mount the file-system with -ENOSPC error.
742 *
743 * So obviously, to make sure that situation does not happen we should
744 * free min. I/O unit B in LEB Y completely and the last used min. I/O
745 * unit in LEB Y should be A. This is basically what the below code
746 * tries to do.
747 */
748 while (min_io_unit == round_down(offs, c->min_io_size) &&
749 min_io_unit != offs &&
750 drop_last_node(sleb, &offs, grouped));
751
752 buf = sbuf + offs;
753 len = c->leb_size - offs;
743 754
755 clean_buf(c, &buf, lnum, &offs, &len);
744 ubifs_end_scan(c, sleb, lnum, offs); 756 ubifs_end_scan(c, sleb, lnum, offs);
745 757
746 if (need_clean) { 758 err = fix_unclean_leb(c, sleb, start);
747 err = fix_unclean_leb(c, sleb, start); 759 if (err)
748 if (err) 760 goto error;
749 goto error;
750 }
751 761
752 return sleb; 762 return sleb;
753 763
764corrupted_rescan:
765 /* Re-scan the corrupted data with verbose messages */
766 dbg_err("corruptio %d", ret);
767 ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
754corrupted: 768corrupted:
755 ubifs_scanned_corruption(c, lnum, offs, buf); 769 ubifs_scanned_corruption(c, lnum, offs, buf);
756 err = -EUCLEAN; 770 err = -EUCLEAN;
@@ -1070,6 +1084,53 @@ int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf)
1070} 1084}
1071 1085
1072/** 1086/**
1087 * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit.
1088 * @c: UBIFS file-system description object
1089 *
1090 * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty
1091 * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns
1092 * zero in case of success and a negative error code in case of failure.
1093 */
1094static int grab_empty_leb(struct ubifs_info *c)
1095{
1096 int lnum, err;
1097
1098 /*
1099 * Note, it is very important to first search for an empty LEB and then
1100 * run the commit, not vice-versa. The reason is that there might be
1101 * only one empty LEB at the moment, the one which has been the
1102 * @c->gc_lnum just before the power cut happened. During the regular
1103 * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no
1104 * one but GC can grab it. But at this moment this single empty LEB is
1105 * not marked as taken, so if we run commit - what happens? Right, the
1106 * commit will grab it and write the index there. Remember that the
1107 * index always expands as long as there is free space, and it only
1108 * starts consolidating when we run out of space.
1109 *
1110 * IOW, if we run commit now, we might not be able to find a free LEB
1111 * after this.
1112 */
1113 lnum = ubifs_find_free_leb_for_idx(c);
1114 if (lnum < 0) {
1115 dbg_err("could not find an empty LEB");
1116 dbg_dump_lprops(c);
1117 dbg_dump_budg(c, &c->bi);
1118 return lnum;
1119 }
1120
1121 /* Reset the index flag */
1122 err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
1123 LPROPS_INDEX, 0);
1124 if (err)
1125 return err;
1126
1127 c->gc_lnum = lnum;
1128 dbg_rcvry("found empty LEB %d, run commit", lnum);
1129
1130 return ubifs_run_commit(c);
1131}
1132
1133/**
1073 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit. 1134 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
1074 * @c: UBIFS file-system description object 1135 * @c: UBIFS file-system description object
1075 * 1136 *
@@ -1091,71 +1152,26 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1091{ 1152{
1092 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; 1153 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
1093 struct ubifs_lprops lp; 1154 struct ubifs_lprops lp;
1094 int lnum, err; 1155 int err;
1156
1157 dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs);
1095 1158
1096 c->gc_lnum = -1; 1159 c->gc_lnum = -1;
1097 if (wbuf->lnum == -1) { 1160 if (wbuf->lnum == -1 || wbuf->offs == c->leb_size)
1098 dbg_rcvry("no GC head LEB"); 1161 return grab_empty_leb(c);
1099 goto find_free; 1162
1100 }
1101 /*
1102 * See whether the used space in the dirtiest LEB fits in the GC head
1103 * LEB.
1104 */
1105 if (wbuf->offs == c->leb_size) {
1106 dbg_rcvry("no room in GC head LEB");
1107 goto find_free;
1108 }
1109 err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); 1163 err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
1110 if (err) { 1164 if (err) {
1111 /* 1165 if (err != -ENOSPC)
1112 * There are no dirty or empty LEBs subject to here being
1113 * enough for the index. Try to use
1114 * 'ubifs_find_free_leb_for_idx()', which will return any empty
1115 * LEBs (ignoring index requirements). If the index then
1116 * doesn't have enough LEBs the recovery commit will fail -
1117 * which is the same result anyway i.e. recovery fails. So
1118 * there is no problem ignoring index requirements and just
1119 * grabbing a free LEB since we have already established there
1120 * is not a dirty LEB we could have used instead.
1121 */
1122 if (err == -ENOSPC) {
1123 dbg_rcvry("could not find a dirty LEB");
1124 goto find_free;
1125 }
1126 return err;
1127 }
1128 ubifs_assert(!(lp.flags & LPROPS_INDEX));
1129 lnum = lp.lnum;
1130 if (lp.free + lp.dirty == c->leb_size) {
1131 /* An empty LEB was returned */
1132 if (lp.free != c->leb_size) {
1133 err = ubifs_change_one_lp(c, lnum, c->leb_size,
1134 0, 0, 0, 0);
1135 if (err)
1136 return err;
1137 }
1138 err = ubifs_leb_unmap(c, lnum);
1139 if (err)
1140 return err; 1166 return err;
1141 c->gc_lnum = lnum; 1167
1142 dbg_rcvry("allocated LEB %d for GC", lnum); 1168 dbg_rcvry("could not find a dirty LEB");
1143 /* Run the commit */ 1169 return grab_empty_leb(c);
1144 dbg_rcvry("committing");
1145 return ubifs_run_commit(c);
1146 }
1147 /*
1148 * There was no empty LEB so the used space in the dirtiest LEB must fit
1149 * in the GC head LEB.
1150 */
1151 if (lp.free + lp.dirty < wbuf->offs) {
1152 dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d",
1153 lnum, wbuf->lnum, wbuf->offs);
1154 err = ubifs_return_leb(c, lnum);
1155 if (err)
1156 return err;
1157 goto find_free;
1158 } 1170 }
1171
1172 ubifs_assert(!(lp.flags & LPROPS_INDEX));
1173 ubifs_assert(lp.free + lp.dirty >= wbuf->offs);
1174
1159 /* 1175 /*
1160 * We run the commit before garbage collection otherwise subsequent 1176 * We run the commit before garbage collection otherwise subsequent
1161 * mounts will see the GC and orphan deletion in a different order. 1177 * mounts will see the GC and orphan deletion in a different order.
@@ -1164,11 +1180,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1164 err = ubifs_run_commit(c); 1180 err = ubifs_run_commit(c);
1165 if (err) 1181 if (err)
1166 return err; 1182 return err;
1167 /* 1183
1168 * The data in the dirtiest LEB fits in the GC head LEB, so do the GC 1184 dbg_rcvry("GC'ing LEB %d", lp.lnum);
1169 * - use locking to keep 'ubifs_assert()' happy.
1170 */
1171 dbg_rcvry("GC'ing LEB %d", lnum);
1172 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); 1185 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
1173 err = ubifs_garbage_collect_leb(c, &lp); 1186 err = ubifs_garbage_collect_leb(c, &lp);
1174 if (err >= 0) { 1187 if (err >= 0) {
@@ -1184,37 +1197,17 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1184 err = -EINVAL; 1197 err = -EINVAL;
1185 return err; 1198 return err;
1186 } 1199 }
1187 if (err != LEB_RETAINED) { 1200
1188 dbg_err("GC returned %d", err); 1201 ubifs_assert(err == LEB_RETAINED);
1202 if (err != LEB_RETAINED)
1189 return -EINVAL; 1203 return -EINVAL;
1190 } 1204
1191 err = ubifs_leb_unmap(c, c->gc_lnum); 1205 err = ubifs_leb_unmap(c, c->gc_lnum);
1192 if (err) 1206 if (err)
1193 return err; 1207 return err;
1194 dbg_rcvry("allocated LEB %d for GC", lnum);
1195 return 0;
1196 1208
1197find_free: 1209 dbg_rcvry("allocated LEB %d for GC", lp.lnum);
1198 /* 1210 return 0;
1199 * There is no GC head LEB or the free space in the GC head LEB is too
1200 * small, or there are not dirty LEBs. Allocate gc_lnum by calling
1201 * 'ubifs_find_free_leb_for_idx()' so GC is not run.
1202 */
1203 lnum = ubifs_find_free_leb_for_idx(c);
1204 if (lnum < 0) {
1205 dbg_err("could not find an empty LEB");
1206 return lnum;
1207 }
1208 /* And reset the index flag */
1209 err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
1210 LPROPS_INDEX, 0);
1211 if (err)
1212 return err;
1213 c->gc_lnum = lnum;
1214 dbg_rcvry("allocated LEB %d for GC", lnum);
1215 /* Run the commit */
1216 dbg_rcvry("committing");
1217 return ubifs_run_commit(c);
1218} 1211}
1219 1212
1220/** 1213/**
@@ -1456,7 +1449,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
1456 err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); 1449 err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
1457 if (err) 1450 if (err)
1458 goto out; 1451 goto out;
1459 dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", 1452 dbg_rcvry("inode %lu at %d:%d size %lld -> %lld",
1460 (unsigned long)e->inum, lnum, offs, i_size, e->d_size); 1453 (unsigned long)e->inum, lnum, offs, i_size, e->d_size);
1461 return 0; 1454 return 0;
1462 1455
@@ -1505,20 +1498,27 @@ int ubifs_recover_size(struct ubifs_info *c)
1505 e->i_size = le64_to_cpu(ino->size); 1498 e->i_size = le64_to_cpu(ino->size);
1506 } 1499 }
1507 } 1500 }
1501
1508 if (e->exists && e->i_size < e->d_size) { 1502 if (e->exists && e->i_size < e->d_size) {
1509 if (!e->inode && c->ro_mount) { 1503 if (c->ro_mount) {
1510 /* Fix the inode size and pin it in memory */ 1504 /* Fix the inode size and pin it in memory */
1511 struct inode *inode; 1505 struct inode *inode;
1506 struct ubifs_inode *ui;
1507
1508 ubifs_assert(!e->inode);
1512 1509
1513 inode = ubifs_iget(c->vfs_sb, e->inum); 1510 inode = ubifs_iget(c->vfs_sb, e->inum);
1514 if (IS_ERR(inode)) 1511 if (IS_ERR(inode))
1515 return PTR_ERR(inode); 1512 return PTR_ERR(inode);
1513
1514 ui = ubifs_inode(inode);
1516 if (inode->i_size < e->d_size) { 1515 if (inode->i_size < e->d_size) {
1517 dbg_rcvry("ino %lu size %lld -> %lld", 1516 dbg_rcvry("ino %lu size %lld -> %lld",
1518 (unsigned long)e->inum, 1517 (unsigned long)e->inum,
1519 e->d_size, inode->i_size); 1518 inode->i_size, e->d_size);
1520 inode->i_size = e->d_size; 1519 inode->i_size = e->d_size;
1521 ubifs_inode(inode)->ui_size = e->d_size; 1520 ui->ui_size = e->d_size;
1521 ui->synced_i_size = e->d_size;
1522 e->inode = inode; 1522 e->inode = inode;
1523 this = rb_next(this); 1523 this = rb_next(this);
1524 continue; 1524 continue;
@@ -1533,9 +1533,11 @@ int ubifs_recover_size(struct ubifs_info *c)
1533 iput(e->inode); 1533 iput(e->inode);
1534 } 1534 }
1535 } 1535 }
1536
1536 this = rb_next(this); 1537 this = rb_next(this);
1537 rb_erase(&e->rb, &c->size_tree); 1538 rb_erase(&e->rb, &c->size_tree);
1538 kfree(e); 1539 kfree(e);
1539 } 1540 }
1541
1540 return 0; 1542 return 0;
1541} 1543}
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index eed0fcff8d73..6617280d1679 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -33,43 +33,32 @@
33 */ 33 */
34 34
35#include "ubifs.h" 35#include "ubifs.h"
36 36#include <linux/list_sort.h>
37/*
38 * Replay flags.
39 *
40 * REPLAY_DELETION: node was deleted
41 * REPLAY_REF: node is a reference node
42 */
43enum {
44 REPLAY_DELETION = 1,
45 REPLAY_REF = 2,
46};
47 37
48/** 38/**
49 * struct replay_entry - replay tree entry. 39 * struct replay_entry - replay list entry.
50 * @lnum: logical eraseblock number of the node 40 * @lnum: logical eraseblock number of the node
51 * @offs: node offset 41 * @offs: node offset
52 * @len: node length 42 * @len: node length
43 * @deletion: non-zero if this entry corresponds to a node deletion
53 * @sqnum: node sequence number 44 * @sqnum: node sequence number
54 * @flags: replay flags 45 * @list: links the replay list
55 * @rb: links the replay tree
56 * @key: node key 46 * @key: node key
57 * @nm: directory entry name 47 * @nm: directory entry name
58 * @old_size: truncation old size 48 * @old_size: truncation old size
59 * @new_size: truncation new size 49 * @new_size: truncation new size
60 * @free: amount of free space in a bud
61 * @dirty: amount of dirty space in a bud from padding and deletion nodes
62 * 50 *
63 * UBIFS journal replay must compare node sequence numbers, which means it must 51 * The replay process first scans all buds and builds the replay list, then
64 * build a tree of node information to insert into the TNC. 52 * sorts the replay list in nodes sequence number order, and then inserts all
53 * the replay entries to the TNC.
65 */ 54 */
66struct replay_entry { 55struct replay_entry {
67 int lnum; 56 int lnum;
68 int offs; 57 int offs;
69 int len; 58 int len;
59 unsigned int deletion:1;
70 unsigned long long sqnum; 60 unsigned long long sqnum;
71 int flags; 61 struct list_head list;
72 struct rb_node rb;
73 union ubifs_key key; 62 union ubifs_key key;
74 union { 63 union {
75 struct qstr nm; 64 struct qstr nm;
@@ -77,10 +66,6 @@ struct replay_entry {
77 loff_t old_size; 66 loff_t old_size;
78 loff_t new_size; 67 loff_t new_size;
79 }; 68 };
80 struct {
81 int free;
82 int dirty;
83 };
84 }; 69 };
85}; 70};
86 71
@@ -88,57 +73,64 @@ struct replay_entry {
88 * struct bud_entry - entry in the list of buds to replay. 73 * struct bud_entry - entry in the list of buds to replay.
89 * @list: next bud in the list 74 * @list: next bud in the list
90 * @bud: bud description object 75 * @bud: bud description object
91 * @free: free bytes in the bud
92 * @sqnum: reference node sequence number 76 * @sqnum: reference node sequence number
77 * @free: free bytes in the bud
78 * @dirty: dirty bytes in the bud
93 */ 79 */
94struct bud_entry { 80struct bud_entry {
95 struct list_head list; 81 struct list_head list;
96 struct ubifs_bud *bud; 82 struct ubifs_bud *bud;
97 int free;
98 unsigned long long sqnum; 83 unsigned long long sqnum;
84 int free;
85 int dirty;
99}; 86};
100 87
101/** 88/**
102 * set_bud_lprops - set free and dirty space used by a bud. 89 * set_bud_lprops - set free and dirty space used by a bud.
103 * @c: UBIFS file-system description object 90 * @c: UBIFS file-system description object
104 * @r: replay entry of bud 91 * @b: bud entry which describes the bud
92 *
93 * This function makes sure the LEB properties of bud @b are set correctly
94 * after the replay. Returns zero in case of success and a negative error code
95 * in case of failure.
105 */ 96 */
106static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) 97static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b)
107{ 98{
108 const struct ubifs_lprops *lp; 99 const struct ubifs_lprops *lp;
109 int err = 0, dirty; 100 int err = 0, dirty;
110 101
111 ubifs_get_lprops(c); 102 ubifs_get_lprops(c);
112 103
113 lp = ubifs_lpt_lookup_dirty(c, r->lnum); 104 lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum);
114 if (IS_ERR(lp)) { 105 if (IS_ERR(lp)) {
115 err = PTR_ERR(lp); 106 err = PTR_ERR(lp);
116 goto out; 107 goto out;
117 } 108 }
118 109
119 dirty = lp->dirty; 110 dirty = lp->dirty;
120 if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { 111 if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
121 /* 112 /*
122 * The LEB was added to the journal with a starting offset of 113 * The LEB was added to the journal with a starting offset of
123 * zero which means the LEB must have been empty. The LEB 114 * zero which means the LEB must have been empty. The LEB
124 * property values should be lp->free == c->leb_size and 115 * property values should be @lp->free == @c->leb_size and
125 * lp->dirty == 0, but that is not the case. The reason is that 116 * @lp->dirty == 0, but that is not the case. The reason is that
126 * the LEB was garbage collected. The garbage collector resets 117 * the LEB had been garbage collected before it became the bud,
127 * the free and dirty space without recording it anywhere except 118 * and there was not commit inbetween. The garbage collector
128 * lprops, so if there is not a commit then lprops does not have 119 * resets the free and dirty space without recording it
129 * that information next time the file system is mounted. 120 * anywhere except lprops, so if there was no commit then
121 * lprops does not have that information.
130 * 122 *
131 * We do not need to adjust free space because the scan has told 123 * We do not need to adjust free space because the scan has told
132 * us the exact value which is recorded in the replay entry as 124 * us the exact value which is recorded in the replay entry as
133 * r->free. 125 * @b->free.
134 * 126 *
135 * However we do need to subtract from the dirty space the 127 * However we do need to subtract from the dirty space the
136 * amount of space that the garbage collector reclaimed, which 128 * amount of space that the garbage collector reclaimed, which
137 * is the whole LEB minus the amount of space that was free. 129 * is the whole LEB minus the amount of space that was free.
138 */ 130 */
139 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, 131 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
140 lp->free, lp->dirty); 132 lp->free, lp->dirty);
141 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, 133 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
142 lp->free, lp->dirty); 134 lp->free, lp->dirty);
143 dirty -= c->leb_size - lp->free; 135 dirty -= c->leb_size - lp->free;
144 /* 136 /*
@@ -150,21 +142,48 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
150 */ 142 */
151 if (dirty != 0) 143 if (dirty != 0)
152 dbg_msg("LEB %d lp: %d free %d dirty " 144 dbg_msg("LEB %d lp: %d free %d dirty "
153 "replay: %d free %d dirty", r->lnum, lp->free, 145 "replay: %d free %d dirty", b->bud->lnum,
154 lp->dirty, r->free, r->dirty); 146 lp->free, lp->dirty, b->free, b->dirty);
155 } 147 }
156 lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, 148 lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty,
157 lp->flags | LPROPS_TAKEN, 0); 149 lp->flags | LPROPS_TAKEN, 0);
158 if (IS_ERR(lp)) { 150 if (IS_ERR(lp)) {
159 err = PTR_ERR(lp); 151 err = PTR_ERR(lp);
160 goto out; 152 goto out;
161 } 153 }
154
155 /* Make sure the journal head points to the latest bud */
156 err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf,
157 b->bud->lnum, c->leb_size - b->free,
158 UBI_SHORTTERM);
159
162out: 160out:
163 ubifs_release_lprops(c); 161 ubifs_release_lprops(c);
164 return err; 162 return err;
165} 163}
166 164
167/** 165/**
166 * set_buds_lprops - set free and dirty space for all replayed buds.
167 * @c: UBIFS file-system description object
168 *
169 * This function sets LEB properties for all replayed buds. Returns zero in
170 * case of success and a negative error code in case of failure.
171 */
172static int set_buds_lprops(struct ubifs_info *c)
173{
174 struct bud_entry *b;
175 int err;
176
177 list_for_each_entry(b, &c->replay_buds, list) {
178 err = set_bud_lprops(c, b);
179 if (err)
180 return err;
181 }
182
183 return 0;
184}
185
186/**
168 * trun_remove_range - apply a replay entry for a truncation to the TNC. 187 * trun_remove_range - apply a replay entry for a truncation to the TNC.
169 * @c: UBIFS file-system description object 188 * @c: UBIFS file-system description object
170 * @r: replay entry of truncation 189 * @r: replay entry of truncation
@@ -200,24 +219,22 @@ static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
200 */ 219 */
201static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) 220static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
202{ 221{
203 int err, deletion = ((r->flags & REPLAY_DELETION) != 0); 222 int err;
204 223
205 dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, 224 dbg_mnt("LEB %d:%d len %d deletion %d sqnum %llu %s", r->lnum,
206 r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); 225 r->offs, r->len, r->deletion, r->sqnum, DBGKEY(&r->key));
207 226
208 /* Set c->replay_sqnum to help deal with dangling branches. */ 227 /* Set c->replay_sqnum to help deal with dangling branches. */
209 c->replay_sqnum = r->sqnum; 228 c->replay_sqnum = r->sqnum;
210 229
211 if (r->flags & REPLAY_REF) 230 if (is_hash_key(c, &r->key)) {
212 err = set_bud_lprops(c, r); 231 if (r->deletion)
213 else if (is_hash_key(c, &r->key)) {
214 if (deletion)
215 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); 232 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
216 else 233 else
217 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, 234 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
218 r->len, &r->nm); 235 r->len, &r->nm);
219 } else { 236 } else {
220 if (deletion) 237 if (r->deletion)
221 switch (key_type(c, &r->key)) { 238 switch (key_type(c, &r->key)) {
222 case UBIFS_INO_KEY: 239 case UBIFS_INO_KEY:
223 { 240 {
@@ -240,7 +257,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
240 return err; 257 return err;
241 258
242 if (c->need_recovery) 259 if (c->need_recovery)
243 err = ubifs_recover_size_accum(c, &r->key, deletion, 260 err = ubifs_recover_size_accum(c, &r->key, r->deletion,
244 r->new_size); 261 r->new_size);
245 } 262 }
246 263
@@ -248,68 +265,77 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
248} 265}
249 266
250/** 267/**
251 * destroy_replay_tree - destroy the replay. 268 * replay_entries_cmp - compare 2 replay entries.
252 * @c: UBIFS file-system description object 269 * @priv: UBIFS file-system description object
270 * @a: first replay entry
271 * @b: second replay entry
253 * 272 *
254 * Destroy the replay tree. 273 * This is a comparison function for 'list_sort()' which compares 2 replay
274 * entries @a and @b by comparing their sequence number. Returns %1 if @a has
275 * greater sequence number and %-1 otherwise.
255 */ 276 */
256static void destroy_replay_tree(struct ubifs_info *c) 277static int replay_entries_cmp(void *priv, struct list_head *a,
278 struct list_head *b)
257{ 279{
258 struct rb_node *this = c->replay_tree.rb_node; 280 struct replay_entry *ra, *rb;
259 struct replay_entry *r; 281
260 282 cond_resched();
261 while (this) { 283 if (a == b)
262 if (this->rb_left) { 284 return 0;
263 this = this->rb_left; 285
264 continue; 286 ra = list_entry(a, struct replay_entry, list);
265 } else if (this->rb_right) { 287 rb = list_entry(b, struct replay_entry, list);
266 this = this->rb_right; 288 ubifs_assert(ra->sqnum != rb->sqnum);
267 continue; 289 if (ra->sqnum > rb->sqnum)
268 } 290 return 1;
269 r = rb_entry(this, struct replay_entry, rb); 291 return -1;
270 this = rb_parent(this);
271 if (this) {
272 if (this->rb_left == &r->rb)
273 this->rb_left = NULL;
274 else
275 this->rb_right = NULL;
276 }
277 if (is_hash_key(c, &r->key))
278 kfree(r->nm.name);
279 kfree(r);
280 }
281 c->replay_tree = RB_ROOT;
282} 292}
283 293
284/** 294/**
285 * apply_replay_tree - apply the replay tree to the TNC. 295 * apply_replay_list - apply the replay list to the TNC.
286 * @c: UBIFS file-system description object 296 * @c: UBIFS file-system description object
287 * 297 *
288 * Apply the replay tree. 298 * Apply all entries in the replay list to the TNC. Returns zero in case of
289 * Returns zero in case of success and a negative error code in case of 299 * success and a negative error code in case of failure.
290 * failure.
291 */ 300 */
292static int apply_replay_tree(struct ubifs_info *c) 301static int apply_replay_list(struct ubifs_info *c)
293{ 302{
294 struct rb_node *this = rb_first(&c->replay_tree); 303 struct replay_entry *r;
304 int err;
295 305
296 while (this) { 306 list_sort(c, &c->replay_list, &replay_entries_cmp);
297 struct replay_entry *r;
298 int err;
299 307
308 list_for_each_entry(r, &c->replay_list, list) {
300 cond_resched(); 309 cond_resched();
301 310
302 r = rb_entry(this, struct replay_entry, rb);
303 err = apply_replay_entry(c, r); 311 err = apply_replay_entry(c, r);
304 if (err) 312 if (err)
305 return err; 313 return err;
306 this = rb_next(this);
307 } 314 }
315
308 return 0; 316 return 0;
309} 317}
310 318
311/** 319/**
312 * insert_node - insert a node to the replay tree. 320 * destroy_replay_list - destroy the replay.
321 * @c: UBIFS file-system description object
322 *
323 * Destroy the replay list.
324 */
325static void destroy_replay_list(struct ubifs_info *c)
326{
327 struct replay_entry *r, *tmp;
328
329 list_for_each_entry_safe(r, tmp, &c->replay_list, list) {
330 if (is_hash_key(c, &r->key))
331 kfree(r->nm.name);
332 list_del(&r->list);
333 kfree(r);
334 }
335}
336
337/**
338 * insert_node - insert a node to the replay list
313 * @c: UBIFS file-system description object 339 * @c: UBIFS file-system description object
314 * @lnum: node logical eraseblock number 340 * @lnum: node logical eraseblock number
315 * @offs: node offset 341 * @offs: node offset
@@ -321,39 +347,25 @@ static int apply_replay_tree(struct ubifs_info *c)
321 * @old_size: truncation old size 347 * @old_size: truncation old size
322 * @new_size: truncation new size 348 * @new_size: truncation new size
323 * 349 *
324 * This function inserts a scanned non-direntry node to the replay tree. The 350 * This function inserts a scanned non-direntry node to the replay list. The
325 * replay tree is an RB-tree containing @struct replay_entry elements which are 351 * replay list contains @struct replay_entry elements, and we sort this list in
326 * indexed by the sequence number. The replay tree is applied at the very end 352 * sequence number order before applying it. The replay list is applied at the
327 * of the replay process. Since the tree is sorted in sequence number order, 353 * very end of the replay process. Since the list is sorted in sequence number
328 * the older modifications are applied first. This function returns zero in 354 * order, the older modifications are applied first. This function returns zero
329 * case of success and a negative error code in case of failure. 355 * in case of success and a negative error code in case of failure.
330 */ 356 */
331static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, 357static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
332 union ubifs_key *key, unsigned long long sqnum, 358 union ubifs_key *key, unsigned long long sqnum,
333 int deletion, int *used, loff_t old_size, 359 int deletion, int *used, loff_t old_size,
334 loff_t new_size) 360 loff_t new_size)
335{ 361{
336 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
337 struct replay_entry *r; 362 struct replay_entry *r;
338 363
364 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
365
339 if (key_inum(c, key) >= c->highest_inum) 366 if (key_inum(c, key) >= c->highest_inum)
340 c->highest_inum = key_inum(c, key); 367 c->highest_inum = key_inum(c, key);
341 368
342 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
343 while (*p) {
344 parent = *p;
345 r = rb_entry(parent, struct replay_entry, rb);
346 if (sqnum < r->sqnum) {
347 p = &(*p)->rb_left;
348 continue;
349 } else if (sqnum > r->sqnum) {
350 p = &(*p)->rb_right;
351 continue;
352 }
353 ubifs_err("duplicate sqnum in replay");
354 return -EINVAL;
355 }
356
357 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 369 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
358 if (!r) 370 if (!r)
359 return -ENOMEM; 371 return -ENOMEM;
@@ -363,19 +375,18 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
363 r->lnum = lnum; 375 r->lnum = lnum;
364 r->offs = offs; 376 r->offs = offs;
365 r->len = len; 377 r->len = len;
378 r->deletion = !!deletion;
366 r->sqnum = sqnum; 379 r->sqnum = sqnum;
367 r->flags = (deletion ? REPLAY_DELETION : 0); 380 key_copy(c, key, &r->key);
368 r->old_size = old_size; 381 r->old_size = old_size;
369 r->new_size = new_size; 382 r->new_size = new_size;
370 key_copy(c, key, &r->key);
371 383
372 rb_link_node(&r->rb, parent, p); 384 list_add_tail(&r->list, &c->replay_list);
373 rb_insert_color(&r->rb, &c->replay_tree);
374 return 0; 385 return 0;
375} 386}
376 387
377/** 388/**
378 * insert_dent - insert a directory entry node into the replay tree. 389 * insert_dent - insert a directory entry node into the replay list.
379 * @c: UBIFS file-system description object 390 * @c: UBIFS file-system description object
380 * @lnum: node logical eraseblock number 391 * @lnum: node logical eraseblock number
381 * @offs: node offset 392 * @offs: node offset
@@ -387,43 +398,25 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
387 * @deletion: non-zero if this is a deletion 398 * @deletion: non-zero if this is a deletion
388 * @used: number of bytes in use in a LEB 399 * @used: number of bytes in use in a LEB
389 * 400 *
390 * This function inserts a scanned directory entry node to the replay tree. 401 * This function inserts a scanned directory entry node or an extended
391 * Returns zero in case of success and a negative error code in case of 402 * attribute entry to the replay list. Returns zero in case of success and a
392 * failure. 403 * negative error code in case of failure.
393 *
394 * This function is also used for extended attribute entries because they are
395 * implemented as directory entry nodes.
396 */ 404 */
397static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, 405static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
398 union ubifs_key *key, const char *name, int nlen, 406 union ubifs_key *key, const char *name, int nlen,
399 unsigned long long sqnum, int deletion, int *used) 407 unsigned long long sqnum, int deletion, int *used)
400{ 408{
401 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
402 struct replay_entry *r; 409 struct replay_entry *r;
403 char *nbuf; 410 char *nbuf;
404 411
412 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
405 if (key_inum(c, key) >= c->highest_inum) 413 if (key_inum(c, key) >= c->highest_inum)
406 c->highest_inum = key_inum(c, key); 414 c->highest_inum = key_inum(c, key);
407 415
408 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
409 while (*p) {
410 parent = *p;
411 r = rb_entry(parent, struct replay_entry, rb);
412 if (sqnum < r->sqnum) {
413 p = &(*p)->rb_left;
414 continue;
415 }
416 if (sqnum > r->sqnum) {
417 p = &(*p)->rb_right;
418 continue;
419 }
420 ubifs_err("duplicate sqnum in replay");
421 return -EINVAL;
422 }
423
424 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 416 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
425 if (!r) 417 if (!r)
426 return -ENOMEM; 418 return -ENOMEM;
419
427 nbuf = kmalloc(nlen + 1, GFP_KERNEL); 420 nbuf = kmalloc(nlen + 1, GFP_KERNEL);
428 if (!nbuf) { 421 if (!nbuf) {
429 kfree(r); 422 kfree(r);
@@ -435,17 +428,15 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
435 r->lnum = lnum; 428 r->lnum = lnum;
436 r->offs = offs; 429 r->offs = offs;
437 r->len = len; 430 r->len = len;
431 r->deletion = !!deletion;
438 r->sqnum = sqnum; 432 r->sqnum = sqnum;
433 key_copy(c, key, &r->key);
439 r->nm.len = nlen; 434 r->nm.len = nlen;
440 memcpy(nbuf, name, nlen); 435 memcpy(nbuf, name, nlen);
441 nbuf[nlen] = '\0'; 436 nbuf[nlen] = '\0';
442 r->nm.name = nbuf; 437 r->nm.name = nbuf;
443 r->flags = (deletion ? REPLAY_DELETION : 0);
444 key_copy(c, key, &r->key);
445 438
446 ubifs_assert(!*p); 439 list_add_tail(&r->list, &c->replay_list);
447 rb_link_node(&r->rb, parent, p);
448 rb_insert_color(&r->rb, &c->replay_tree);
449 return 0; 440 return 0;
450} 441}
451 442
@@ -482,29 +473,92 @@ int ubifs_validate_entry(struct ubifs_info *c,
482} 473}
483 474
484/** 475/**
476 * is_last_bud - check if the bud is the last in the journal head.
477 * @c: UBIFS file-system description object
478 * @bud: bud description object
479 *
480 * This function checks if bud @bud is the last bud in its journal head. This
481 * information is then used by 'replay_bud()' to decide whether the bud can
482 * have corruptions or not. Indeed, only last buds can be corrupted by power
483 * cuts. Returns %1 if this is the last bud, and %0 if not.
484 */
485static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
486{
487 struct ubifs_jhead *jh = &c->jheads[bud->jhead];
488 struct ubifs_bud *next;
489 uint32_t data;
490 int err;
491
492 if (list_is_last(&bud->list, &jh->buds_list))
493 return 1;
494
495 /*
496 * The following is a quirk to make sure we work correctly with UBIFS
497 * images used with older UBIFS.
498 *
499 * Normally, the last bud will be the last in the journal head's list
500 * of buds. However, there is one exception if the UBIFS image belongs
501 * to older UBIFS. This is fairly unlikely: one would need to use old
502 * UBIFS, then have a power cut exactly at the right point, and then
503 * try to mount this image with new UBIFS.
504 *
505 * The exception is: it is possible to have 2 buds A and B, A goes
506 * before B, and B is the last, bud B contains no data, and bud A is
507 * corrupted at the end. The reason is that in older versions when the
508 * journal code switched the next bud (from A to B), it first added a
509 * log reference node for the new bud (B), and only after this it
510 * synchronized the write-buffer of current bud (A). But later this was
511 * changed and UBIFS started to always synchronize the write-buffer of
512 * the bud (A) before writing the log reference for the new bud (B).
513 *
514 * But because older UBIFS always synchronized A's write-buffer before
515 * writing to B, we can recognize this exceptional situation by
516 * checking the contents of bud B - if it is empty, then A can be
517 * treated as the last and we can recover it.
518 *
519 * TODO: remove this piece of code in a couple of years (today it is
520 * 16.05.2011).
521 */
522 next = list_entry(bud->list.next, struct ubifs_bud, list);
523 if (!list_is_last(&next->list, &jh->buds_list))
524 return 0;
525
526 err = ubi_read(c->ubi, next->lnum, (char *)&data,
527 next->start, 4);
528 if (err)
529 return 0;
530
531 return data == 0xFFFFFFFF;
532}
533
534/**
485 * replay_bud - replay a bud logical eraseblock. 535 * replay_bud - replay a bud logical eraseblock.
486 * @c: UBIFS file-system description object 536 * @c: UBIFS file-system description object
487 * @lnum: bud logical eraseblock number to replay 537 * @b: bud entry which describes the bud
488 * @offs: bud start offset
489 * @jhead: journal head to which this bud belongs
490 * @free: amount of free space in the bud is returned here
491 * @dirty: amount of dirty space from padding and deletion nodes is returned
492 * here
493 * 538 *
494 * This function returns zero in case of success and a negative error code in 539 * This function replays bud @b, recovers it if needed, and adds all nodes
495 * case of failure. 540 * from this bud to the replay list. Returns zero in case of success and a
541 * negative error code in case of failure.
496 */ 542 */
497static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 543static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
498 int *free, int *dirty)
499{ 544{
500 int err = 0, used = 0; 545 int is_last = is_last_bud(c, b->bud);
546 int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start;
501 struct ubifs_scan_leb *sleb; 547 struct ubifs_scan_leb *sleb;
502 struct ubifs_scan_node *snod; 548 struct ubifs_scan_node *snod;
503 struct ubifs_bud *bud;
504 549
505 dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); 550 dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d",
506 if (c->need_recovery) 551 lnum, b->bud->jhead, offs, is_last);
507 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); 552
553 if (c->need_recovery && is_last)
554 /*
555 * Recover only last LEBs in the journal heads, because power
556 * cuts may cause corruptions only in these LEBs, because only
557 * these LEBs could possibly be written to at the power cut
558 * time.
559 */
560 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf,
561 b->bud->jhead != GCHD);
508 else 562 else
509 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); 563 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
510 if (IS_ERR(sleb)) 564 if (IS_ERR(sleb))
@@ -620,19 +674,13 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
620 goto out; 674 goto out;
621 } 675 }
622 676
623 bud = ubifs_search_bud(c, lnum); 677 ubifs_assert(ubifs_search_bud(c, lnum));
624 if (!bud)
625 BUG();
626
627 ubifs_assert(sleb->endpt - offs >= used); 678 ubifs_assert(sleb->endpt - offs >= used);
628 ubifs_assert(sleb->endpt % c->min_io_size == 0); 679 ubifs_assert(sleb->endpt % c->min_io_size == 0);
629 680
630 if (sleb->endpt + c->min_io_size <= c->leb_size && !c->ro_mount) 681 b->dirty = sleb->endpt - offs - used;
631 err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum, 682 b->free = c->leb_size - sleb->endpt;
632 sleb->endpt, UBI_SHORTTERM); 683 dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, b->dirty, b->free);
633
634 *dirty = sleb->endpt - offs - used;
635 *free = c->leb_size - sleb->endpt;
636 684
637out: 685out:
638 ubifs_scan_destroy(sleb); 686 ubifs_scan_destroy(sleb);
@@ -646,55 +694,6 @@ out_dump:
646} 694}
647 695
648/** 696/**
649 * insert_ref_node - insert a reference node to the replay tree.
650 * @c: UBIFS file-system description object
651 * @lnum: node logical eraseblock number
652 * @offs: node offset
653 * @sqnum: sequence number
654 * @free: amount of free space in bud
655 * @dirty: amount of dirty space from padding and deletion nodes
656 *
657 * This function inserts a reference node to the replay tree and returns zero
658 * in case of success or a negative error code in case of failure.
659 */
660static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
661 unsigned long long sqnum, int free, int dirty)
662{
663 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
664 struct replay_entry *r;
665
666 dbg_mnt("add ref LEB %d:%d", lnum, offs);
667 while (*p) {
668 parent = *p;
669 r = rb_entry(parent, struct replay_entry, rb);
670 if (sqnum < r->sqnum) {
671 p = &(*p)->rb_left;
672 continue;
673 } else if (sqnum > r->sqnum) {
674 p = &(*p)->rb_right;
675 continue;
676 }
677 ubifs_err("duplicate sqnum in replay tree");
678 return -EINVAL;
679 }
680
681 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
682 if (!r)
683 return -ENOMEM;
684
685 r->lnum = lnum;
686 r->offs = offs;
687 r->sqnum = sqnum;
688 r->flags = REPLAY_REF;
689 r->free = free;
690 r->dirty = dirty;
691
692 rb_link_node(&r->rb, parent, p);
693 rb_insert_color(&r->rb, &c->replay_tree);
694 return 0;
695}
696
697/**
698 * replay_buds - replay all buds. 697 * replay_buds - replay all buds.
699 * @c: UBIFS file-system description object 698 * @c: UBIFS file-system description object
700 * 699 *
@@ -704,17 +703,16 @@ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
704static int replay_buds(struct ubifs_info *c) 703static int replay_buds(struct ubifs_info *c)
705{ 704{
706 struct bud_entry *b; 705 struct bud_entry *b;
707 int err, uninitialized_var(free), uninitialized_var(dirty); 706 int err;
707 unsigned long long prev_sqnum = 0;
708 708
709 list_for_each_entry(b, &c->replay_buds, list) { 709 list_for_each_entry(b, &c->replay_buds, list) {
710 err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, 710 err = replay_bud(c, b);
711 &free, &dirty);
712 if (err)
713 return err;
714 err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum,
715 free, dirty);
716 if (err) 711 if (err)
717 return err; 712 return err;
713
714 ubifs_assert(b->sqnum > prev_sqnum);
715 prev_sqnum = b->sqnum;
718 } 716 }
719 717
720 return 0; 718 return 0;
@@ -1054,25 +1052,29 @@ int ubifs_replay_journal(struct ubifs_info *c)
1054 if (err) 1052 if (err)
1055 goto out; 1053 goto out;
1056 1054
1057 err = apply_replay_tree(c); 1055 err = apply_replay_list(c);
1056 if (err)
1057 goto out;
1058
1059 err = set_buds_lprops(c);
1058 if (err) 1060 if (err)
1059 goto out; 1061 goto out;
1060 1062
1061 /* 1063 /*
1062 * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable 1064 * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable
1063 * to roughly estimate index growth. Things like @c->min_idx_lebs 1065 * to roughly estimate index growth. Things like @c->bi.min_idx_lebs
1064 * depend on it. This means we have to initialize it to make sure 1066 * depend on it. This means we have to initialize it to make sure
1065 * budgeting works properly. 1067 * budgeting works properly.
1066 */ 1068 */
1067 c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); 1069 c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
1068 c->budg_uncommitted_idx *= c->max_idx_node_sz; 1070 c->bi.uncommitted_idx *= c->max_idx_node_sz;
1069 1071
1070 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1072 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
1071 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " 1073 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
1072 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1074 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
1073 (unsigned long)c->highest_inum); 1075 (unsigned long)c->highest_inum);
1074out: 1076out:
1075 destroy_replay_tree(c); 1077 destroy_replay_list(c);
1076 destroy_bud_list(c); 1078 destroy_bud_list(c);
1077 c->replaying = 0; 1079 c->replaying = 0;
1078 return err; 1080 return err;
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index bf31b4729e51..c606f010e8df 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -475,7 +475,8 @@ failed:
475 * @c: UBIFS file-system description object 475 * @c: UBIFS file-system description object
476 * 476 *
477 * This function returns a pointer to the superblock node or a negative error 477 * This function returns a pointer to the superblock node or a negative error
478 * code. 478 * code. Note, the user of this function is responsible for kfree()'ing the
479 * returned superblock buffer.
479 */ 480 */
480struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c) 481struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
481{ 482{
@@ -616,6 +617,7 @@ int ubifs_read_superblock(struct ubifs_info *c)
616 c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); 617 c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran);
617 memcpy(&c->uuid, &sup->uuid, 16); 618 memcpy(&c->uuid, &sup->uuid, 16);
618 c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); 619 c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT);
620 c->space_fixup = !!(sup_flags & UBIFS_FLG_SPACE_FIXUP);
619 621
620 /* Automatically increase file system size to the maximum size */ 622 /* Automatically increase file system size to the maximum size */
621 c->old_leb_cnt = c->leb_cnt; 623 c->old_leb_cnt = c->leb_cnt;
@@ -650,3 +652,152 @@ out:
650 kfree(sup); 652 kfree(sup);
651 return err; 653 return err;
652} 654}
655
656/**
657 * fixup_leb - fixup/unmap an LEB containing free space.
658 * @c: UBIFS file-system description object
659 * @lnum: the LEB number to fix up
660 * @len: number of used bytes in LEB (starting at offset 0)
661 *
662 * This function reads the contents of the given LEB number @lnum, then fixes
663 * it up, so that empty min. I/O units in the end of LEB are actually erased on
664 * flash (rather than being just all-0xff real data). If the LEB is completely
665 * empty, it is simply unmapped.
666 */
667static int fixup_leb(struct ubifs_info *c, int lnum, int len)
668{
669 int err;
670
671 ubifs_assert(len >= 0);
672 ubifs_assert(len % c->min_io_size == 0);
673 ubifs_assert(len < c->leb_size);
674
675 if (len == 0) {
676 dbg_mnt("unmap empty LEB %d", lnum);
677 return ubi_leb_unmap(c->ubi, lnum);
678 }
679
680 dbg_mnt("fixup LEB %d, data len %d", lnum, len);
681 err = ubi_read(c->ubi, lnum, c->sbuf, 0, len);
682 if (err)
683 return err;
684
685 return ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
686}
687
688/**
689 * fixup_free_space - find & remap all LEBs containing free space.
690 * @c: UBIFS file-system description object
691 *
692 * This function walks through all LEBs in the filesystem and fixes up those
693 * containing free/empty space.
694 */
695static int fixup_free_space(struct ubifs_info *c)
696{
697 int lnum, err = 0;
698 struct ubifs_lprops *lprops;
699
700 ubifs_get_lprops(c);
701
702 /* Fixup LEBs in the master area */
703 for (lnum = UBIFS_MST_LNUM; lnum < UBIFS_LOG_LNUM; lnum++) {
704 err = fixup_leb(c, lnum, c->mst_offs + c->mst_node_alsz);
705 if (err)
706 goto out;
707 }
708
709 /* Unmap unused log LEBs */
710 lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
711 while (lnum != c->ltail_lnum) {
712 err = fixup_leb(c, lnum, 0);
713 if (err)
714 goto out;
715 lnum = ubifs_next_log_lnum(c, lnum);
716 }
717
718 /* Fixup the current log head */
719 err = fixup_leb(c, c->lhead_lnum, c->lhead_offs);
720 if (err)
721 goto out;
722
723 /* Fixup LEBs in the LPT area */
724 for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) {
725 int free = c->ltab[lnum - c->lpt_first].free;
726
727 if (free > 0) {
728 err = fixup_leb(c, lnum, c->leb_size - free);
729 if (err)
730 goto out;
731 }
732 }
733
734 /* Unmap LEBs in the orphans area */
735 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
736 err = fixup_leb(c, lnum, 0);
737 if (err)
738 goto out;
739 }
740
741 /* Fixup LEBs in the main area */
742 for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) {
743 lprops = ubifs_lpt_lookup(c, lnum);
744 if (IS_ERR(lprops)) {
745 err = PTR_ERR(lprops);
746 goto out;
747 }
748
749 if (lprops->free > 0) {
750 err = fixup_leb(c, lnum, c->leb_size - lprops->free);
751 if (err)
752 goto out;
753 }
754 }
755
756out:
757 ubifs_release_lprops(c);
758 return err;
759}
760
761/**
762 * ubifs_fixup_free_space - find & fix all LEBs with free space.
763 * @c: UBIFS file-system description object
764 *
765 * This function fixes up LEBs containing free space on first mount, if the
766 * appropriate flag was set when the FS was created. Each LEB with one or more
767 * empty min. I/O unit (i.e. free-space-count > 0) is re-written, to make sure
768 * the free space is actually erased. E.g., this is necessary for some NAND
769 * chips, since the free space may have been programmed like real "0xff" data
770 * (generating a non-0xff ECC), causing future writes to the not-really-erased
771 * NAND pages to behave badly. After the space is fixed up, the superblock flag
772 * is cleared, so that this is skipped for all future mounts.
773 */
774int ubifs_fixup_free_space(struct ubifs_info *c)
775{
776 int err;
777 struct ubifs_sb_node *sup;
778
779 ubifs_assert(c->space_fixup);
780 ubifs_assert(!c->ro_mount);
781
782 ubifs_msg("start fixing up free space");
783
784 err = fixup_free_space(c);
785 if (err)
786 return err;
787
788 sup = ubifs_read_sb_node(c);
789 if (IS_ERR(sup))
790 return PTR_ERR(sup);
791
792 /* Free-space fixup is no longer required */
793 c->space_fixup = 0;
794 sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP);
795
796 err = ubifs_write_sb_node(c, sup);
797 kfree(sup);
798 if (err)
799 return err;
800
801 ubifs_msg("free space fixup complete");
802 return err;
803}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index be6c7b008f38..6db0bdaa9f74 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -375,7 +375,7 @@ out:
375 ubifs_release_dirty_inode_budget(c, ui); 375 ubifs_release_dirty_inode_budget(c, ui);
376 else { 376 else {
377 /* We've deleted something - clean the "no space" flags */ 377 /* We've deleted something - clean the "no space" flags */
378 c->nospace = c->nospace_rp = 0; 378 c->bi.nospace = c->bi.nospace_rp = 0;
379 smp_wmb(); 379 smp_wmb();
380 } 380 }
381done: 381done:
@@ -694,11 +694,11 @@ static int init_constants_sb(struct ubifs_info *c)
694 * be compressed and direntries are of the maximum size. 694 * be compressed and direntries are of the maximum size.
695 * 695 *
696 * Note, data, which may be stored in inodes is budgeted separately, so 696 * Note, data, which may be stored in inodes is budgeted separately, so
697 * it is not included into 'c->inode_budget'. 697 * it is not included into 'c->bi.inode_budget'.
698 */ 698 */
699 c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; 699 c->bi.page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE;
700 c->inode_budget = UBIFS_INO_NODE_SZ; 700 c->bi.inode_budget = UBIFS_INO_NODE_SZ;
701 c->dent_budget = UBIFS_MAX_DENT_NODE_SZ; 701 c->bi.dent_budget = UBIFS_MAX_DENT_NODE_SZ;
702 702
703 /* 703 /*
704 * When the amount of flash space used by buds becomes 704 * When the amount of flash space used by buds becomes
@@ -742,7 +742,7 @@ static void init_constants_master(struct ubifs_info *c)
742{ 742{
743 long long tmp64; 743 long long tmp64;
744 744
745 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 745 c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
746 c->report_rp_size = ubifs_reported_space(c, c->rp_size); 746 c->report_rp_size = ubifs_reported_space(c, c->rp_size);
747 747
748 /* 748 /*
@@ -1144,8 +1144,8 @@ static int check_free_space(struct ubifs_info *c)
1144{ 1144{
1145 ubifs_assert(c->dark_wm > 0); 1145 ubifs_assert(c->dark_wm > 0);
1146 if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) { 1146 if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) {
1147 ubifs_err("insufficient free space to mount in read/write mode"); 1147 ubifs_err("insufficient free space to mount in R/W mode");
1148 dbg_dump_budg(c); 1148 dbg_dump_budg(c, &c->bi);
1149 dbg_dump_lprops(c); 1149 dbg_dump_lprops(c);
1150 return -ENOSPC; 1150 return -ENOSPC;
1151 } 1151 }
@@ -1257,12 +1257,12 @@ static int mount_ubifs(struct ubifs_info *c)
1257 goto out_free; 1257 goto out_free;
1258 } 1258 }
1259 1259
1260 err = alloc_wbufs(c);
1261 if (err)
1262 goto out_cbuf;
1263
1260 sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); 1264 sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);
1261 if (!c->ro_mount) { 1265 if (!c->ro_mount) {
1262 err = alloc_wbufs(c);
1263 if (err)
1264 goto out_cbuf;
1265
1266 /* Create background thread */ 1266 /* Create background thread */
1267 c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); 1267 c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
1268 if (IS_ERR(c->bgt)) { 1268 if (IS_ERR(c->bgt)) {
@@ -1304,7 +1304,7 @@ static int mount_ubifs(struct ubifs_info *c)
1304 if (err) 1304 if (err)
1305 goto out_lpt; 1305 goto out_lpt;
1306 1306
1307 err = dbg_check_idx_size(c, c->old_idx_sz); 1307 err = dbg_check_idx_size(c, c->bi.old_idx_sz);
1308 if (err) 1308 if (err)
1309 goto out_lpt; 1309 goto out_lpt;
1310 1310
@@ -1313,7 +1313,7 @@ static int mount_ubifs(struct ubifs_info *c)
1313 goto out_journal; 1313 goto out_journal;
1314 1314
1315 /* Calculate 'min_idx_lebs' after journal replay */ 1315 /* Calculate 'min_idx_lebs' after journal replay */
1316 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 1316 c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
1317 1317
1318 err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount); 1318 err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount);
1319 if (err) 1319 if (err)
@@ -1396,6 +1396,12 @@ static int mount_ubifs(struct ubifs_info *c)
1396 } else 1396 } else
1397 ubifs_assert(c->lst.taken_empty_lebs > 0); 1397 ubifs_assert(c->lst.taken_empty_lebs > 0);
1398 1398
1399 if (!c->ro_mount && c->space_fixup) {
1400 err = ubifs_fixup_free_space(c);
1401 if (err)
1402 goto out_infos;
1403 }
1404
1399 err = dbg_check_filesystem(c); 1405 err = dbg_check_filesystem(c);
1400 if (err) 1406 if (err)
1401 goto out_infos; 1407 goto out_infos;
@@ -1442,7 +1448,8 @@ static int mount_ubifs(struct ubifs_info *c)
1442 c->main_lebs, c->main_first, c->leb_cnt - 1); 1448 c->main_lebs, c->main_first, c->leb_cnt - 1);
1443 dbg_msg("index LEBs: %d", c->lst.idx_lebs); 1449 dbg_msg("index LEBs: %d", c->lst.idx_lebs);
1444 dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", 1450 dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)",
1445 c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20); 1451 c->bi.old_idx_sz, c->bi.old_idx_sz >> 10,
1452 c->bi.old_idx_sz >> 20);
1446 dbg_msg("key hash type: %d", c->key_hash_type); 1453 dbg_msg("key hash type: %d", c->key_hash_type);
1447 dbg_msg("tree fanout: %d", c->fanout); 1454 dbg_msg("tree fanout: %d", c->fanout);
1448 dbg_msg("reserved GC LEB: %d", c->gc_lnum); 1455 dbg_msg("reserved GC LEB: %d", c->gc_lnum);
@@ -1456,7 +1463,7 @@ static int mount_ubifs(struct ubifs_info *c)
1456 dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", 1463 dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu",
1457 UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); 1464 UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ);
1458 dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d", 1465 dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d",
1459 UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, 1466 UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
1460 UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout)); 1467 UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout));
1461 dbg_msg("dead watermark: %d", c->dead_wm); 1468 dbg_msg("dead watermark: %d", c->dead_wm);
1462 dbg_msg("dark watermark: %d", c->dark_wm); 1469 dbg_msg("dark watermark: %d", c->dark_wm);
@@ -1584,6 +1591,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1584 } 1591 }
1585 sup->leb_cnt = cpu_to_le32(c->leb_cnt); 1592 sup->leb_cnt = cpu_to_le32(c->leb_cnt);
1586 err = ubifs_write_sb_node(c, sup); 1593 err = ubifs_write_sb_node(c, sup);
1594 kfree(sup);
1587 if (err) 1595 if (err)
1588 goto out; 1596 goto out;
1589 } 1597 }
@@ -1631,12 +1639,6 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1631 if (err) 1639 if (err)
1632 goto out; 1640 goto out;
1633 1641
1634 err = alloc_wbufs(c);
1635 if (err)
1636 goto out;
1637
1638 ubifs_create_buds_lists(c);
1639
1640 /* Create background thread */ 1642 /* Create background thread */
1641 c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); 1643 c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
1642 if (IS_ERR(c->bgt)) { 1644 if (IS_ERR(c->bgt)) {
@@ -1690,6 +1692,13 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1690 */ 1692 */
1691 err = dbg_check_space_info(c); 1693 err = dbg_check_space_info(c);
1692 } 1694 }
1695
1696 if (c->space_fixup) {
1697 err = ubifs_fixup_free_space(c);
1698 if (err)
1699 goto out;
1700 }
1701
1693 mutex_unlock(&c->umount_mutex); 1702 mutex_unlock(&c->umount_mutex);
1694 return err; 1703 return err;
1695 1704
@@ -1744,7 +1753,6 @@ static void ubifs_remount_ro(struct ubifs_info *c)
1744 if (err) 1753 if (err)
1745 ubifs_ro_mode(c, err); 1754 ubifs_ro_mode(c, err);
1746 1755
1747 free_wbufs(c);
1748 vfree(c->orph_buf); 1756 vfree(c->orph_buf);
1749 c->orph_buf = NULL; 1757 c->orph_buf = NULL;
1750 kfree(c->write_reserve_buf); 1758 kfree(c->write_reserve_buf);
@@ -1773,10 +1781,9 @@ static void ubifs_put_super(struct super_block *sb)
1773 * to write them back because of I/O errors. 1781 * to write them back because of I/O errors.
1774 */ 1782 */
1775 if (!c->ro_error) { 1783 if (!c->ro_error) {
1776 ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); 1784 ubifs_assert(c->bi.idx_growth == 0);
1777 ubifs_assert(c->budg_idx_growth == 0); 1785 ubifs_assert(c->bi.dd_growth == 0);
1778 ubifs_assert(c->budg_dd_growth == 0); 1786 ubifs_assert(c->bi.data_growth == 0);
1779 ubifs_assert(c->budg_data_growth == 0);
1780 } 1787 }
1781 1788
1782 /* 1789 /*
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index de485979ca39..8119b1fd8d94 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -2557,11 +2557,11 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
2557 if (err) { 2557 if (err) {
2558 /* Ensure the znode is dirtied */ 2558 /* Ensure the znode is dirtied */
2559 if (znode->cnext || !ubifs_zn_dirty(znode)) { 2559 if (znode->cnext || !ubifs_zn_dirty(znode)) {
2560 znode = dirty_cow_bottom_up(c, znode); 2560 znode = dirty_cow_bottom_up(c, znode);
2561 if (IS_ERR(znode)) { 2561 if (IS_ERR(znode)) {
2562 err = PTR_ERR(znode); 2562 err = PTR_ERR(znode);
2563 goto out_unlock; 2563 goto out_unlock;
2564 } 2564 }
2565 } 2565 }
2566 err = tnc_delete(c, znode, n); 2566 err = tnc_delete(c, znode, n);
2567 } 2567 }
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 53288e5d604e..41920f357bbf 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -377,15 +377,13 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
377 c->gap_lebs = NULL; 377 c->gap_lebs = NULL;
378 return err; 378 return err;
379 } 379 }
380 if (!dbg_force_in_the_gaps_enabled) { 380 if (!dbg_force_in_the_gaps_enabled()) {
381 /* 381 /*
382 * Do not print scary warnings if the debugging 382 * Do not print scary warnings if the debugging
383 * option which forces in-the-gaps is enabled. 383 * option which forces in-the-gaps is enabled.
384 */ 384 */
385 ubifs_err("out of space"); 385 ubifs_warn("out of space");
386 spin_lock(&c->space_lock); 386 dbg_dump_budg(c, &c->bi);
387 dbg_dump_budg(c);
388 spin_unlock(&c->space_lock);
389 dbg_dump_lprops(c); 387 dbg_dump_lprops(c);
390 } 388 }
391 /* Try to commit anyway */ 389 /* Try to commit anyway */
@@ -796,16 +794,16 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot)
796 spin_lock(&c->space_lock); 794 spin_lock(&c->space_lock);
797 /* 795 /*
798 * Although we have not finished committing yet, update size of the 796 * Although we have not finished committing yet, update size of the
799 * committed index ('c->old_idx_sz') and zero out the index growth 797 * committed index ('c->bi.old_idx_sz') and zero out the index growth
800 * budget. It is OK to do this now, because we've reserved all the 798 * budget. It is OK to do this now, because we've reserved all the
801 * space which is needed to commit the index, and it is safe for the 799 * space which is needed to commit the index, and it is safe for the
802 * budgeting subsystem to assume the index is already committed, 800 * budgeting subsystem to assume the index is already committed,
803 * even though it is not. 801 * even though it is not.
804 */ 802 */
805 ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); 803 ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
806 c->old_idx_sz = c->calc_idx_sz; 804 c->bi.old_idx_sz = c->calc_idx_sz;
807 c->budg_uncommitted_idx = 0; 805 c->bi.uncommitted_idx = 0;
808 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 806 c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
809 spin_unlock(&c->space_lock); 807 spin_unlock(&c->space_lock);
810 mutex_unlock(&c->tnc_mutex); 808 mutex_unlock(&c->tnc_mutex);
811 809
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index 191ca7863fe7..e24380cf46ed 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -408,9 +408,11 @@ enum {
408 * Superblock flags. 408 * Superblock flags.
409 * 409 *
410 * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set 410 * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set
411 * UBIFS_FLG_SPACE_FIXUP: first-mount "fixup" of free space within LEBs needed
411 */ 412 */
412enum { 413enum {
413 UBIFS_FLG_BIGLPT = 0x02, 414 UBIFS_FLG_BIGLPT = 0x02,
415 UBIFS_FLG_SPACE_FIXUP = 0x04,
414}; 416};
415 417
416/** 418/**
@@ -434,7 +436,7 @@ struct ubifs_ch {
434 __u8 node_type; 436 __u8 node_type;
435 __u8 group_type; 437 __u8 group_type;
436 __u8 padding[2]; 438 __u8 padding[2];
437} __attribute__ ((packed)); 439} __packed;
438 440
439/** 441/**
440 * union ubifs_dev_desc - device node descriptor. 442 * union ubifs_dev_desc - device node descriptor.
@@ -448,7 +450,7 @@ struct ubifs_ch {
448union ubifs_dev_desc { 450union ubifs_dev_desc {
449 __le32 new; 451 __le32 new;
450 __le64 huge; 452 __le64 huge;
451} __attribute__ ((packed)); 453} __packed;
452 454
453/** 455/**
454 * struct ubifs_ino_node - inode node. 456 * struct ubifs_ino_node - inode node.
@@ -509,7 +511,7 @@ struct ubifs_ino_node {
509 __le16 compr_type; 511 __le16 compr_type;
510 __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */ 512 __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */
511 __u8 data[]; 513 __u8 data[];
512} __attribute__ ((packed)); 514} __packed;
513 515
514/** 516/**
515 * struct ubifs_dent_node - directory entry node. 517 * struct ubifs_dent_node - directory entry node.
@@ -534,7 +536,7 @@ struct ubifs_dent_node {
534 __le16 nlen; 536 __le16 nlen;
535 __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ 537 __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */
536 __u8 name[]; 538 __u8 name[];
537} __attribute__ ((packed)); 539} __packed;
538 540
539/** 541/**
540 * struct ubifs_data_node - data node. 542 * struct ubifs_data_node - data node.
@@ -555,7 +557,7 @@ struct ubifs_data_node {
555 __le16 compr_type; 557 __le16 compr_type;
556 __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */ 558 __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */
557 __u8 data[]; 559 __u8 data[];
558} __attribute__ ((packed)); 560} __packed;
559 561
560/** 562/**
561 * struct ubifs_trun_node - truncation node. 563 * struct ubifs_trun_node - truncation node.
@@ -575,7 +577,7 @@ struct ubifs_trun_node {
575 __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */ 577 __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */
576 __le64 old_size; 578 __le64 old_size;
577 __le64 new_size; 579 __le64 new_size;
578} __attribute__ ((packed)); 580} __packed;
579 581
580/** 582/**
581 * struct ubifs_pad_node - padding node. 583 * struct ubifs_pad_node - padding node.
@@ -586,7 +588,7 @@ struct ubifs_trun_node {
586struct ubifs_pad_node { 588struct ubifs_pad_node {
587 struct ubifs_ch ch; 589 struct ubifs_ch ch;
588 __le32 pad_len; 590 __le32 pad_len;
589} __attribute__ ((packed)); 591} __packed;
590 592
591/** 593/**
592 * struct ubifs_sb_node - superblock node. 594 * struct ubifs_sb_node - superblock node.
@@ -644,7 +646,7 @@ struct ubifs_sb_node {
644 __u8 uuid[16]; 646 __u8 uuid[16];
645 __le32 ro_compat_version; 647 __le32 ro_compat_version;
646 __u8 padding2[3968]; 648 __u8 padding2[3968];
647} __attribute__ ((packed)); 649} __packed;
648 650
649/** 651/**
650 * struct ubifs_mst_node - master node. 652 * struct ubifs_mst_node - master node.
@@ -711,7 +713,7 @@ struct ubifs_mst_node {
711 __le32 idx_lebs; 713 __le32 idx_lebs;
712 __le32 leb_cnt; 714 __le32 leb_cnt;
713 __u8 padding[344]; 715 __u8 padding[344];
714} __attribute__ ((packed)); 716} __packed;
715 717
716/** 718/**
717 * struct ubifs_ref_node - logical eraseblock reference node. 719 * struct ubifs_ref_node - logical eraseblock reference node.
@@ -727,7 +729,7 @@ struct ubifs_ref_node {
727 __le32 offs; 729 __le32 offs;
728 __le32 jhead; 730 __le32 jhead;
729 __u8 padding[28]; 731 __u8 padding[28];
730} __attribute__ ((packed)); 732} __packed;
731 733
732/** 734/**
733 * struct ubifs_branch - key/reference/length branch 735 * struct ubifs_branch - key/reference/length branch
@@ -741,7 +743,7 @@ struct ubifs_branch {
741 __le32 offs; 743 __le32 offs;
742 __le32 len; 744 __le32 len;
743 __u8 key[]; 745 __u8 key[];
744} __attribute__ ((packed)); 746} __packed;
745 747
746/** 748/**
747 * struct ubifs_idx_node - indexing node. 749 * struct ubifs_idx_node - indexing node.
@@ -755,7 +757,7 @@ struct ubifs_idx_node {
755 __le16 child_cnt; 757 __le16 child_cnt;
756 __le16 level; 758 __le16 level;
757 __u8 branches[]; 759 __u8 branches[];
758} __attribute__ ((packed)); 760} __packed;
759 761
760/** 762/**
761 * struct ubifs_cs_node - commit start node. 763 * struct ubifs_cs_node - commit start node.
@@ -765,7 +767,7 @@ struct ubifs_idx_node {
765struct ubifs_cs_node { 767struct ubifs_cs_node {
766 struct ubifs_ch ch; 768 struct ubifs_ch ch;
767 __le64 cmt_no; 769 __le64 cmt_no;
768} __attribute__ ((packed)); 770} __packed;
769 771
770/** 772/**
771 * struct ubifs_orph_node - orphan node. 773 * struct ubifs_orph_node - orphan node.
@@ -777,6 +779,6 @@ struct ubifs_orph_node {
777 struct ubifs_ch ch; 779 struct ubifs_ch ch;
778 __le64 cmt_no; 780 __le64 cmt_no;
779 __le64 inos[]; 781 __le64 inos[];
780} __attribute__ ((packed)); 782} __packed;
781 783
782#endif /* __UBIFS_MEDIA_H__ */ 784#endif /* __UBIFS_MEDIA_H__ */
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 8c40ad3c6721..93d1412a06f0 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -389,9 +389,9 @@ struct ubifs_gced_idx_leb {
389 * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses 389 * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses
390 * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot 390 * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot
391 * make sure @inode->i_size is always changed under @ui_mutex, because it 391 * make sure @inode->i_size is always changed under @ui_mutex, because it
392 * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock 392 * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would
393 * with 'ubifs_writepage()' (see file.c). All the other inode fields are 393 * deadlock with 'ubifs_writepage()' (see file.c). All the other inode fields
394 * changed under @ui_mutex, so they do not need "shadow" fields. Note, one 394 * are changed under @ui_mutex, so they do not need "shadow" fields. Note, one
395 * could consider to rework locking and base it on "shadow" fields. 395 * could consider to rework locking and base it on "shadow" fields.
396 */ 396 */
397struct ubifs_inode { 397struct ubifs_inode {
@@ -937,6 +937,40 @@ struct ubifs_mount_opts {
937 unsigned int compr_type:2; 937 unsigned int compr_type:2;
938}; 938};
939 939
940/**
941 * struct ubifs_budg_info - UBIFS budgeting information.
942 * @idx_growth: amount of bytes budgeted for index growth
943 * @data_growth: amount of bytes budgeted for cached data
944 * @dd_growth: amount of bytes budgeted for cached data that will make
945 * other data dirty
946 * @uncommitted_idx: amount of bytes were budgeted for growth of the index, but
947 * which still have to be taken into account because the index
948 * has not been committed so far
949 * @old_idx_sz: size of index on flash
950 * @min_idx_lebs: minimum number of LEBs required for the index
951 * @nospace: non-zero if the file-system does not have flash space (used as
952 * optimization)
953 * @nospace_rp: the same as @nospace, but additionally means that even reserved
954 * pool is full
955 * @page_budget: budget for a page (constant, never changed after mount)
956 * @inode_budget: budget for an inode (constant, never changed after mount)
957 * @dent_budget: budget for a directory entry (constant, never changed after
958 * mount)
959 */
960struct ubifs_budg_info {
961 long long idx_growth;
962 long long data_growth;
963 long long dd_growth;
964 long long uncommitted_idx;
965 unsigned long long old_idx_sz;
966 int min_idx_lebs;
967 unsigned int nospace:1;
968 unsigned int nospace_rp:1;
969 int page_budget;
970 int inode_budget;
971 int dent_budget;
972};
973
940struct ubifs_debug_info; 974struct ubifs_debug_info;
941 975
942/** 976/**
@@ -980,6 +1014,7 @@ struct ubifs_debug_info;
980 * @cmt_wq: wait queue to sleep on if the log is full and a commit is running 1014 * @cmt_wq: wait queue to sleep on if the log is full and a commit is running
981 * 1015 *
982 * @big_lpt: flag that LPT is too big to write whole during commit 1016 * @big_lpt: flag that LPT is too big to write whole during commit
1017 * @space_fixup: flag indicating that free space in LEBs needs to be cleaned up
983 * @no_chk_data_crc: do not check CRCs when reading data nodes (except during 1018 * @no_chk_data_crc: do not check CRCs when reading data nodes (except during
984 * recovery) 1019 * recovery)
985 * @bulk_read: enable bulk-reads 1020 * @bulk_read: enable bulk-reads
@@ -1057,32 +1092,14 @@ struct ubifs_debug_info;
1057 * @dirty_zn_cnt: number of dirty znodes 1092 * @dirty_zn_cnt: number of dirty znodes
1058 * @clean_zn_cnt: number of clean znodes 1093 * @clean_zn_cnt: number of clean znodes
1059 * 1094 *
1060 * @budg_idx_growth: amount of bytes budgeted for index growth 1095 * @space_lock: protects @bi and @lst
1061 * @budg_data_growth: amount of bytes budgeted for cached data 1096 * @lst: lprops statistics
1062 * @budg_dd_growth: amount of bytes budgeted for cached data that will make 1097 * @bi: budgeting information
1063 * other data dirty
1064 * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index,
1065 * but which still have to be taken into account because
1066 * the index has not been committed so far
1067 * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth,
1068 * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst,
1069 * @nospace, and @nospace_rp;
1070 * @min_idx_lebs: minimum number of LEBs required for the index
1071 * @old_idx_sz: size of index on flash
1072 * @calc_idx_sz: temporary variable which is used to calculate new index size 1098 * @calc_idx_sz: temporary variable which is used to calculate new index size
1073 * (contains accurate new index size at end of TNC commit start) 1099 * (contains accurate new index size at end of TNC commit start)
1074 * @lst: lprops statistics
1075 * @nospace: non-zero if the file-system does not have flash space (used as
1076 * optimization)
1077 * @nospace_rp: the same as @nospace, but additionally means that even reserved
1078 * pool is full
1079 *
1080 * @page_budget: budget for a page
1081 * @inode_budget: budget for an inode
1082 * @dent_budget: budget for a directory entry
1083 * 1100 *
1084 * @ref_node_alsz: size of the LEB reference node aligned to the min. flash 1101 * @ref_node_alsz: size of the LEB reference node aligned to the min. flash
1085 * I/O unit 1102 * I/O unit
1086 * @mst_node_alsz: master node aligned size 1103 * @mst_node_alsz: master node aligned size
1087 * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary 1104 * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary
1088 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary 1105 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary
@@ -1189,7 +1206,6 @@ struct ubifs_debug_info;
1189 * @replaying: %1 during journal replay 1206 * @replaying: %1 during journal replay
1190 * @mounting: %1 while mounting 1207 * @mounting: %1 while mounting
1191 * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode 1208 * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode
1192 * @replay_tree: temporary tree used during journal replay
1193 * @replay_list: temporary list used during journal replay 1209 * @replay_list: temporary list used during journal replay
1194 * @replay_buds: list of buds to replay 1210 * @replay_buds: list of buds to replay
1195 * @cs_sqnum: sequence number of first node in the log (commit start node) 1211 * @cs_sqnum: sequence number of first node in the log (commit start node)
@@ -1238,6 +1254,7 @@ struct ubifs_info {
1238 wait_queue_head_t cmt_wq; 1254 wait_queue_head_t cmt_wq;
1239 1255
1240 unsigned int big_lpt:1; 1256 unsigned int big_lpt:1;
1257 unsigned int space_fixup:1;
1241 unsigned int no_chk_data_crc:1; 1258 unsigned int no_chk_data_crc:1;
1242 unsigned int bulk_read:1; 1259 unsigned int bulk_read:1;
1243 unsigned int default_compr:2; 1260 unsigned int default_compr:2;
@@ -1308,21 +1325,10 @@ struct ubifs_info {
1308 atomic_long_t dirty_zn_cnt; 1325 atomic_long_t dirty_zn_cnt;
1309 atomic_long_t clean_zn_cnt; 1326 atomic_long_t clean_zn_cnt;
1310 1327
1311 long long budg_idx_growth;
1312 long long budg_data_growth;
1313 long long budg_dd_growth;
1314 long long budg_uncommitted_idx;
1315 spinlock_t space_lock; 1328 spinlock_t space_lock;
1316 int min_idx_lebs;
1317 unsigned long long old_idx_sz;
1318 unsigned long long calc_idx_sz;
1319 struct ubifs_lp_stats lst; 1329 struct ubifs_lp_stats lst;
1320 unsigned int nospace:1; 1330 struct ubifs_budg_info bi;
1321 unsigned int nospace_rp:1; 1331 unsigned long long calc_idx_sz;
1322
1323 int page_budget;
1324 int inode_budget;
1325 int dent_budget;
1326 1332
1327 int ref_node_alsz; 1333 int ref_node_alsz;
1328 int mst_node_alsz; 1334 int mst_node_alsz;
@@ -1430,7 +1436,6 @@ struct ubifs_info {
1430 unsigned int replaying:1; 1436 unsigned int replaying:1;
1431 unsigned int mounting:1; 1437 unsigned int mounting:1;
1432 unsigned int remounting_rw:1; 1438 unsigned int remounting_rw:1;
1433 struct rb_root replay_tree;
1434 struct list_head replay_list; 1439 struct list_head replay_list;
1435 struct list_head replay_buds; 1440 struct list_head replay_buds;
1436 unsigned long long cs_sqnum; 1441 unsigned long long cs_sqnum;
@@ -1628,6 +1633,7 @@ int ubifs_write_master(struct ubifs_info *c);
1628int ubifs_read_superblock(struct ubifs_info *c); 1633int ubifs_read_superblock(struct ubifs_info *c);
1629struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); 1634struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c);
1630int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); 1635int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup);
1636int ubifs_fixup_free_space(struct ubifs_info *c);
1631 1637
1632/* replay.c */ 1638/* replay.c */
1633int ubifs_validate_entry(struct ubifs_info *c, 1639int ubifs_validate_entry(struct ubifs_info *c,
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 3299f469e712..16f19f55e63f 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -80,8 +80,8 @@ enum {
80 SECURITY_XATTR, 80 SECURITY_XATTR,
81}; 81};
82 82
83static const struct inode_operations none_inode_operations; 83static const struct inode_operations empty_iops;
84static const struct file_operations none_file_operations; 84static const struct file_operations empty_fops;
85 85
86/** 86/**
87 * create_xattr - create an extended attribute. 87 * create_xattr - create an extended attribute.
@@ -131,8 +131,8 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
131 131
132 /* Re-define all operations to be "nothing" */ 132 /* Re-define all operations to be "nothing" */
133 inode->i_mapping->a_ops = &empty_aops; 133 inode->i_mapping->a_ops = &empty_aops;
134 inode->i_op = &none_inode_operations; 134 inode->i_op = &empty_iops;
135 inode->i_fop = &none_file_operations; 135 inode->i_fop = &empty_fops;
136 136
137 inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA; 137 inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA;
138 ui = ubifs_inode(inode); 138 ui = ubifs_inode(inode);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index e765743cf9f3..b4d791a83207 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -409,7 +409,7 @@ out:
409} 409}
410 410
411/** 411/**
412 * ufs_getfrag_bloc() - `get_block_t' function, interface between UFS and 412 * ufs_getfrag_block() - `get_block_t' function, interface between UFS and
413 * readpage, writepage and so on 413 * readpage, writepage and so on
414 */ 414 */
415 415
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 9ef9ed2cfe2e..5e68099db2a5 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -33,7 +33,6 @@
33#include <linux/migrate.h> 33#include <linux/migrate.h>
34#include <linux/backing-dev.h> 34#include <linux/backing-dev.h>
35#include <linux/freezer.h> 35#include <linux/freezer.h>
36#include <linux/list_sort.h>
37 36
38#include "xfs_sb.h" 37#include "xfs_sb.h"
39#include "xfs_inum.h" 38#include "xfs_inum.h"
@@ -709,6 +708,27 @@ xfs_buf_get_empty(
709 return bp; 708 return bp;
710} 709}
711 710
711/*
712 * Return a buffer allocated as an empty buffer and associated to external
713 * memory via xfs_buf_associate_memory() back to its empty state.
714 */
715void
716xfs_buf_set_empty(
717 struct xfs_buf *bp,
718 size_t len)
719{
720 if (bp->b_pages)
721 _xfs_buf_free_pages(bp);
722
723 bp->b_pages = NULL;
724 bp->b_page_count = 0;
725 bp->b_addr = NULL;
726 bp->b_file_offset = 0;
727 bp->b_buffer_length = bp->b_count_desired = len;
728 bp->b_bn = XFS_BUF_DADDR_NULL;
729 bp->b_flags &= ~XBF_MAPPED;
730}
731
712static inline struct page * 732static inline struct page *
713mem_to_page( 733mem_to_page(
714 void *addr) 734 void *addr)
@@ -1402,12 +1422,12 @@ restart:
1402int 1422int
1403xfs_buftarg_shrink( 1423xfs_buftarg_shrink(
1404 struct shrinker *shrink, 1424 struct shrinker *shrink,
1405 int nr_to_scan, 1425 struct shrink_control *sc)
1406 gfp_t mask)
1407{ 1426{
1408 struct xfs_buftarg *btp = container_of(shrink, 1427 struct xfs_buftarg *btp = container_of(shrink,
1409 struct xfs_buftarg, bt_shrinker); 1428 struct xfs_buftarg, bt_shrinker);
1410 struct xfs_buf *bp; 1429 struct xfs_buf *bp;
1430 int nr_to_scan = sc->nr_to_scan;
1411 LIST_HEAD(dispose); 1431 LIST_HEAD(dispose);
1412 1432
1413 if (!nr_to_scan) 1433 if (!nr_to_scan)
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index a9a1c4512645..50a7d5fb3b73 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -178,6 +178,7 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
178 xfs_buf_flags_t); 178 xfs_buf_flags_t);
179 179
180extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); 180extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
181extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len);
181extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); 182extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
182extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); 183extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
183extern void xfs_buf_hold(xfs_buf_t *); 184extern void xfs_buf_hold(xfs_buf_t *);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index b3486dfa5520..54e623bfbb85 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -586,7 +586,8 @@ xfs_file_compat_ioctl(
586 case XFS_IOC_RESVSP_32: 586 case XFS_IOC_RESVSP_32:
587 case XFS_IOC_UNRESVSP_32: 587 case XFS_IOC_UNRESVSP_32:
588 case XFS_IOC_RESVSP64_32: 588 case XFS_IOC_RESVSP64_32:
589 case XFS_IOC_UNRESVSP64_32: { 589 case XFS_IOC_UNRESVSP64_32:
590 case XFS_IOC_ZERO_RANGE_32: {
590 struct xfs_flock64 bf; 591 struct xfs_flock64 bf;
591 592
592 if (xfs_compat_flock64_copyin(&bf, arg)) 593 if (xfs_compat_flock64_copyin(&bf, arg))
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
index 08b605792a99..80f4060e8970 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -184,6 +184,7 @@ typedef struct compat_xfs_flock64 {
184#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) 184#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64)
185#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) 185#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64)
186#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64) 186#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64)
187#define XFS_IOC_ZERO_RANGE_32 _IOW('X', 57, struct compat_xfs_flock64)
187 188
188typedef struct compat_xfs_fsop_geom_v1 { 189typedef struct compat_xfs_fsop_geom_v1 {
189 __u32 blocksize; /* filesystem (data) block size */ 190 __u32 blocksize; /* filesystem (data) block size */
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 244be9cbfe78..8633521b3b2e 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -70,6 +70,7 @@
70#include <linux/ctype.h> 70#include <linux/ctype.h>
71#include <linux/writeback.h> 71#include <linux/writeback.h>
72#include <linux/capability.h> 72#include <linux/capability.h>
73#include <linux/list_sort.h>
73 74
74#include <asm/page.h> 75#include <asm/page.h>
75#include <asm/div64.h> 76#include <asm/div64.h>
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c
index 9f76cceb678d..bd672def95ac 100644
--- a/fs/xfs/linux-2.6/xfs_message.c
+++ b/fs/xfs/linux-2.6/xfs_message.c
@@ -41,23 +41,6 @@ __xfs_printk(
41 printk("%sXFS: %pV\n", level, vaf); 41 printk("%sXFS: %pV\n", level, vaf);
42} 42}
43 43
44void xfs_printk(
45 const char *level,
46 const struct xfs_mount *mp,
47 const char *fmt, ...)
48{
49 struct va_format vaf;
50 va_list args;
51
52 va_start(args, fmt);
53
54 vaf.fmt = fmt;
55 vaf.va = &args;
56
57 __xfs_printk(level, mp, &vaf);
58 va_end(args);
59}
60
61#define define_xfs_printk_level(func, kern_level) \ 44#define define_xfs_printk_level(func, kern_level) \
62void func(const struct xfs_mount *mp, const char *fmt, ...) \ 45void func(const struct xfs_mount *mp, const char *fmt, ...) \
63{ \ 46{ \
@@ -95,8 +78,7 @@ xfs_alert_tag(
95 int do_panic = 0; 78 int do_panic = 0;
96 79
97 if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { 80 if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
98 xfs_printk(KERN_ALERT, mp, 81 xfs_alert(mp, "Transforming an alert into a BUG.");
99 "XFS: Transforming an alert into a BUG.");
100 do_panic = 1; 82 do_panic = 1;
101 } 83 }
102 84
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h
index f1b3fc1b6c4e..7fb7ea007672 100644
--- a/fs/xfs/linux-2.6/xfs_message.h
+++ b/fs/xfs/linux-2.6/xfs_message.h
@@ -3,9 +3,6 @@
3 3
4struct xfs_mount; 4struct xfs_mount;
5 5
6extern void xfs_printk(const char *level, const struct xfs_mount *mp,
7 const char *fmt, ...)
8 __attribute__ ((format (printf, 3, 4)));
9extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) 6extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
10 __attribute__ ((format (printf, 2, 3))); 7 __attribute__ ((format (printf, 2, 3)));
11extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) 8extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
@@ -28,7 +25,9 @@ extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
28extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) 25extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
29 __attribute__ ((format (printf, 2, 3))); 26 __attribute__ ((format (printf, 2, 3)));
30#else 27#else
31static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) 28static inline void
29__attribute__ ((format (printf, 2, 3)))
30xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
32{ 31{
33} 32}
34#endif 33#endif
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index b38e58d02299..b0aa59e51fd0 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1787,10 +1787,6 @@ init_xfs_fs(void)
1787 if (error) 1787 if (error)
1788 goto out_cleanup_procfs; 1788 goto out_cleanup_procfs;
1789 1789
1790 error = xfs_init_workqueues();
1791 if (error)
1792 goto out_sysctl_unregister;
1793
1794 vfs_initquota(); 1790 vfs_initquota();
1795 1791
1796 error = register_filesystem(&xfs_fs_type); 1792 error = register_filesystem(&xfs_fs_type);
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index e4f9c1b0836c..8ecad5ff9f9b 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -267,6 +267,16 @@ xfs_sync_inode_attr(
267 267
268 error = xfs_iflush(ip, flags); 268 error = xfs_iflush(ip, flags);
269 269
270 /*
271 * We don't want to try again on non-blocking flushes that can't run
272 * again immediately. If an inode really must be written, then that's
273 * what the SYNC_WAIT flag is for.
274 */
275 if (error == EAGAIN) {
276 ASSERT(!(flags & SYNC_WAIT));
277 error = 0;
278 }
279
270 out_unlock: 280 out_unlock:
271 xfs_iunlock(ip, XFS_ILOCK_SHARED); 281 xfs_iunlock(ip, XFS_ILOCK_SHARED);
272 return error; 282 return error;
@@ -926,6 +936,7 @@ restart:
926 XFS_LOOKUP_BATCH, 936 XFS_LOOKUP_BATCH,
927 XFS_ICI_RECLAIM_TAG); 937 XFS_ICI_RECLAIM_TAG);
928 if (!nr_found) { 938 if (!nr_found) {
939 done = 1;
929 rcu_read_unlock(); 940 rcu_read_unlock();
930 break; 941 break;
931 } 942 }
@@ -1021,13 +1032,14 @@ xfs_reclaim_inodes(
1021static int 1032static int
1022xfs_reclaim_inode_shrink( 1033xfs_reclaim_inode_shrink(
1023 struct shrinker *shrink, 1034 struct shrinker *shrink,
1024 int nr_to_scan, 1035 struct shrink_control *sc)
1025 gfp_t gfp_mask)
1026{ 1036{
1027 struct xfs_mount *mp; 1037 struct xfs_mount *mp;
1028 struct xfs_perag *pag; 1038 struct xfs_perag *pag;
1029 xfs_agnumber_t ag; 1039 xfs_agnumber_t ag;
1030 int reclaimable; 1040 int reclaimable;
1041 int nr_to_scan = sc->nr_to_scan;
1042 gfp_t gfp_mask = sc->gfp_mask;
1031 1043
1032 mp = container_of(shrink, struct xfs_mount, m_inode_shrink); 1044 mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
1033 if (nr_to_scan) { 1045 if (nr_to_scan) {
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 2d0bcb479075..d48b7a579ae1 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -1151,44 +1151,7 @@ TRACE_EVENT(xfs_bunmap,
1151 1151
1152); 1152);
1153 1153
1154#define XFS_BUSY_SYNC \ 1154DECLARE_EVENT_CLASS(xfs_busy_class,
1155 { 0, "async" }, \
1156 { 1, "sync" }
1157
1158TRACE_EVENT(xfs_alloc_busy,
1159 TP_PROTO(struct xfs_trans *trans, xfs_agnumber_t agno,
1160 xfs_agblock_t agbno, xfs_extlen_t len, int sync),
1161 TP_ARGS(trans, agno, agbno, len, sync),
1162 TP_STRUCT__entry(
1163 __field(dev_t, dev)
1164 __field(struct xfs_trans *, tp)
1165 __field(int, tid)
1166 __field(xfs_agnumber_t, agno)
1167 __field(xfs_agblock_t, agbno)
1168 __field(xfs_extlen_t, len)
1169 __field(int, sync)
1170 ),
1171 TP_fast_assign(
1172 __entry->dev = trans->t_mountp->m_super->s_dev;
1173 __entry->tp = trans;
1174 __entry->tid = trans->t_ticket->t_tid;
1175 __entry->agno = agno;
1176 __entry->agbno = agbno;
1177 __entry->len = len;
1178 __entry->sync = sync;
1179 ),
1180 TP_printk("dev %d:%d trans 0x%p tid 0x%x agno %u agbno %u len %u %s",
1181 MAJOR(__entry->dev), MINOR(__entry->dev),
1182 __entry->tp,
1183 __entry->tid,
1184 __entry->agno,
1185 __entry->agbno,
1186 __entry->len,
1187 __print_symbolic(__entry->sync, XFS_BUSY_SYNC))
1188
1189);
1190
1191TRACE_EVENT(xfs_alloc_unbusy,
1192 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 1155 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
1193 xfs_agblock_t agbno, xfs_extlen_t len), 1156 xfs_agblock_t agbno, xfs_extlen_t len),
1194 TP_ARGS(mp, agno, agbno, len), 1157 TP_ARGS(mp, agno, agbno, len),
@@ -1210,35 +1173,45 @@ TRACE_EVENT(xfs_alloc_unbusy,
1210 __entry->agbno, 1173 __entry->agbno,
1211 __entry->len) 1174 __entry->len)
1212); 1175);
1176#define DEFINE_BUSY_EVENT(name) \
1177DEFINE_EVENT(xfs_busy_class, name, \
1178 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
1179 xfs_agblock_t agbno, xfs_extlen_t len), \
1180 TP_ARGS(mp, agno, agbno, len))
1181DEFINE_BUSY_EVENT(xfs_alloc_busy);
1182DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem);
1183DEFINE_BUSY_EVENT(xfs_alloc_busy_force);
1184DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse);
1185DEFINE_BUSY_EVENT(xfs_alloc_busy_clear);
1213 1186
1214#define XFS_BUSY_STATES \ 1187TRACE_EVENT(xfs_alloc_busy_trim,
1215 { 0, "missing" }, \
1216 { 1, "found" }
1217
1218TRACE_EVENT(xfs_alloc_busysearch,
1219 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 1188 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
1220 xfs_agblock_t agbno, xfs_extlen_t len, int found), 1189 xfs_agblock_t agbno, xfs_extlen_t len,
1221 TP_ARGS(mp, agno, agbno, len, found), 1190 xfs_agblock_t tbno, xfs_extlen_t tlen),
1191 TP_ARGS(mp, agno, agbno, len, tbno, tlen),
1222 TP_STRUCT__entry( 1192 TP_STRUCT__entry(
1223 __field(dev_t, dev) 1193 __field(dev_t, dev)
1224 __field(xfs_agnumber_t, agno) 1194 __field(xfs_agnumber_t, agno)
1225 __field(xfs_agblock_t, agbno) 1195 __field(xfs_agblock_t, agbno)
1226 __field(xfs_extlen_t, len) 1196 __field(xfs_extlen_t, len)
1227 __field(int, found) 1197 __field(xfs_agblock_t, tbno)
1198 __field(xfs_extlen_t, tlen)
1228 ), 1199 ),
1229 TP_fast_assign( 1200 TP_fast_assign(
1230 __entry->dev = mp->m_super->s_dev; 1201 __entry->dev = mp->m_super->s_dev;
1231 __entry->agno = agno; 1202 __entry->agno = agno;
1232 __entry->agbno = agbno; 1203 __entry->agbno = agbno;
1233 __entry->len = len; 1204 __entry->len = len;
1234 __entry->found = found; 1205 __entry->tbno = tbno;
1206 __entry->tlen = tlen;
1235 ), 1207 ),
1236 TP_printk("dev %d:%d agno %u agbno %u len %u %s", 1208 TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u",
1237 MAJOR(__entry->dev), MINOR(__entry->dev), 1209 MAJOR(__entry->dev), MINOR(__entry->dev),
1238 __entry->agno, 1210 __entry->agno,
1239 __entry->agbno, 1211 __entry->agbno,
1240 __entry->len, 1212 __entry->len,
1241 __print_symbolic(__entry->found, XFS_BUSY_STATES)) 1213 __entry->tbno,
1214 __entry->tlen)
1242); 1215);
1243 1216
1244TRACE_EVENT(xfs_trans_commit_lsn, 1217TRACE_EVENT(xfs_trans_commit_lsn,
@@ -1418,7 +1391,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
1418 __entry->wasfromfl, 1391 __entry->wasfromfl,
1419 __entry->isfl, 1392 __entry->isfl,
1420 __entry->userdata, 1393 __entry->userdata,
1421 __entry->firstblock) 1394 (unsigned long long)__entry->firstblock)
1422) 1395)
1423 1396
1424#define DEFINE_ALLOC_EVENT(name) \ 1397#define DEFINE_ALLOC_EVENT(name) \
@@ -1433,11 +1406,14 @@ DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
1433DEFINE_ALLOC_EVENT(xfs_alloc_near_greater); 1406DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
1434DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser); 1407DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
1435DEFINE_ALLOC_EVENT(xfs_alloc_near_error); 1408DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
1409DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry);
1410DEFINE_ALLOC_EVENT(xfs_alloc_near_busy);
1436DEFINE_ALLOC_EVENT(xfs_alloc_size_neither); 1411DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
1437DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry); 1412DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
1438DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft); 1413DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
1439DEFINE_ALLOC_EVENT(xfs_alloc_size_done); 1414DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
1440DEFINE_ALLOC_EVENT(xfs_alloc_size_error); 1415DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
1416DEFINE_ALLOC_EVENT(xfs_alloc_size_busy);
1441DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist); 1417DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
1442DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough); 1418DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
1443DEFINE_ALLOC_EVENT(xfs_alloc_small_done); 1419DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 69228aa8605a..b94dace4e785 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -60,7 +60,7 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
60 60
61STATIC int xfs_qm_init_quotainos(xfs_mount_t *); 61STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
62STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); 62STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
63STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t); 63STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *);
64 64
65static struct shrinker xfs_qm_shaker = { 65static struct shrinker xfs_qm_shaker = {
66 .shrink = xfs_qm_shake, 66 .shrink = xfs_qm_shake,
@@ -2009,10 +2009,10 @@ xfs_qm_shake_freelist(
2009STATIC int 2009STATIC int
2010xfs_qm_shake( 2010xfs_qm_shake(
2011 struct shrinker *shrink, 2011 struct shrinker *shrink,
2012 int nr_to_scan, 2012 struct shrink_control *sc)
2013 gfp_t gfp_mask)
2014{ 2013{
2015 int ndqused, nfree, n; 2014 int ndqused, nfree, n;
2015 gfp_t gfp_mask = sc->gfp_mask;
2016 2016
2017 if (!kmem_shake_allow(gfp_mask)) 2017 if (!kmem_shake_allow(gfp_mask))
2018 return 0; 2018 return 0;
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 58632cc17f2d..da0a561ffba2 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -187,7 +187,6 @@ struct xfs_busy_extent {
187 xfs_agnumber_t agno; 187 xfs_agnumber_t agno;
188 xfs_agblock_t bno; 188 xfs_agblock_t bno;
189 xfs_extlen_t length; 189 xfs_extlen_t length;
190 xlog_tid_t tid; /* transaction that created this */
191}; 190};
192 191
193/* 192/*
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 27d64d752eab..acdced86413c 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -41,19 +41,13 @@
41#define XFSA_FIXUP_BNO_OK 1 41#define XFSA_FIXUP_BNO_OK 1
42#define XFSA_FIXUP_CNT_OK 2 42#define XFSA_FIXUP_CNT_OK 2
43 43
44/*
45 * Prototypes for per-ag allocation routines
46 */
47
48STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); 44STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
49STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); 45STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
50STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); 46STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
51STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, 47STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
52 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); 48 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
53 49STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *,
54/* 50 xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *);
55 * Internal functions.
56 */
57 51
58/* 52/*
59 * Lookup the record equal to [bno, len] in the btree given by cur. 53 * Lookup the record equal to [bno, len] in the btree given by cur.
@@ -154,19 +148,21 @@ xfs_alloc_compute_aligned(
154 xfs_extlen_t *reslen) /* result length */ 148 xfs_extlen_t *reslen) /* result length */
155{ 149{
156 xfs_agblock_t bno; 150 xfs_agblock_t bno;
157 xfs_extlen_t diff;
158 xfs_extlen_t len; 151 xfs_extlen_t len;
159 152
160 if (args->alignment > 1 && foundlen >= args->minlen) { 153 /* Trim busy sections out of found extent */
161 bno = roundup(foundbno, args->alignment); 154 xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len);
162 diff = bno - foundbno; 155
163 len = diff >= foundlen ? 0 : foundlen - diff; 156 if (args->alignment > 1 && len >= args->minlen) {
157 xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
158 xfs_extlen_t diff = aligned_bno - bno;
159
160 *resbno = aligned_bno;
161 *reslen = diff >= len ? 0 : len - diff;
164 } else { 162 } else {
165 bno = foundbno; 163 *resbno = bno;
166 len = foundlen; 164 *reslen = len;
167 } 165 }
168 *resbno = bno;
169 *reslen = len;
170} 166}
171 167
172/* 168/*
@@ -280,7 +276,6 @@ xfs_alloc_fix_minleft(
280 return 1; 276 return 1;
281 agf = XFS_BUF_TO_AGF(args->agbp); 277 agf = XFS_BUF_TO_AGF(args->agbp);
282 diff = be32_to_cpu(agf->agf_freeblks) 278 diff = be32_to_cpu(agf->agf_freeblks)
283 + be32_to_cpu(agf->agf_flcount)
284 - args->len - args->minleft; 279 - args->len - args->minleft;
285 if (diff >= 0) 280 if (diff >= 0)
286 return 1; 281 return 1;
@@ -541,16 +536,8 @@ xfs_alloc_ag_vextent(
541 if (error) 536 if (error)
542 return error; 537 return error;
543 538
544 /* 539 ASSERT(!xfs_alloc_busy_search(args->mp, args->agno,
545 * Search the busylist for these blocks and mark the 540 args->agbno, args->len));
546 * transaction as synchronous if blocks are found. This
547 * avoids the need to block due to a synchronous log
548 * force to ensure correct ordering as the synchronous
549 * transaction will guarantee that for us.
550 */
551 if (xfs_alloc_busy_search(args->mp, args->agno,
552 args->agbno, args->len))
553 xfs_trans_set_sync(args->tp);
554 } 541 }
555 542
556 if (!args->isfl) { 543 if (!args->isfl) {
@@ -577,14 +564,14 @@ xfs_alloc_ag_vextent_exact(
577{ 564{
578 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ 565 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
579 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ 566 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
580 xfs_agblock_t end; /* end of allocated extent */
581 int error; 567 int error;
582 xfs_agblock_t fbno; /* start block of found extent */ 568 xfs_agblock_t fbno; /* start block of found extent */
583 xfs_agblock_t fend; /* end block of found extent */
584 xfs_extlen_t flen; /* length of found extent */ 569 xfs_extlen_t flen; /* length of found extent */
570 xfs_agblock_t tbno; /* start block of trimmed extent */
571 xfs_extlen_t tlen; /* length of trimmed extent */
572 xfs_agblock_t tend; /* end block of trimmed extent */
573 xfs_agblock_t end; /* end of allocated extent */
585 int i; /* success/failure of operation */ 574 int i; /* success/failure of operation */
586 xfs_agblock_t maxend; /* end of maximal extent */
587 xfs_agblock_t minend; /* end of minimal extent */
588 xfs_extlen_t rlen; /* length of returned extent */ 575 xfs_extlen_t rlen; /* length of returned extent */
589 576
590 ASSERT(args->alignment == 1); 577 ASSERT(args->alignment == 1);
@@ -614,14 +601,22 @@ xfs_alloc_ag_vextent_exact(
614 goto error0; 601 goto error0;
615 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 602 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
616 ASSERT(fbno <= args->agbno); 603 ASSERT(fbno <= args->agbno);
617 minend = args->agbno + args->minlen;
618 maxend = args->agbno + args->maxlen;
619 fend = fbno + flen;
620 604
621 /* 605 /*
622 * Give up if the freespace isn't long enough for the minimum request. 606 * Check for overlapping busy extents.
607 */
608 xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen);
609
610 /*
611 * Give up if the start of the extent is busy, or the freespace isn't
612 * long enough for the minimum request.
623 */ 613 */
624 if (fend < minend) 614 if (tbno > args->agbno)
615 goto not_found;
616 if (tlen < args->minlen)
617 goto not_found;
618 tend = tbno + tlen;
619 if (tend < args->agbno + args->minlen)
625 goto not_found; 620 goto not_found;
626 621
627 /* 622 /*
@@ -630,14 +625,14 @@ xfs_alloc_ag_vextent_exact(
630 * 625 *
631 * Fix the length according to mod and prod if given. 626 * Fix the length according to mod and prod if given.
632 */ 627 */
633 end = XFS_AGBLOCK_MIN(fend, maxend); 628 end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen);
634 args->len = end - args->agbno; 629 args->len = end - args->agbno;
635 xfs_alloc_fix_len(args); 630 xfs_alloc_fix_len(args);
636 if (!xfs_alloc_fix_minleft(args)) 631 if (!xfs_alloc_fix_minleft(args))
637 goto not_found; 632 goto not_found;
638 633
639 rlen = args->len; 634 rlen = args->len;
640 ASSERT(args->agbno + rlen <= fend); 635 ASSERT(args->agbno + rlen <= tend);
641 end = args->agbno + rlen; 636 end = args->agbno + rlen;
642 637
643 /* 638 /*
@@ -686,11 +681,11 @@ xfs_alloc_find_best_extent(
686 struct xfs_btree_cur **scur, /* searching cursor */ 681 struct xfs_btree_cur **scur, /* searching cursor */
687 xfs_agblock_t gdiff, /* difference for search comparison */ 682 xfs_agblock_t gdiff, /* difference for search comparison */
688 xfs_agblock_t *sbno, /* extent found by search */ 683 xfs_agblock_t *sbno, /* extent found by search */
689 xfs_extlen_t *slen, 684 xfs_extlen_t *slen, /* extent length */
690 xfs_extlen_t *slena, /* aligned length */ 685 xfs_agblock_t *sbnoa, /* aligned extent found by search */
686 xfs_extlen_t *slena, /* aligned extent length */
691 int dir) /* 0 = search right, 1 = search left */ 687 int dir) /* 0 = search right, 1 = search left */
692{ 688{
693 xfs_agblock_t bno;
694 xfs_agblock_t new; 689 xfs_agblock_t new;
695 xfs_agblock_t sdiff; 690 xfs_agblock_t sdiff;
696 int error; 691 int error;
@@ -708,16 +703,16 @@ xfs_alloc_find_best_extent(
708 if (error) 703 if (error)
709 goto error0; 704 goto error0;
710 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 705 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
711 xfs_alloc_compute_aligned(args, *sbno, *slen, &bno, slena); 706 xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
712 707
713 /* 708 /*
714 * The good extent is closer than this one. 709 * The good extent is closer than this one.
715 */ 710 */
716 if (!dir) { 711 if (!dir) {
717 if (bno >= args->agbno + gdiff) 712 if (*sbnoa >= args->agbno + gdiff)
718 goto out_use_good; 713 goto out_use_good;
719 } else { 714 } else {
720 if (bno <= args->agbno - gdiff) 715 if (*sbnoa <= args->agbno - gdiff)
721 goto out_use_good; 716 goto out_use_good;
722 } 717 }
723 718
@@ -729,8 +724,8 @@ xfs_alloc_find_best_extent(
729 xfs_alloc_fix_len(args); 724 xfs_alloc_fix_len(args);
730 725
731 sdiff = xfs_alloc_compute_diff(args->agbno, args->len, 726 sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
732 args->alignment, *sbno, 727 args->alignment, *sbnoa,
733 *slen, &new); 728 *slena, &new);
734 729
735 /* 730 /*
736 * Choose closer size and invalidate other cursor. 731 * Choose closer size and invalidate other cursor.
@@ -780,7 +775,7 @@ xfs_alloc_ag_vextent_near(
780 xfs_agblock_t gtbnoa; /* aligned ... */ 775 xfs_agblock_t gtbnoa; /* aligned ... */
781 xfs_extlen_t gtdiff; /* difference to right side entry */ 776 xfs_extlen_t gtdiff; /* difference to right side entry */
782 xfs_extlen_t gtlen; /* length of right side entry */ 777 xfs_extlen_t gtlen; /* length of right side entry */
783 xfs_extlen_t gtlena = 0; /* aligned ... */ 778 xfs_extlen_t gtlena; /* aligned ... */
784 xfs_agblock_t gtnew; /* useful start bno of right side */ 779 xfs_agblock_t gtnew; /* useful start bno of right side */
785 int error; /* error code */ 780 int error; /* error code */
786 int i; /* result code, temporary */ 781 int i; /* result code, temporary */
@@ -789,9 +784,10 @@ xfs_alloc_ag_vextent_near(
789 xfs_agblock_t ltbnoa; /* aligned ... */ 784 xfs_agblock_t ltbnoa; /* aligned ... */
790 xfs_extlen_t ltdiff; /* difference to left side entry */ 785 xfs_extlen_t ltdiff; /* difference to left side entry */
791 xfs_extlen_t ltlen; /* length of left side entry */ 786 xfs_extlen_t ltlen; /* length of left side entry */
792 xfs_extlen_t ltlena = 0; /* aligned ... */ 787 xfs_extlen_t ltlena; /* aligned ... */
793 xfs_agblock_t ltnew; /* useful start bno of left side */ 788 xfs_agblock_t ltnew; /* useful start bno of left side */
794 xfs_extlen_t rlen; /* length of returned extent */ 789 xfs_extlen_t rlen; /* length of returned extent */
790 int forced = 0;
795#if defined(DEBUG) && defined(__KERNEL__) 791#if defined(DEBUG) && defined(__KERNEL__)
796 /* 792 /*
797 * Randomly don't execute the first algorithm. 793 * Randomly don't execute the first algorithm.
@@ -800,13 +796,20 @@ xfs_alloc_ag_vextent_near(
800 796
801 dofirst = random32() & 1; 797 dofirst = random32() & 1;
802#endif 798#endif
799
800restart:
801 bno_cur_lt = NULL;
802 bno_cur_gt = NULL;
803 ltlen = 0;
804 gtlena = 0;
805 ltlena = 0;
806
803 /* 807 /*
804 * Get a cursor for the by-size btree. 808 * Get a cursor for the by-size btree.
805 */ 809 */
806 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 810 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
807 args->agno, XFS_BTNUM_CNT); 811 args->agno, XFS_BTNUM_CNT);
808 ltlen = 0; 812
809 bno_cur_lt = bno_cur_gt = NULL;
810 /* 813 /*
811 * See if there are any free extents as big as maxlen. 814 * See if there are any free extents as big as maxlen.
812 */ 815 */
@@ -822,11 +825,13 @@ xfs_alloc_ag_vextent_near(
822 goto error0; 825 goto error0;
823 if (i == 0 || ltlen == 0) { 826 if (i == 0 || ltlen == 0) {
824 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 827 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
828 trace_xfs_alloc_near_noentry(args);
825 return 0; 829 return 0;
826 } 830 }
827 ASSERT(i == 1); 831 ASSERT(i == 1);
828 } 832 }
829 args->wasfromfl = 0; 833 args->wasfromfl = 0;
834
830 /* 835 /*
831 * First algorithm. 836 * First algorithm.
832 * If the requested extent is large wrt the freespaces available 837 * If the requested extent is large wrt the freespaces available
@@ -890,7 +895,7 @@ xfs_alloc_ag_vextent_near(
890 if (args->len < blen) 895 if (args->len < blen)
891 continue; 896 continue;
892 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, 897 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
893 args->alignment, ltbno, ltlen, &ltnew); 898 args->alignment, ltbnoa, ltlena, &ltnew);
894 if (ltnew != NULLAGBLOCK && 899 if (ltnew != NULLAGBLOCK &&
895 (args->len > blen || ltdiff < bdiff)) { 900 (args->len > blen || ltdiff < bdiff)) {
896 bdiff = ltdiff; 901 bdiff = ltdiff;
@@ -1042,11 +1047,12 @@ xfs_alloc_ag_vextent_near(
1042 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1047 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1043 xfs_alloc_fix_len(args); 1048 xfs_alloc_fix_len(args);
1044 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, 1049 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1045 args->alignment, ltbno, ltlen, &ltnew); 1050 args->alignment, ltbnoa, ltlena, &ltnew);
1046 1051
1047 error = xfs_alloc_find_best_extent(args, 1052 error = xfs_alloc_find_best_extent(args,
1048 &bno_cur_lt, &bno_cur_gt, 1053 &bno_cur_lt, &bno_cur_gt,
1049 ltdiff, &gtbno, &gtlen, &gtlena, 1054 ltdiff, &gtbno, &gtlen,
1055 &gtbnoa, &gtlena,
1050 0 /* search right */); 1056 0 /* search right */);
1051 } else { 1057 } else {
1052 ASSERT(gtlena >= args->minlen); 1058 ASSERT(gtlena >= args->minlen);
@@ -1057,11 +1063,12 @@ xfs_alloc_ag_vextent_near(
1057 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); 1063 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1058 xfs_alloc_fix_len(args); 1064 xfs_alloc_fix_len(args);
1059 gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, 1065 gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1060 args->alignment, gtbno, gtlen, &gtnew); 1066 args->alignment, gtbnoa, gtlena, &gtnew);
1061 1067
1062 error = xfs_alloc_find_best_extent(args, 1068 error = xfs_alloc_find_best_extent(args,
1063 &bno_cur_gt, &bno_cur_lt, 1069 &bno_cur_gt, &bno_cur_lt,
1064 gtdiff, &ltbno, &ltlen, &ltlena, 1070 gtdiff, &ltbno, &ltlen,
1071 &ltbnoa, &ltlena,
1065 1 /* search left */); 1072 1 /* search left */);
1066 } 1073 }
1067 1074
@@ -1073,6 +1080,12 @@ xfs_alloc_ag_vextent_near(
1073 * If we couldn't get anything, give up. 1080 * If we couldn't get anything, give up.
1074 */ 1081 */
1075 if (bno_cur_lt == NULL && bno_cur_gt == NULL) { 1082 if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1083 if (!forced++) {
1084 trace_xfs_alloc_near_busy(args);
1085 xfs_log_force(args->mp, XFS_LOG_SYNC);
1086 goto restart;
1087 }
1088
1076 trace_xfs_alloc_size_neither(args); 1089 trace_xfs_alloc_size_neither(args);
1077 args->agbno = NULLAGBLOCK; 1090 args->agbno = NULLAGBLOCK;
1078 return 0; 1091 return 0;
@@ -1107,12 +1120,13 @@ xfs_alloc_ag_vextent_near(
1107 return 0; 1120 return 0;
1108 } 1121 }
1109 rlen = args->len; 1122 rlen = args->len;
1110 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, 1123 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
1111 ltlen, &ltnew); 1124 ltbnoa, ltlena, &ltnew);
1112 ASSERT(ltnew >= ltbno); 1125 ASSERT(ltnew >= ltbno);
1113 ASSERT(ltnew + rlen <= ltbno + ltlen); 1126 ASSERT(ltnew + rlen <= ltbnoa + ltlena);
1114 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 1127 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1115 args->agbno = ltnew; 1128 args->agbno = ltnew;
1129
1116 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, 1130 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1117 ltnew, rlen, XFSA_FIXUP_BNO_OK))) 1131 ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1118 goto error0; 1132 goto error0;
@@ -1155,26 +1169,35 @@ xfs_alloc_ag_vextent_size(
1155 int i; /* temp status variable */ 1169 int i; /* temp status variable */
1156 xfs_agblock_t rbno; /* returned block number */ 1170 xfs_agblock_t rbno; /* returned block number */
1157 xfs_extlen_t rlen; /* length of returned extent */ 1171 xfs_extlen_t rlen; /* length of returned extent */
1172 int forced = 0;
1158 1173
1174restart:
1159 /* 1175 /*
1160 * Allocate and initialize a cursor for the by-size btree. 1176 * Allocate and initialize a cursor for the by-size btree.
1161 */ 1177 */
1162 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1178 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1163 args->agno, XFS_BTNUM_CNT); 1179 args->agno, XFS_BTNUM_CNT);
1164 bno_cur = NULL; 1180 bno_cur = NULL;
1181
1165 /* 1182 /*
1166 * Look for an entry >= maxlen+alignment-1 blocks. 1183 * Look for an entry >= maxlen+alignment-1 blocks.
1167 */ 1184 */
1168 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, 1185 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1169 args->maxlen + args->alignment - 1, &i))) 1186 args->maxlen + args->alignment - 1, &i)))
1170 goto error0; 1187 goto error0;
1188
1171 /* 1189 /*
1172 * If none, then pick up the last entry in the tree unless the 1190 * If none or we have busy extents that we cannot allocate from, then
1173 * tree is empty. 1191 * we have to settle for a smaller extent. In the case that there are
1192 * no large extents, this will return the last entry in the tree unless
1193 * the tree is empty. In the case that there are only busy large
1194 * extents, this will return the largest small extent unless there
1195 * are no smaller extents available.
1174 */ 1196 */
1175 if (!i) { 1197 if (!i || forced > 1) {
1176 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno, 1198 error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1177 &flen, &i))) 1199 &fbno, &flen, &i);
1200 if (error)
1178 goto error0; 1201 goto error0;
1179 if (i == 0 || flen == 0) { 1202 if (i == 0 || flen == 0) {
1180 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1203 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
@@ -1182,22 +1205,56 @@ xfs_alloc_ag_vextent_size(
1182 return 0; 1205 return 0;
1183 } 1206 }
1184 ASSERT(i == 1); 1207 ASSERT(i == 1);
1208 xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1209 } else {
1210 /*
1211 * Search for a non-busy extent that is large enough.
1212 * If we are at low space, don't check, or if we fall of
1213 * the end of the btree, turn off the busy check and
1214 * restart.
1215 */
1216 for (;;) {
1217 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1218 if (error)
1219 goto error0;
1220 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1221
1222 xfs_alloc_compute_aligned(args, fbno, flen,
1223 &rbno, &rlen);
1224
1225 if (rlen >= args->maxlen)
1226 break;
1227
1228 error = xfs_btree_increment(cnt_cur, 0, &i);
1229 if (error)
1230 goto error0;
1231 if (i == 0) {
1232 /*
1233 * Our only valid extents must have been busy.
1234 * Make it unbusy by forcing the log out and
1235 * retrying. If we've been here before, forcing
1236 * the log isn't making the extents available,
1237 * which means they have probably been freed in
1238 * this transaction. In that case, we have to
1239 * give up on them and we'll attempt a minlen
1240 * allocation the next time around.
1241 */
1242 xfs_btree_del_cursor(cnt_cur,
1243 XFS_BTREE_NOERROR);
1244 trace_xfs_alloc_size_busy(args);
1245 if (!forced++)
1246 xfs_log_force(args->mp, XFS_LOG_SYNC);
1247 goto restart;
1248 }
1249 }
1185 } 1250 }
1186 /* 1251
1187 * There's a freespace as big as maxlen+alignment-1, get it.
1188 */
1189 else {
1190 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i)))
1191 goto error0;
1192 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1193 }
1194 /* 1252 /*
1195 * In the first case above, we got the last entry in the 1253 * In the first case above, we got the last entry in the
1196 * by-size btree. Now we check to see if the space hits maxlen 1254 * by-size btree. Now we check to see if the space hits maxlen
1197 * once aligned; if not, we search left for something better. 1255 * once aligned; if not, we search left for something better.
1198 * This can't happen in the second case above. 1256 * This can't happen in the second case above.
1199 */ 1257 */
1200 xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1201 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1258 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1202 XFS_WANT_CORRUPTED_GOTO(rlen == 0 || 1259 XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1203 (rlen <= flen && rbno + rlen <= fbno + flen), error0); 1260 (rlen <= flen && rbno + rlen <= fbno + flen), error0);
@@ -1251,13 +1308,19 @@ xfs_alloc_ag_vextent_size(
1251 * Fix up the length. 1308 * Fix up the length.
1252 */ 1309 */
1253 args->len = rlen; 1310 args->len = rlen;
1254 xfs_alloc_fix_len(args); 1311 if (rlen < args->minlen) {
1255 if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) { 1312 if (!forced++) {
1256 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1313 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1257 trace_xfs_alloc_size_nominleft(args); 1314 trace_xfs_alloc_size_busy(args);
1258 args->agbno = NULLAGBLOCK; 1315 xfs_log_force(args->mp, XFS_LOG_SYNC);
1259 return 0; 1316 goto restart;
1317 }
1318 goto out_nominleft;
1260 } 1319 }
1320 xfs_alloc_fix_len(args);
1321
1322 if (!xfs_alloc_fix_minleft(args))
1323 goto out_nominleft;
1261 rlen = args->len; 1324 rlen = args->len;
1262 XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); 1325 XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
1263 /* 1326 /*
@@ -1287,6 +1350,12 @@ error0:
1287 if (bno_cur) 1350 if (bno_cur)
1288 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 1351 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1289 return error; 1352 return error;
1353
1354out_nominleft:
1355 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1356 trace_xfs_alloc_size_nominleft(args);
1357 args->agbno = NULLAGBLOCK;
1358 return 0;
1290} 1359}
1291 1360
1292/* 1361/*
@@ -1326,6 +1395,9 @@ xfs_alloc_ag_vextent_small(
1326 if (error) 1395 if (error)
1327 goto error0; 1396 goto error0;
1328 if (fbno != NULLAGBLOCK) { 1397 if (fbno != NULLAGBLOCK) {
1398 xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1,
1399 args->userdata);
1400
1329 if (args->userdata) { 1401 if (args->userdata) {
1330 xfs_buf_t *bp; 1402 xfs_buf_t *bp;
1331 1403
@@ -1617,18 +1689,6 @@ xfs_free_ag_extent(
1617 1689
1618 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); 1690 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
1619 1691
1620 /*
1621 * Since blocks move to the free list without the coordination
1622 * used in xfs_bmap_finish, we can't allow block to be available
1623 * for reallocation and non-transaction writing (user data)
1624 * until we know that the transaction that moved it to the free
1625 * list is permanently on disk. We track the blocks by declaring
1626 * these blocks as "busy"; the busy list is maintained on a per-ag
1627 * basis and each transaction records which entries should be removed
1628 * when the iclog commits to disk. If a busy block is allocated,
1629 * the iclog is pushed up to the LSN that freed the block.
1630 */
1631 xfs_alloc_busy_insert(tp, agno, bno, len);
1632 return 0; 1692 return 0;
1633 1693
1634 error0: 1694 error0:
@@ -1923,21 +1983,6 @@ xfs_alloc_get_freelist(
1923 xfs_alloc_log_agf(tp, agbp, logflags); 1983 xfs_alloc_log_agf(tp, agbp, logflags);
1924 *bnop = bno; 1984 *bnop = bno;
1925 1985
1926 /*
1927 * As blocks are freed, they are added to the per-ag busy list and
1928 * remain there until the freeing transaction is committed to disk.
1929 * Now that we have allocated blocks, this list must be searched to see
1930 * if a block is being reused. If one is, then the freeing transaction
1931 * must be pushed to disk before this transaction.
1932 *
1933 * We do this by setting the current transaction to a sync transaction
1934 * which guarantees that the freeing transaction is on disk before this
1935 * transaction. This is done instead of a synchronous log force here so
1936 * that we don't sit and wait with the AGF locked in the transaction
1937 * during the log force.
1938 */
1939 if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1))
1940 xfs_trans_set_sync(tp);
1941 return 0; 1986 return 0;
1942} 1987}
1943 1988
@@ -2423,105 +2468,13 @@ xfs_free_extent(
2423 } 2468 }
2424 2469
2425 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); 2470 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
2471 if (!error)
2472 xfs_alloc_busy_insert(tp, args.agno, args.agbno, len);
2426error0: 2473error0:
2427 xfs_perag_put(args.pag); 2474 xfs_perag_put(args.pag);
2428 return error; 2475 return error;
2429} 2476}
2430 2477
2431
2432/*
2433 * AG Busy list management
2434 * The busy list contains block ranges that have been freed but whose
2435 * transactions have not yet hit disk. If any block listed in a busy
2436 * list is reused, the transaction that freed it must be forced to disk
2437 * before continuing to use the block.
2438 *
2439 * xfs_alloc_busy_insert - add to the per-ag busy list
2440 * xfs_alloc_busy_clear - remove an item from the per-ag busy list
2441 * xfs_alloc_busy_search - search for a busy extent
2442 */
2443
2444/*
2445 * Insert a new extent into the busy tree.
2446 *
2447 * The busy extent tree is indexed by the start block of the busy extent.
2448 * There can be multiple overlapping ranges in the busy extent tree but only
2449 * ever one entry at a given start block. The reason for this is that
2450 * multi-block extents can be freed, then smaller chunks of that extent
2451 * allocated and freed again before the first transaction commit is on disk.
2452 * If the exact same start block is freed a second time, we have to wait for
2453 * that busy extent to pass out of the tree before the new extent is inserted.
2454 * There are two main cases we have to handle here.
2455 *
2456 * The first case is a transaction that triggers a "free - allocate - free"
2457 * cycle. This can occur during btree manipulations as a btree block is freed
2458 * to the freelist, then allocated from the free list, then freed again. In
2459 * this case, the second extent free is what triggers the duplicate and as
2460 * such the transaction IDs should match. Because the extent was allocated in
2461 * this transaction, the transaction must be marked as synchronous. This is
2462 * true for all cases where the free/alloc/free occurs in the one transaction,
2463 * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case.
2464 * This serves to catch violations of the second case quite effectively.
2465 *
2466 * The second case is where the free/alloc/free occur in different
2467 * transactions. In this case, the thread freeing the extent the second time
2468 * can't mark the extent busy immediately because it is already tracked in a
2469 * transaction that may be committing. When the log commit for the existing
2470 * busy extent completes, the busy extent will be removed from the tree. If we
2471 * allow the second busy insert to continue using that busy extent structure,
2472 * it can be freed before this transaction is safely in the log. Hence our
2473 * only option in this case is to force the log to remove the existing busy
2474 * extent from the list before we insert the new one with the current
2475 * transaction ID.
2476 *
2477 * The problem we are trying to avoid in the free-alloc-free in separate
2478 * transactions is most easily described with a timeline:
2479 *
2480 * Thread 1 Thread 2 Thread 3 xfslogd
2481 * xact alloc
2482 * free X
2483 * mark busy
2484 * commit xact
2485 * free xact
2486 * xact alloc
2487 * alloc X
2488 * busy search
2489 * mark xact sync
2490 * commit xact
2491 * free xact
2492 * force log
2493 * checkpoint starts
2494 * ....
2495 * xact alloc
2496 * free X
2497 * mark busy
2498 * finds match
2499 * *** KABOOM! ***
2500 * ....
2501 * log IO completes
2502 * unbusy X
2503 * checkpoint completes
2504 *
2505 * By issuing a log force in thread 3 @ "KABOOM", the thread will block until
2506 * the checkpoint completes, and the busy extent it matched will have been
2507 * removed from the tree when it is woken. Hence it can then continue safely.
2508 *
2509 * However, to ensure this matching process is robust, we need to use the
2510 * transaction ID for identifying transaction, as delayed logging results in
2511 * the busy extent and transaction lifecycles being different. i.e. the busy
2512 * extent is active for a lot longer than the transaction. Hence the
2513 * transaction structure can be freed and reallocated, then mark the same
2514 * extent busy again in the new transaction. In this case the new transaction
2515 * will have a different tid but can have the same address, and hence we need
2516 * to check against the tid.
2517 *
2518 * Future: for delayed logging, we could avoid the log force if the extent was
2519 * first freed in the current checkpoint sequence. This, however, requires the
2520 * ability to pin the current checkpoint in memory until this transaction
2521 * commits to ensure that both the original free and the current one combine
2522 * logically into the one checkpoint. If the checkpoint sequences are
2523 * different, however, we still need to wait on a log force.
2524 */
2525void 2478void
2526xfs_alloc_busy_insert( 2479xfs_alloc_busy_insert(
2527 struct xfs_trans *tp, 2480 struct xfs_trans *tp,
@@ -2533,9 +2486,7 @@ xfs_alloc_busy_insert(
2533 struct xfs_busy_extent *busyp; 2486 struct xfs_busy_extent *busyp;
2534 struct xfs_perag *pag; 2487 struct xfs_perag *pag;
2535 struct rb_node **rbp; 2488 struct rb_node **rbp;
2536 struct rb_node *parent; 2489 struct rb_node *parent = NULL;
2537 int match;
2538
2539 2490
2540 new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL); 2491 new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
2541 if (!new) { 2492 if (!new) {
@@ -2544,7 +2495,7 @@ xfs_alloc_busy_insert(
2544 * block, make this a synchronous transaction to insure that 2495 * block, make this a synchronous transaction to insure that
2545 * the block is not reused before this transaction commits. 2496 * the block is not reused before this transaction commits.
2546 */ 2497 */
2547 trace_xfs_alloc_busy(tp, agno, bno, len, 1); 2498 trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len);
2548 xfs_trans_set_sync(tp); 2499 xfs_trans_set_sync(tp);
2549 return; 2500 return;
2550 } 2501 }
@@ -2552,66 +2503,28 @@ xfs_alloc_busy_insert(
2552 new->agno = agno; 2503 new->agno = agno;
2553 new->bno = bno; 2504 new->bno = bno;
2554 new->length = len; 2505 new->length = len;
2555 new->tid = xfs_log_get_trans_ident(tp);
2556
2557 INIT_LIST_HEAD(&new->list); 2506 INIT_LIST_HEAD(&new->list);
2558 2507
2559 /* trace before insert to be able to see failed inserts */ 2508 /* trace before insert to be able to see failed inserts */
2560 trace_xfs_alloc_busy(tp, agno, bno, len, 0); 2509 trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len);
2561 2510
2562 pag = xfs_perag_get(tp->t_mountp, new->agno); 2511 pag = xfs_perag_get(tp->t_mountp, new->agno);
2563restart:
2564 spin_lock(&pag->pagb_lock); 2512 spin_lock(&pag->pagb_lock);
2565 rbp = &pag->pagb_tree.rb_node; 2513 rbp = &pag->pagb_tree.rb_node;
2566 parent = NULL; 2514 while (*rbp) {
2567 busyp = NULL;
2568 match = 0;
2569 while (*rbp && match >= 0) {
2570 parent = *rbp; 2515 parent = *rbp;
2571 busyp = rb_entry(parent, struct xfs_busy_extent, rb_node); 2516 busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);
2572 2517
2573 if (new->bno < busyp->bno) { 2518 if (new->bno < busyp->bno) {
2574 /* may overlap, but exact start block is lower */
2575 rbp = &(*rbp)->rb_left; 2519 rbp = &(*rbp)->rb_left;
2576 if (new->bno + new->length > busyp->bno) 2520 ASSERT(new->bno + new->length <= busyp->bno);
2577 match = busyp->tid == new->tid ? 1 : -1;
2578 } else if (new->bno > busyp->bno) { 2521 } else if (new->bno > busyp->bno) {
2579 /* may overlap, but exact start block is higher */
2580 rbp = &(*rbp)->rb_right; 2522 rbp = &(*rbp)->rb_right;
2581 if (bno < busyp->bno + busyp->length) 2523 ASSERT(bno >= busyp->bno + busyp->length);
2582 match = busyp->tid == new->tid ? 1 : -1;
2583 } else { 2524 } else {
2584 match = busyp->tid == new->tid ? 1 : -1; 2525 ASSERT(0);
2585 break;
2586 } 2526 }
2587 } 2527 }
2588 if (match < 0) {
2589 /* overlap marked busy in different transaction */
2590 spin_unlock(&pag->pagb_lock);
2591 xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
2592 goto restart;
2593 }
2594 if (match > 0) {
2595 /*
2596 * overlap marked busy in same transaction. Update if exact
2597 * start block match, otherwise combine the busy extents into
2598 * a single range.
2599 */
2600 if (busyp->bno == new->bno) {
2601 busyp->length = max(busyp->length, new->length);
2602 spin_unlock(&pag->pagb_lock);
2603 ASSERT(tp->t_flags & XFS_TRANS_SYNC);
2604 xfs_perag_put(pag);
2605 kmem_free(new);
2606 return;
2607 }
2608 rb_erase(&busyp->rb_node, &pag->pagb_tree);
2609 new->length = max(busyp->bno + busyp->length,
2610 new->bno + new->length) -
2611 min(busyp->bno, new->bno);
2612 new->bno = min(busyp->bno, new->bno);
2613 } else
2614 busyp = NULL;
2615 2528
2616 rb_link_node(&new->rb_node, parent, rbp); 2529 rb_link_node(&new->rb_node, parent, rbp);
2617 rb_insert_color(&new->rb_node, &pag->pagb_tree); 2530 rb_insert_color(&new->rb_node, &pag->pagb_tree);
@@ -2619,7 +2532,6 @@ restart:
2619 list_add(&new->list, &tp->t_busy); 2532 list_add(&new->list, &tp->t_busy);
2620 spin_unlock(&pag->pagb_lock); 2533 spin_unlock(&pag->pagb_lock);
2621 xfs_perag_put(pag); 2534 xfs_perag_put(pag);
2622 kmem_free(busyp);
2623} 2535}
2624 2536
2625/* 2537/*
@@ -2668,31 +2580,443 @@ xfs_alloc_busy_search(
2668 } 2580 }
2669 } 2581 }
2670 spin_unlock(&pag->pagb_lock); 2582 spin_unlock(&pag->pagb_lock);
2671 trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match);
2672 xfs_perag_put(pag); 2583 xfs_perag_put(pag);
2673 return match; 2584 return match;
2674} 2585}
2675 2586
2587/*
2588 * The found free extent [fbno, fend] overlaps part or all of the given busy
2589 * extent. If the overlap covers the beginning, the end, or all of the busy
2590 * extent, the overlapping portion can be made unbusy and used for the
2591 * allocation. We can't split a busy extent because we can't modify a
2592 * transaction/CIL context busy list, but we can update an entry's block
2593 * number or length.
2594 *
2595 * Returns true if the extent can safely be reused, or false if the search
2596 * needs to be restarted.
2597 */
2598STATIC bool
2599xfs_alloc_busy_update_extent(
2600 struct xfs_mount *mp,
2601 struct xfs_perag *pag,
2602 struct xfs_busy_extent *busyp,
2603 xfs_agblock_t fbno,
2604 xfs_extlen_t flen,
2605 bool userdata)
2606{
2607 xfs_agblock_t fend = fbno + flen;
2608 xfs_agblock_t bbno = busyp->bno;
2609 xfs_agblock_t bend = bbno + busyp->length;
2610
2611 /*
2612 * If there is a busy extent overlapping a user allocation, we have
2613 * no choice but to force the log and retry the search.
2614 *
2615 * Fortunately this does not happen during normal operation, but
2616 * only if the filesystem is very low on space and has to dip into
2617 * the AGFL for normal allocations.
2618 */
2619 if (userdata)
2620 goto out_force_log;
2621
2622 if (bbno < fbno && bend > fend) {
2623 /*
2624 * Case 1:
2625 * bbno bend
2626 * +BBBBBBBBBBBBBBBBB+
2627 * +---------+
2628 * fbno fend
2629 */
2630
2631 /*
2632 * We would have to split the busy extent to be able to track
2633 * it correctly, which we cannot do because we would have to
2634 * modify the list of busy extents attached to the transaction
2635 * or CIL context, which is immutable.
2636 *
2637 * Force out the log to clear the busy extent and retry the
2638 * search.
2639 */
2640 goto out_force_log;
2641 } else if (bbno >= fbno && bend <= fend) {
2642 /*
2643 * Case 2:
2644 * bbno bend
2645 * +BBBBBBBBBBBBBBBBB+
2646 * +-----------------+
2647 * fbno fend
2648 *
2649 * Case 3:
2650 * bbno bend
2651 * +BBBBBBBBBBBBBBBBB+
2652 * +--------------------------+
2653 * fbno fend
2654 *
2655 * Case 4:
2656 * bbno bend
2657 * +BBBBBBBBBBBBBBBBB+
2658 * +--------------------------+
2659 * fbno fend
2660 *
2661 * Case 5:
2662 * bbno bend
2663 * +BBBBBBBBBBBBBBBBB+
2664 * +-----------------------------------+
2665 * fbno fend
2666 *
2667 */
2668
2669 /*
2670 * The busy extent is fully covered by the extent we are
2671 * allocating, and can simply be removed from the rbtree.
2672 * However we cannot remove it from the immutable list
2673 * tracking busy extents in the transaction or CIL context,
2674 * so set the length to zero to mark it invalid.
2675 *
2676 * We also need to restart the busy extent search from the
2677 * tree root, because erasing the node can rearrange the
2678 * tree topology.
2679 */
2680 rb_erase(&busyp->rb_node, &pag->pagb_tree);
2681 busyp->length = 0;
2682 return false;
2683 } else if (fend < bend) {
2684 /*
2685 * Case 6:
2686 * bbno bend
2687 * +BBBBBBBBBBBBBBBBB+
2688 * +---------+
2689 * fbno fend
2690 *
2691 * Case 7:
2692 * bbno bend
2693 * +BBBBBBBBBBBBBBBBB+
2694 * +------------------+
2695 * fbno fend
2696 *
2697 */
2698 busyp->bno = fend;
2699 } else if (bbno < fbno) {
2700 /*
2701 * Case 8:
2702 * bbno bend
2703 * +BBBBBBBBBBBBBBBBB+
2704 * +-------------+
2705 * fbno fend
2706 *
2707 * Case 9:
2708 * bbno bend
2709 * +BBBBBBBBBBBBBBBBB+
2710 * +----------------------+
2711 * fbno fend
2712 */
2713 busyp->length = fbno - busyp->bno;
2714 } else {
2715 ASSERT(0);
2716 }
2717
2718 trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen);
2719 return true;
2720
2721out_force_log:
2722 spin_unlock(&pag->pagb_lock);
2723 xfs_log_force(mp, XFS_LOG_SYNC);
2724 trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen);
2725 spin_lock(&pag->pagb_lock);
2726 return false;
2727}
2728
2729
2730/*
2731 * For a given extent [fbno, flen], make sure we can reuse it safely.
2732 */
2676void 2733void
2677xfs_alloc_busy_clear( 2734xfs_alloc_busy_reuse(
2678 struct xfs_mount *mp, 2735 struct xfs_mount *mp,
2679 struct xfs_busy_extent *busyp) 2736 xfs_agnumber_t agno,
2737 xfs_agblock_t fbno,
2738 xfs_extlen_t flen,
2739 bool userdata)
2680{ 2740{
2681 struct xfs_perag *pag; 2741 struct xfs_perag *pag;
2742 struct rb_node *rbp;
2682 2743
2683 trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno, 2744 ASSERT(flen > 0);
2684 busyp->length);
2685 2745
2686 ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno, 2746 pag = xfs_perag_get(mp, agno);
2687 busyp->length) == 1); 2747 spin_lock(&pag->pagb_lock);
2748restart:
2749 rbp = pag->pagb_tree.rb_node;
2750 while (rbp) {
2751 struct xfs_busy_extent *busyp =
2752 rb_entry(rbp, struct xfs_busy_extent, rb_node);
2753 xfs_agblock_t bbno = busyp->bno;
2754 xfs_agblock_t bend = bbno + busyp->length;
2688 2755
2689 list_del_init(&busyp->list); 2756 if (fbno + flen <= bbno) {
2757 rbp = rbp->rb_left;
2758 continue;
2759 } else if (fbno >= bend) {
2760 rbp = rbp->rb_right;
2761 continue;
2762 }
2690 2763
2691 pag = xfs_perag_get(mp, busyp->agno); 2764 if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen,
2692 spin_lock(&pag->pagb_lock); 2765 userdata))
2693 rb_erase(&busyp->rb_node, &pag->pagb_tree); 2766 goto restart;
2767 }
2694 spin_unlock(&pag->pagb_lock); 2768 spin_unlock(&pag->pagb_lock);
2695 xfs_perag_put(pag); 2769 xfs_perag_put(pag);
2770}
2771
2772/*
2773 * For a given extent [fbno, flen], search the busy extent list to find a
2774 * subset of the extent that is not busy. If *rlen is smaller than
2775 * args->minlen no suitable extent could be found, and the higher level
2776 * code needs to force out the log and retry the allocation.
2777 */
2778STATIC void
2779xfs_alloc_busy_trim(
2780 struct xfs_alloc_arg *args,
2781 xfs_agblock_t bno,
2782 xfs_extlen_t len,
2783 xfs_agblock_t *rbno,
2784 xfs_extlen_t *rlen)
2785{
2786 xfs_agblock_t fbno;
2787 xfs_extlen_t flen;
2788 struct rb_node *rbp;
2789
2790 ASSERT(len > 0);
2696 2791
2792 spin_lock(&args->pag->pagb_lock);
2793restart:
2794 fbno = bno;
2795 flen = len;
2796 rbp = args->pag->pagb_tree.rb_node;
2797 while (rbp && flen >= args->minlen) {
2798 struct xfs_busy_extent *busyp =
2799 rb_entry(rbp, struct xfs_busy_extent, rb_node);
2800 xfs_agblock_t fend = fbno + flen;
2801 xfs_agblock_t bbno = busyp->bno;
2802 xfs_agblock_t bend = bbno + busyp->length;
2803
2804 if (fend <= bbno) {
2805 rbp = rbp->rb_left;
2806 continue;
2807 } else if (fbno >= bend) {
2808 rbp = rbp->rb_right;
2809 continue;
2810 }
2811
2812 /*
2813 * If this is a metadata allocation, try to reuse the busy
2814 * extent instead of trimming the allocation.
2815 */
2816 if (!args->userdata) {
2817 if (!xfs_alloc_busy_update_extent(args->mp, args->pag,
2818 busyp, fbno, flen,
2819 false))
2820 goto restart;
2821 continue;
2822 }
2823
2824 if (bbno <= fbno) {
2825 /* start overlap */
2826
2827 /*
2828 * Case 1:
2829 * bbno bend
2830 * +BBBBBBBBBBBBBBBBB+
2831 * +---------+
2832 * fbno fend
2833 *
2834 * Case 2:
2835 * bbno bend
2836 * +BBBBBBBBBBBBBBBBB+
2837 * +-------------+
2838 * fbno fend
2839 *
2840 * Case 3:
2841 * bbno bend
2842 * +BBBBBBBBBBBBBBBBB+
2843 * +-------------+
2844 * fbno fend
2845 *
2846 * Case 4:
2847 * bbno bend
2848 * +BBBBBBBBBBBBBBBBB+
2849 * +-----------------+
2850 * fbno fend
2851 *
2852 * No unbusy region in extent, return failure.
2853 */
2854 if (fend <= bend)
2855 goto fail;
2856
2857 /*
2858 * Case 5:
2859 * bbno bend
2860 * +BBBBBBBBBBBBBBBBB+
2861 * +----------------------+
2862 * fbno fend
2863 *
2864 * Case 6:
2865 * bbno bend
2866 * +BBBBBBBBBBBBBBBBB+
2867 * +--------------------------+
2868 * fbno fend
2869 *
2870 * Needs to be trimmed to:
2871 * +-------+
2872 * fbno fend
2873 */
2874 fbno = bend;
2875 } else if (bend >= fend) {
2876 /* end overlap */
2877
2878 /*
2879 * Case 7:
2880 * bbno bend
2881 * +BBBBBBBBBBBBBBBBB+
2882 * +------------------+
2883 * fbno fend
2884 *
2885 * Case 8:
2886 * bbno bend
2887 * +BBBBBBBBBBBBBBBBB+
2888 * +--------------------------+
2889 * fbno fend
2890 *
2891 * Needs to be trimmed to:
2892 * +-------+
2893 * fbno fend
2894 */
2895 fend = bbno;
2896 } else {
2897 /* middle overlap */
2898
2899 /*
2900 * Case 9:
2901 * bbno bend
2902 * +BBBBBBBBBBBBBBBBB+
2903 * +-----------------------------------+
2904 * fbno fend
2905 *
2906 * Can be trimmed to:
2907 * +-------+ OR +-------+
2908 * fbno fend fbno fend
2909 *
2910 * Backward allocation leads to significant
2911 * fragmentation of directories, which degrades
2912 * directory performance, therefore we always want to
2913 * choose the option that produces forward allocation
2914 * patterns.
2915 * Preferring the lower bno extent will make the next
2916 * request use "fend" as the start of the next
2917 * allocation; if the segment is no longer busy at
2918 * that point, we'll get a contiguous allocation, but
2919 * even if it is still busy, we will get a forward
2920 * allocation.
2921 * We try to avoid choosing the segment at "bend",
2922 * because that can lead to the next allocation
2923 * taking the segment at "fbno", which would be a
2924 * backward allocation. We only use the segment at
2925 * "fbno" if it is much larger than the current
2926 * requested size, because in that case there's a
2927 * good chance subsequent allocations will be
2928 * contiguous.
2929 */
2930 if (bbno - fbno >= args->maxlen) {
2931 /* left candidate fits perfect */
2932 fend = bbno;
2933 } else if (fend - bend >= args->maxlen * 4) {
2934 /* right candidate has enough free space */
2935 fbno = bend;
2936 } else if (bbno - fbno >= args->minlen) {
2937 /* left candidate fits minimum requirement */
2938 fend = bbno;
2939 } else {
2940 goto fail;
2941 }
2942 }
2943
2944 flen = fend - fbno;
2945 }
2946 spin_unlock(&args->pag->pagb_lock);
2947
2948 if (fbno != bno || flen != len) {
2949 trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len,
2950 fbno, flen);
2951 }
2952 *rbno = fbno;
2953 *rlen = flen;
2954 return;
2955fail:
2956 /*
2957 * Return a zero extent length as a failure indication. All callers
2958 * re-check if the trimmed extent satisfies the minlen requirement.
2959 */
2960 spin_unlock(&args->pag->pagb_lock);
2961 trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
2962 *rbno = fbno;
2963 *rlen = 0;
2964}
2965
2966static void
2967xfs_alloc_busy_clear_one(
2968 struct xfs_mount *mp,
2969 struct xfs_perag *pag,
2970 struct xfs_busy_extent *busyp)
2971{
2972 if (busyp->length) {
2973 trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno,
2974 busyp->length);
2975 rb_erase(&busyp->rb_node, &pag->pagb_tree);
2976 }
2977
2978 list_del_init(&busyp->list);
2697 kmem_free(busyp); 2979 kmem_free(busyp);
2698} 2980}
2981
2982void
2983xfs_alloc_busy_clear(
2984 struct xfs_mount *mp,
2985 struct list_head *list)
2986{
2987 struct xfs_busy_extent *busyp, *n;
2988 struct xfs_perag *pag = NULL;
2989 xfs_agnumber_t agno = NULLAGNUMBER;
2990
2991 list_for_each_entry_safe(busyp, n, list, list) {
2992 if (busyp->agno != agno) {
2993 if (pag) {
2994 spin_unlock(&pag->pagb_lock);
2995 xfs_perag_put(pag);
2996 }
2997 pag = xfs_perag_get(mp, busyp->agno);
2998 spin_lock(&pag->pagb_lock);
2999 agno = busyp->agno;
3000 }
3001
3002 xfs_alloc_busy_clear_one(mp, pag, busyp);
3003 }
3004
3005 if (pag) {
3006 spin_unlock(&pag->pagb_lock);
3007 xfs_perag_put(pag);
3008 }
3009}
3010
3011/*
3012 * Callback for list_sort to sort busy extents by the AG they reside in.
3013 */
3014int
3015xfs_busy_extent_ag_cmp(
3016 void *priv,
3017 struct list_head *a,
3018 struct list_head *b)
3019{
3020 return container_of(a, struct xfs_busy_extent, list)->agno -
3021 container_of(b, struct xfs_busy_extent, list)->agno;
3022}
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index d0b3bc72005b..240ad288f2f9 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -140,11 +140,24 @@ xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno,
140 xfs_agblock_t bno, xfs_extlen_t len); 140 xfs_agblock_t bno, xfs_extlen_t len);
141 141
142void 142void
143xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp); 143xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list);
144 144
145int 145int
146xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, 146xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
147 xfs_agblock_t bno, xfs_extlen_t len); 147 xfs_agblock_t bno, xfs_extlen_t len);
148
149void
150xfs_alloc_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno,
151 xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata);
152
153int
154xfs_busy_extent_ag_cmp(void *priv, struct list_head *a, struct list_head *b);
155
156static inline void xfs_alloc_busy_sort(struct list_head *list)
157{
158 list_sort(NULL, list, xfs_busy_extent_ag_cmp);
159}
160
148#endif /* __KERNEL__ */ 161#endif /* __KERNEL__ */
149 162
150/* 163/*
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 3916925e2584..8b469d53599f 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -95,6 +95,8 @@ xfs_allocbt_alloc_block(
95 return 0; 95 return 0;
96 } 96 }
97 97
98 xfs_alloc_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
99
98 xfs_trans_agbtree_delta(cur->bc_tp, 1); 100 xfs_trans_agbtree_delta(cur->bc_tp, 1);
99 new->s = cpu_to_be32(bno); 101 new->s = cpu_to_be32(bno);
100 102
@@ -118,17 +120,6 @@ xfs_allocbt_free_block(
118 if (error) 120 if (error)
119 return error; 121 return error;
120 122
121 /*
122 * Since blocks move to the free list without the coordination used in
123 * xfs_bmap_finish, we can't allow block to be available for
124 * reallocation and non-transaction writing (user data) until we know
125 * that the transaction that moved it to the free list is permanently
126 * on disk. We track the blocks by declaring these blocks as "busy";
127 * the busy list is maintained on a per-ag basis and each transaction
128 * records which entries should be removed when the iclog commits to
129 * disk. If a busy block is allocated, the iclog is pushed up to the
130 * LSN that freed the block.
131 */
132 xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); 123 xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
133 xfs_trans_agbtree_delta(cur->bc_tp, -1); 124 xfs_trans_agbtree_delta(cur->bc_tp, -1);
134 return 0; 125 return 0;
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index be628677c288..9a84a85c03b1 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -202,7 +202,7 @@ xfs_swap_extents(
202 xfs_inode_t *tip, /* tmp inode */ 202 xfs_inode_t *tip, /* tmp inode */
203 xfs_swapext_t *sxp) 203 xfs_swapext_t *sxp)
204{ 204{
205 xfs_mount_t *mp; 205 xfs_mount_t *mp = ip->i_mount;
206 xfs_trans_t *tp; 206 xfs_trans_t *tp;
207 xfs_bstat_t *sbp = &sxp->sx_stat; 207 xfs_bstat_t *sbp = &sxp->sx_stat;
208 xfs_ifork_t *tempifp, *ifp, *tifp; 208 xfs_ifork_t *tempifp, *ifp, *tifp;
@@ -212,16 +212,12 @@ xfs_swap_extents(
212 int taforkblks = 0; 212 int taforkblks = 0;
213 __uint64_t tmp; 213 __uint64_t tmp;
214 214
215 mp = ip->i_mount;
216
217 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); 215 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
218 if (!tempifp) { 216 if (!tempifp) {
219 error = XFS_ERROR(ENOMEM); 217 error = XFS_ERROR(ENOMEM);
220 goto out; 218 goto out;
221 } 219 }
222 220
223 sbp = &sxp->sx_stat;
224
225 /* 221 /*
226 * we have to do two separate lock calls here to keep lockdep 222 * we have to do two separate lock calls here to keep lockdep
227 * happy. If we try to get all the locks in one call, lock will 223 * happy. If we try to get all the locks in one call, lock will
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a37480a6e023..c8e3349c287c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1354,7 +1354,7 @@ xfs_itruncate_start(
1354 return 0; 1354 return 0;
1355 } 1355 }
1356 last_byte = xfs_file_last_byte(ip); 1356 last_byte = xfs_file_last_byte(ip);
1357 trace_xfs_itruncate_start(ip, flags, new_size, toss_start, last_byte); 1357 trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte);
1358 if (last_byte > toss_start) { 1358 if (last_byte > toss_start) {
1359 if (flags & XFS_ITRUNC_DEFINITE) { 1359 if (flags & XFS_ITRUNC_DEFINITE) {
1360 xfs_tosspages(ip, toss_start, 1360 xfs_tosspages(ip, toss_start,
@@ -1470,7 +1470,7 @@ xfs_itruncate_finish(
1470 * file but the log buffers containing the free and reallocation 1470 * file but the log buffers containing the free and reallocation
1471 * don't, then we'd end up with garbage in the blocks being freed. 1471 * don't, then we'd end up with garbage in the blocks being freed.
1472 * As long as we make the new_size permanent before actually 1472 * As long as we make the new_size permanent before actually
1473 * freeing any blocks it doesn't matter if they get writtten to. 1473 * freeing any blocks it doesn't matter if they get written to.
1474 * 1474 *
1475 * The callers must signal into us whether or not the size 1475 * The callers must signal into us whether or not the size
1476 * setting here must be synchronous. There are a few cases 1476 * setting here must be synchronous. There are a few cases
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 576fdfe81d60..09983a3344a5 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -970,7 +970,6 @@ xfs_iflush_abort(
970{ 970{
971 xfs_inode_log_item_t *iip = ip->i_itemp; 971 xfs_inode_log_item_t *iip = ip->i_itemp;
972 972
973 iip = ip->i_itemp;
974 if (iip) { 973 if (iip) {
975 struct xfs_ail *ailp = iip->ili_item.li_ailp; 974 struct xfs_ail *ailp = iip->ili_item.li_ailp;
976 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { 975 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index b612ce4520ae..211930246f20 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1449,6 +1449,13 @@ xlog_dealloc_log(xlog_t *log)
1449 1449
1450 xlog_cil_destroy(log); 1450 xlog_cil_destroy(log);
1451 1451
1452 /*
1453 * always need to ensure that the extra buffer does not point to memory
1454 * owned by another log buffer before we free it.
1455 */
1456 xfs_buf_set_empty(log->l_xbuf, log->l_iclog_size);
1457 xfs_buf_free(log->l_xbuf);
1458
1452 iclog = log->l_iclog; 1459 iclog = log->l_iclog;
1453 for (i=0; i<log->l_iclog_bufs; i++) { 1460 for (i=0; i<log->l_iclog_bufs; i++) {
1454 xfs_buf_free(iclog->ic_bp); 1461 xfs_buf_free(iclog->ic_bp);
@@ -1458,7 +1465,6 @@ xlog_dealloc_log(xlog_t *log)
1458 } 1465 }
1459 spinlock_destroy(&log->l_icloglock); 1466 spinlock_destroy(&log->l_icloglock);
1460 1467
1461 xfs_buf_free(log->l_xbuf);
1462 log->l_mp->m_log = NULL; 1468 log->l_mp->m_log = NULL;
1463 kmem_free(log); 1469 kmem_free(log);
1464} /* xlog_dealloc_log */ 1470} /* xlog_dealloc_log */
@@ -3248,13 +3254,6 @@ xfs_log_ticket_get(
3248 return ticket; 3254 return ticket;
3249} 3255}
3250 3256
3251xlog_tid_t
3252xfs_log_get_trans_ident(
3253 struct xfs_trans *tp)
3254{
3255 return tp->t_ticket->t_tid;
3256}
3257
3258/* 3257/*
3259 * Allocate and initialise a new log ticket. 3258 * Allocate and initialise a new log ticket.
3260 */ 3259 */
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 3bd3291ef8d2..78c9039994af 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -189,8 +189,6 @@ void xlog_iodone(struct xfs_buf *);
189struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); 189struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
190void xfs_log_ticket_put(struct xlog_ticket *ticket); 190void xfs_log_ticket_put(struct xlog_ticket *ticket);
191 191
192xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp);
193
194void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, 192void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
195 struct xfs_log_vec *log_vector, 193 struct xfs_log_vec *log_vector,
196 xfs_lsn_t *commit_lsn, int flags); 194 xfs_lsn_t *commit_lsn, int flags);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 9ca59be08977..7d56e88a3f0e 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -361,13 +361,12 @@ xlog_cil_committed(
361 int abort) 361 int abort)
362{ 362{
363 struct xfs_cil_ctx *ctx = args; 363 struct xfs_cil_ctx *ctx = args;
364 struct xfs_busy_extent *busyp, *n;
365 364
366 xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, 365 xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
367 ctx->start_lsn, abort); 366 ctx->start_lsn, abort);
368 367
369 list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) 368 xfs_alloc_busy_sort(&ctx->busy_extents);
370 xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); 369 xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, &ctx->busy_extents);
371 370
372 spin_lock(&ctx->cil->xc_cil_lock); 371 spin_lock(&ctx->cil->xc_cil_lock);
373 list_del(&ctx->committing); 372 list_del(&ctx->committing);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 5864850e9e34..2d3b6a498d63 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -146,6 +146,8 @@ static inline uint xlog_get_client_id(__be32 i)
146 shutdown */ 146 shutdown */
147#define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */ 147#define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */
148 148
149typedef __uint32_t xlog_tid_t;
150
149#ifdef __KERNEL__ 151#ifdef __KERNEL__
150/* 152/*
151 * Below are states for covering allocation transactions. 153 * Below are states for covering allocation transactions.
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 5cc464a17c93..04142caedb2b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -205,6 +205,35 @@ xlog_bread(
205} 205}
206 206
207/* 207/*
208 * Read at an offset into the buffer. Returns with the buffer in its original
209 * state regardless of the result of the read.
210 */
211STATIC int
212xlog_bread_offset(
213 xlog_t *log,
214 xfs_daddr_t blk_no, /* block to read from */
215 int nbblks, /* blocks to read */
216 xfs_buf_t *bp,
217 xfs_caddr_t offset)
218{
219 xfs_caddr_t orig_offset = XFS_BUF_PTR(bp);
220 int orig_len = bp->b_buffer_length;
221 int error, error2;
222
223 error = XFS_BUF_SET_PTR(bp, offset, BBTOB(nbblks));
224 if (error)
225 return error;
226
227 error = xlog_bread_noalign(log, blk_no, nbblks, bp);
228
229 /* must reset buffer pointer even on error */
230 error2 = XFS_BUF_SET_PTR(bp, orig_offset, orig_len);
231 if (error)
232 return error;
233 return error2;
234}
235
236/*
208 * Write out the buffer at the given block for the given number of blocks. 237 * Write out the buffer at the given block for the given number of blocks.
209 * The buffer is kept locked across the write and is returned locked. 238 * The buffer is kept locked across the write and is returned locked.
210 * This can only be used for synchronous log writes. 239 * This can only be used for synchronous log writes.
@@ -1229,20 +1258,12 @@ xlog_write_log_records(
1229 */ 1258 */
1230 ealign = round_down(end_block, sectbb); 1259 ealign = round_down(end_block, sectbb);
1231 if (j == 0 && (start_block + endcount > ealign)) { 1260 if (j == 0 && (start_block + endcount > ealign)) {
1232 offset = XFS_BUF_PTR(bp); 1261 offset = XFS_BUF_PTR(bp) + BBTOB(ealign - start_block);
1233 balign = BBTOB(ealign - start_block); 1262 error = xlog_bread_offset(log, ealign, sectbb,
1234 error = XFS_BUF_SET_PTR(bp, offset + balign, 1263 bp, offset);
1235 BBTOB(sectbb));
1236 if (error) 1264 if (error)
1237 break; 1265 break;
1238 1266
1239 error = xlog_bread_noalign(log, ealign, sectbb, bp);
1240 if (error)
1241 break;
1242
1243 error = XFS_BUF_SET_PTR(bp, offset, bufblks);
1244 if (error)
1245 break;
1246 } 1267 }
1247 1268
1248 offset = xlog_align(log, start_block, endcount, bp); 1269 offset = xlog_align(log, start_block, endcount, bp);
@@ -3448,19 +3469,9 @@ xlog_do_recovery_pass(
3448 * - order is important. 3469 * - order is important.
3449 */ 3470 */
3450 wrapped_hblks = hblks - split_hblks; 3471 wrapped_hblks = hblks - split_hblks;
3451 error = XFS_BUF_SET_PTR(hbp, 3472 error = xlog_bread_offset(log, 0,
3452 offset + BBTOB(split_hblks), 3473 wrapped_hblks, hbp,
3453 BBTOB(hblks - split_hblks)); 3474 offset + BBTOB(split_hblks));
3454 if (error)
3455 goto bread_err2;
3456
3457 error = xlog_bread_noalign(log, 0,
3458 wrapped_hblks, hbp);
3459 if (error)
3460 goto bread_err2;
3461
3462 error = XFS_BUF_SET_PTR(hbp, offset,
3463 BBTOB(hblks));
3464 if (error) 3475 if (error)
3465 goto bread_err2; 3476 goto bread_err2;
3466 } 3477 }
@@ -3511,19 +3522,9 @@ xlog_do_recovery_pass(
3511 * _first_, then the log start (LR header end) 3522 * _first_, then the log start (LR header end)
3512 * - order is important. 3523 * - order is important.
3513 */ 3524 */
3514 error = XFS_BUF_SET_PTR(dbp, 3525 error = xlog_bread_offset(log, 0,
3515 offset + BBTOB(split_bblks), 3526 bblks - split_bblks, hbp,
3516 BBTOB(bblks - split_bblks)); 3527 offset + BBTOB(split_bblks));
3517 if (error)
3518 goto bread_err2;
3519
3520 error = xlog_bread_noalign(log, wrapped_hblks,
3521 bblks - split_bblks,
3522 dbp);
3523 if (error)
3524 goto bread_err2;
3525
3526 error = XFS_BUF_SET_PTR(dbp, offset, h_size);
3527 if (error) 3528 if (error)
3528 goto bread_err2; 3529 goto bread_err2;
3529 } 3530 }
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index bb3f9a7b24ed..b49b82363d20 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1900,7 +1900,7 @@ xfs_mod_incore_sb_batch(
1900 uint nmsb, 1900 uint nmsb,
1901 int rsvd) 1901 int rsvd)
1902{ 1902{
1903 xfs_mod_sb_t *msbp = &msb[0]; 1903 xfs_mod_sb_t *msbp;
1904 int error = 0; 1904 int error = 0;
1905 1905
1906 /* 1906 /*
@@ -1910,7 +1910,7 @@ xfs_mod_incore_sb_batch(
1910 * changes will be atomic. 1910 * changes will be atomic.
1911 */ 1911 */
1912 spin_lock(&mp->m_sb_lock); 1912 spin_lock(&mp->m_sb_lock);
1913 for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) { 1913 for (msbp = msb; msbp < (msb + nmsb); msbp++) {
1914 ASSERT(msbp->msb_field < XFS_SBS_ICOUNT || 1914 ASSERT(msbp->msb_field < XFS_SBS_ICOUNT ||
1915 msbp->msb_field > XFS_SBS_FDBLOCKS); 1915 msbp->msb_field > XFS_SBS_FDBLOCKS);
1916 1916
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 76922793f64f..d1f24858ccc4 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -608,10 +608,8 @@ STATIC void
608xfs_trans_free( 608xfs_trans_free(
609 struct xfs_trans *tp) 609 struct xfs_trans *tp)
610{ 610{
611 struct xfs_busy_extent *busyp, *n; 611 xfs_alloc_busy_sort(&tp->t_busy);
612 612 xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy);
613 list_for_each_entry_safe(busyp, n, &tp->t_busy, list)
614 xfs_alloc_busy_clear(tp->t_mountp, busyp);
615 613
616 atomic_dec(&tp->t_mountp->m_active_trans); 614 atomic_dec(&tp->t_mountp->m_active_trans);
617 xfs_trans_free_dqinfo(tp); 615 xfs_trans_free_dqinfo(tp);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index acdb92f14d51..5fc2380092c8 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -346,20 +346,23 @@ xfs_ail_delete(
346 */ 346 */
347STATIC void 347STATIC void
348xfs_ail_worker( 348xfs_ail_worker(
349 struct work_struct *work) 349 struct work_struct *work)
350{ 350{
351 struct xfs_ail *ailp = container_of(to_delayed_work(work), 351 struct xfs_ail *ailp = container_of(to_delayed_work(work),
352 struct xfs_ail, xa_work); 352 struct xfs_ail, xa_work);
353 long tout; 353 xfs_mount_t *mp = ailp->xa_mount;
354 xfs_lsn_t target = ailp->xa_target;
355 xfs_lsn_t lsn;
356 xfs_log_item_t *lip;
357 int flush_log, count, stuck;
358 xfs_mount_t *mp = ailp->xa_mount;
359 struct xfs_ail_cursor *cur = &ailp->xa_cursors; 354 struct xfs_ail_cursor *cur = &ailp->xa_cursors;
360 int push_xfsbufd = 0; 355 xfs_log_item_t *lip;
356 xfs_lsn_t lsn;
357 xfs_lsn_t target;
358 long tout = 10;
359 int flush_log = 0;
360 int stuck = 0;
361 int count = 0;
362 int push_xfsbufd = 0;
361 363
362 spin_lock(&ailp->xa_lock); 364 spin_lock(&ailp->xa_lock);
365 target = ailp->xa_target;
363 xfs_trans_ail_cursor_init(ailp, cur); 366 xfs_trans_ail_cursor_init(ailp, cur);
364 lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn); 367 lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn);
365 if (!lip || XFS_FORCED_SHUTDOWN(mp)) { 368 if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
@@ -368,8 +371,7 @@ xfs_ail_worker(
368 */ 371 */
369 xfs_trans_ail_cursor_done(ailp, cur); 372 xfs_trans_ail_cursor_done(ailp, cur);
370 spin_unlock(&ailp->xa_lock); 373 spin_unlock(&ailp->xa_lock);
371 ailp->xa_last_pushed_lsn = 0; 374 goto out_done;
372 return;
373 } 375 }
374 376
375 XFS_STATS_INC(xs_push_ail); 377 XFS_STATS_INC(xs_push_ail);
@@ -386,8 +388,7 @@ xfs_ail_worker(
386 * lots of contention on the AIL lists. 388 * lots of contention on the AIL lists.
387 */ 389 */
388 lsn = lip->li_lsn; 390 lsn = lip->li_lsn;
389 flush_log = stuck = count = 0; 391 while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) {
390 while ((XFS_LSN_CMP(lip->li_lsn, target) < 0)) {
391 int lock_result; 392 int lock_result;
392 /* 393 /*
393 * If we can lock the item without sleeping, unlock the AIL 394 * If we can lock the item without sleeping, unlock the AIL
@@ -480,21 +481,25 @@ xfs_ail_worker(
480 } 481 }
481 482
482 /* assume we have more work to do in a short while */ 483 /* assume we have more work to do in a short while */
483 tout = 10; 484out_done:
484 if (!count) { 485 if (!count) {
485 /* We're past our target or empty, so idle */ 486 /* We're past our target or empty, so idle */
486 ailp->xa_last_pushed_lsn = 0; 487 ailp->xa_last_pushed_lsn = 0;
487 488
488 /* 489 /*
489 * Check for an updated push target before clearing the 490 * We clear the XFS_AIL_PUSHING_BIT first before checking
490 * XFS_AIL_PUSHING_BIT. If the target changed, we've got more 491 * whether the target has changed. If the target has changed,
491 * work to do. Wait a bit longer before starting that work. 492 * this pushes the requeue race directly onto the result of the
493 * atomic test/set bit, so we are guaranteed that either the
494 * the pusher that changed the target or ourselves will requeue
495 * the work (but not both).
492 */ 496 */
497 clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags);
493 smp_rmb(); 498 smp_rmb();
494 if (ailp->xa_target == target) { 499 if (XFS_LSN_CMP(ailp->xa_target, target) == 0 ||
495 clear_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags); 500 test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
496 return; 501 return;
497 } 502
498 tout = 50; 503 tout = 50;
499 } else if (XFS_LSN_CMP(lsn, target) >= 0) { 504 } else if (XFS_LSN_CMP(lsn, target) >= 0) {
500 /* 505 /*
@@ -553,7 +558,7 @@ xfs_ail_push(
553 * the XFS_AIL_PUSHING_BIT. 558 * the XFS_AIL_PUSHING_BIT.
554 */ 559 */
555 smp_wmb(); 560 smp_wmb();
556 ailp->xa_target = threshold_lsn; 561 xfs_trans_ail_copy_lsn(ailp, &ailp->xa_target, &threshold_lsn);
557 if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags)) 562 if (!test_and_set_bit(XFS_AIL_PUSHING_BIT, &ailp->xa_flags))
558 queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0); 563 queue_delayed_work(xfs_syncd_wq, &ailp->xa_work, 0);
559} 564}
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 26d1867d8156..65584b55607d 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -73,8 +73,6 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */
73typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ 73typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
74typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ 74typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */
75 75
76typedef __uint32_t xlog_tid_t; /* transaction ID type */
77
78/* 76/*
79 * These types are 64 bits on disk but are either 32 or 64 bits in memory. 77 * These types are 64 bits on disk but are either 32 or 64 bits in memory.
80 * Disk based types: 78 * Disk based types: