aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/Kconfig5
-rw-r--r--fs/9p/vfs_inode_dotl.c11
-rw-r--r--fs/Kconfig18
-rw-r--r--fs/binfmt_flat.c8
-rw-r--r--fs/block_dev.c34
-rw-r--r--fs/btrfs/extent_io.c1
-rw-r--r--fs/btrfs/relocation.c2
-rw-r--r--fs/cifs/Kconfig35
-rw-r--r--fs/cifs/Makefile2
-rw-r--r--fs/cifs/README12
-rw-r--r--fs/cifs/cifs_debug.c2
-rw-r--r--fs/cifs/cifs_fs_sb.h4
-rw-r--r--fs/cifs/cifs_unicode.h3
-rw-r--r--fs/cifs/cifsacl.c483
-rw-r--r--fs/cifs/cifsacl.h25
-rw-r--r--fs/cifs/cifsencrypt.c12
-rw-r--r--fs/cifs/cifsfs.c119
-rw-r--r--fs/cifs/cifsfs.h20
-rw-r--r--fs/cifs/cifsglob.h10
-rw-r--r--fs/cifs/cifspdu.h37
-rw-r--r--fs/cifs/cifsproto.h30
-rw-r--r--fs/cifs/cifssmb.c377
-rw-r--r--fs/cifs/connect.c279
-rw-r--r--fs/cifs/export.c4
-rw-r--r--fs/cifs/file.c167
-rw-r--r--fs/cifs/inode.c129
-rw-r--r--fs/cifs/misc.c12
-rw-r--r--fs/cifs/netmisc.c7
-rw-r--r--fs/cifs/sess.c9
-rw-r--r--fs/cifs/smbdes.c418
-rw-r--r--fs/cifs/smbencrypt.c124
-rw-r--r--fs/cifs/transport.c66
-rw-r--r--fs/cifs/xattr.c20
-rw-r--r--fs/compat.c235
-rw-r--r--fs/dcache.c9
-rw-r--r--fs/debugfs/file.c19
-rw-r--r--fs/dlm/config.c9
-rw-r--r--fs/dlm/config.h1
-rw-r--r--fs/dlm/dlm_internal.h3
-rw-r--r--fs/dlm/lock.c182
-rw-r--r--fs/dlm/lock.h1
-rw-r--r--fs/dlm/lockspace.c6
-rw-r--r--fs/dlm/plock.c65
-rw-r--r--fs/dlm/user.c1
-rw-r--r--fs/drop_caches.c5
-rw-r--r--fs/exec.c139
-rw-r--r--fs/ext2/super.c3
-rw-r--r--fs/ext3/namei.c80
-rw-r--r--fs/fat/cache.c7
-rw-r--r--fs/fat/dir.c32
-rw-r--r--fs/fat/fat.h15
-rw-r--r--fs/fat/fatent.c4
-rw-r--r--fs/fat/inode.c74
-rw-r--r--fs/fat/misc.c44
-rw-r--r--fs/fat/namei_msdos.c4
-rw-r--r--fs/fat/namei_vfat.c4
-rw-r--r--fs/freevxfs/vxfs_inode.c2
-rw-r--r--fs/fscache/operation.c10
-rw-r--r--fs/fscache/page.c13
-rw-r--r--fs/gfs2/Makefile4
-rw-r--r--fs/gfs2/aops.c8
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/dir.c197
-rw-r--r--fs/gfs2/dir.h4
-rw-r--r--fs/gfs2/export.c2
-rw-r--r--fs/gfs2/file.c46
-rw-r--r--fs/gfs2/glock.c99
-rw-r--r--fs/gfs2/glock.h3
-rw-r--r--fs/gfs2/glops.c172
-rw-r--r--fs/gfs2/glops.h2
-rw-r--r--fs/gfs2/incore.h8
-rw-r--r--fs/gfs2/inode.c1510
-rw-r--r--fs/gfs2/inode.h8
-rw-r--r--fs/gfs2/log.c208
-rw-r--r--fs/gfs2/log.h2
-rw-r--r--fs/gfs2/lops.c39
-rw-r--r--fs/gfs2/main.c1
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/meta_io.h2
-rw-r--r--fs/gfs2/ops_fstype.c32
-rw-r--r--fs/gfs2/ops_inode.c1344
-rw-r--r--fs/gfs2/quota.c12
-rw-r--r--fs/gfs2/quota.h4
-rw-r--r--fs/gfs2/rgrp.c24
-rw-r--r--fs/gfs2/super.c138
-rw-r--r--fs/gfs2/sys.c6
-rw-r--r--fs/gfs2/trace_gfs2.h38
-rw-r--r--fs/hugetlbfs/inode.c4
-rw-r--r--fs/inode.c10
-rw-r--r--fs/jbd/commit.c15
-rw-r--r--fs/jbd/journal.c16
-rw-r--r--fs/jbd/transaction.c3
-rw-r--r--fs/jbd2/commit.c6
-rw-r--r--fs/logfs/dev_bdev.c1
-rw-r--r--fs/logfs/readwrite.c2
-rw-r--r--fs/mbcache.c10
-rw-r--r--fs/namei.c2
-rw-r--r--fs/ncpfs/inode.c4
-rw-r--r--fs/nfs/dir.c5
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/nfsd/stats.c2
-rw-r--r--fs/nilfs2/alloc.c12
-rw-r--r--fs/nilfs2/bmap.c4
-rw-r--r--fs/nilfs2/btnode.c19
-rw-r--r--fs/nilfs2/btnode.h4
-rw-r--r--fs/nilfs2/btree.c38
-rw-r--r--fs/nilfs2/cpfile.c24
-rw-r--r--fs/nilfs2/dat.c4
-rw-r--r--fs/nilfs2/file.c1
-rw-r--r--fs/nilfs2/gcinode.c25
-rw-r--r--fs/nilfs2/ifile.c4
-rw-r--r--fs/nilfs2/inode.c23
-rw-r--r--fs/nilfs2/ioctl.c61
-rw-r--r--fs/nilfs2/mdt.c8
-rw-r--r--fs/nilfs2/mdt.h9
-rw-r--r--fs/nilfs2/nilfs.h7
-rw-r--r--fs/nilfs2/page.c79
-rw-r--r--fs/nilfs2/page.h7
-rw-r--r--fs/nilfs2/recovery.c12
-rw-r--r--fs/nilfs2/segbuf.c17
-rw-r--r--fs/nilfs2/segment.c190
-rw-r--r--fs/nilfs2/segment.h2
-rw-r--r--fs/nilfs2/sufile.c274
-rw-r--r--fs/nilfs2/sufile.h4
-rw-r--r--fs/nilfs2/super.c131
-rw-r--r--fs/nilfs2/the_nilfs.c24
-rw-r--r--fs/nilfs2/the_nilfs.h2
-rw-r--r--fs/ocfs2/refcounttree.c2
-rw-r--r--fs/partitions/check.c8
-rw-r--r--fs/partitions/ldm.c7
-rw-r--r--fs/proc/internal.h8
-rw-r--r--fs/proc/task_mmu.c204
-rw-r--r--fs/pstore/platform.c12
-rw-r--r--fs/quota/dquot.c5
-rw-r--r--fs/splice.c33
-rw-r--r--fs/squashfs/Kconfig4
-rw-r--r--fs/squashfs/cache.c2
-rw-r--r--fs/super.c3
-rw-r--r--fs/sysfs/file.c12
-rw-r--r--fs/sysfs/group.c6
-rw-r--r--fs/timerfd.c102
-rw-r--r--fs/ubifs/budget.c104
-rw-r--r--fs/ubifs/commit.c2
-rw-r--r--fs/ubifs/debug.c167
-rw-r--r--fs/ubifs/debug.h178
-rw-r--r--fs/ubifs/dir.c4
-rw-r--r--fs/ubifs/file.c28
-rw-r--r--fs/ubifs/find.c10
-rw-r--r--fs/ubifs/gc.c71
-rw-r--r--fs/ubifs/io.c33
-rw-r--r--fs/ubifs/journal.c29
-rw-r--r--fs/ubifs/log.c28
-rw-r--r--fs/ubifs/lprops.c115
-rw-r--r--fs/ubifs/lpt_commit.c55
-rw-r--r--fs/ubifs/master.c8
-rw-r--r--fs/ubifs/misc.h17
-rw-r--r--fs/ubifs/orphan.c3
-rw-r--r--fs/ubifs/recovery.c354
-rw-r--r--fs/ubifs/replay.c468
-rw-r--r--fs/ubifs/sb.c153
-rw-r--r--fs/ubifs/super.c46
-rw-r--r--fs/ubifs/tnc.c10
-rw-r--r--fs/ubifs/tnc_commit.c18
-rw-r--r--fs/ubifs/ubifs-media.h30
-rw-r--r--fs/ubifs/ubifs.h86
-rw-r--r--fs/ubifs/xattr.c8
-rw-r--r--fs/ufs/inode.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c26
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_message.c20
-rw-r--r--fs/xfs/linux-2.6/xfs_message.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h76
-rw-r--r--fs/xfs/quota/xfs_qm.c6
-rw-r--r--fs/xfs/xfs_ag.h1
-rw-r--r--fs/xfs/xfs_alloc.c844
-rw-r--r--fs/xfs/xfs_alloc.h15
-rw-r--r--fs/xfs/xfs_alloc_btree.c13
-rw-r--r--fs/xfs/xfs_dfrag.c6
-rw-r--r--fs/xfs/xfs_inode.c4
-rw-r--r--fs/xfs/xfs_inode_item.c1
-rw-r--r--fs/xfs/xfs_log.c15
-rw-r--r--fs/xfs/xfs_log.h2
-rw-r--r--fs/xfs/xfs_log_cil.c5
-rw-r--r--fs/xfs/xfs_log_priv.h2
-rw-r--r--fs/xfs/xfs_log_recover.c75
-rw-r--r--fs/xfs/xfs_mount.c4
-rw-r--r--fs/xfs/xfs_trans.c6
-rw-r--r--fs/xfs/xfs_types.h2
193 files changed, 6596 insertions, 5430 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 814ac4e213a8..0a93dc1cb4ac 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -1,6 +1,6 @@
1config 9P_FS 1config 9P_FS
2 tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)" 2 tristate "Plan 9 Resource Sharing Support (9P2000)"
3 depends on INET && NET_9P && EXPERIMENTAL 3 depends on INET && NET_9P
4 help 4 help
5 If you say Y here, you will get experimental support for 5 If you say Y here, you will get experimental support for
6 Plan 9 resource sharing via the 9P2000 protocol. 6 Plan 9 resource sharing via the 9P2000 protocol.
@@ -10,7 +10,6 @@ config 9P_FS
10 If unsure, say N. 10 If unsure, say N.
11 11
12if 9P_FS 12if 9P_FS
13
14config 9P_FSCACHE 13config 9P_FSCACHE
15 bool "Enable 9P client caching support (EXPERIMENTAL)" 14 bool "Enable 9P client caching support (EXPERIMENTAL)"
16 depends on EXPERIMENTAL 15 depends on EXPERIMENTAL
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 82a7c38ddad0..691c78f58bef 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -259,7 +259,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
259 if (IS_ERR(inode_fid)) { 259 if (IS_ERR(inode_fid)) {
260 err = PTR_ERR(inode_fid); 260 err = PTR_ERR(inode_fid);
261 mutex_unlock(&v9inode->v_mutex); 261 mutex_unlock(&v9inode->v_mutex);
262 goto error; 262 goto err_clunk_old_fid;
263 } 263 }
264 v9inode->writeback_fid = (void *) inode_fid; 264 v9inode->writeback_fid = (void *) inode_fid;
265 } 265 }
@@ -267,8 +267,8 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
267 /* Since we are opening a file, assign the open fid to the file */ 267 /* Since we are opening a file, assign the open fid to the file */
268 filp = lookup_instantiate_filp(nd, dentry, generic_file_open); 268 filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
269 if (IS_ERR(filp)) { 269 if (IS_ERR(filp)) {
270 p9_client_clunk(ofid); 270 err = PTR_ERR(filp);
271 return PTR_ERR(filp); 271 goto err_clunk_old_fid;
272 } 272 }
273 filp->private_data = ofid; 273 filp->private_data = ofid;
274#ifdef CONFIG_9P_FSCACHE 274#ifdef CONFIG_9P_FSCACHE
@@ -278,10 +278,11 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
278 return 0; 278 return 0;
279 279
280error: 280error:
281 if (ofid)
282 p9_client_clunk(ofid);
283 if (fid) 281 if (fid)
284 p9_client_clunk(fid); 282 p9_client_clunk(fid);
283err_clunk_old_fid:
284 if (ofid)
285 p9_client_clunk(ofid);
285 return err; 286 return err;
286} 287}
287 288
diff --git a/fs/Kconfig b/fs/Kconfig
index f3aa9b08b228..979992dcb386 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -121,9 +121,25 @@ config TMPFS
121 121
122 See <file:Documentation/filesystems/tmpfs.txt> for details. 122 See <file:Documentation/filesystems/tmpfs.txt> for details.
123 123
124config TMPFS_XATTR
125 bool "Tmpfs extended attributes"
126 depends on TMPFS
127 default n
128 help
129 Extended attributes are name:value pairs associated with inodes by
130 the kernel or by users (see the attr(5) manual page, or visit
131 <http://acl.bestbits.at/> for details).
132
133 Currently this enables support for the trusted.* and
134 security.* namespaces.
135
136 If unsure, say N.
137
138 You need this for POSIX ACL support on tmpfs.
139
124config TMPFS_POSIX_ACL 140config TMPFS_POSIX_ACL
125 bool "Tmpfs POSIX Access Control Lists" 141 bool "Tmpfs POSIX Access Control Lists"
126 depends on TMPFS 142 depends on TMPFS_XATTR
127 select GENERIC_ACL 143 select GENERIC_ACL
128 help 144 help
129 POSIX Access Control Lists (ACLs) support permissions for users and 145 POSIX Access Control Lists (ACLs) support permissions for users and
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 397d3057d336..1bffbe0ed778 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -820,6 +820,8 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
820 int res; 820 int res;
821 char buf[16]; 821 char buf[16];
822 822
823 memset(&bprm, 0, sizeof(bprm));
824
823 /* Create the file name */ 825 /* Create the file name */
824 sprintf(buf, "/lib/lib%d.so", id); 826 sprintf(buf, "/lib/lib%d.so", id);
825 827
@@ -835,6 +837,12 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
835 if (!bprm.cred) 837 if (!bprm.cred)
836 goto out; 838 goto out;
837 839
840 /* We don't really care about recalculating credentials at this point
841 * as we're past the point of no return and are dealing with shared
842 * libraries.
843 */
844 bprm.cred_prepared = 1;
845
838 res = prepare_binprm(&bprm); 846 res = prepare_binprm(&bprm);
839 847
840 if (!IS_ERR_VALUE(res)) 848 if (!IS_ERR_VALUE(res))
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 257b00e98428..1f2b19978333 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1120,6 +1120,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1120 goto restart; 1120 goto restart;
1121 } 1121 }
1122 } 1122 }
1123
1124 if (!ret && !bdev->bd_openers) {
1125 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
1126 bdi = blk_get_backing_dev_info(bdev);
1127 if (bdi == NULL)
1128 bdi = &default_backing_dev_info;
1129 bdev_inode_switch_bdi(bdev->bd_inode, bdi);
1130 }
1131
1123 /* 1132 /*
1124 * If the device is invalidated, rescan partition 1133 * If the device is invalidated, rescan partition
1125 * if open succeeded or failed with -ENOMEDIUM. 1134 * if open succeeded or failed with -ENOMEDIUM.
@@ -1130,14 +1139,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1130 rescan_partitions(disk, bdev); 1139 rescan_partitions(disk, bdev);
1131 if (ret) 1140 if (ret)
1132 goto out_clear; 1141 goto out_clear;
1133
1134 if (!bdev->bd_openers) {
1135 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
1136 bdi = blk_get_backing_dev_info(bdev);
1137 if (bdi == NULL)
1138 bdi = &default_backing_dev_info;
1139 bdev_inode_switch_bdi(bdev->bd_inode, bdi);
1140 }
1141 } else { 1142 } else {
1142 struct block_device *whole; 1143 struct block_device *whole;
1143 whole = bdget_disk(disk, 0); 1144 whole = bdget_disk(disk, 0);
@@ -1237,6 +1238,8 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1237 res = __blkdev_get(bdev, mode, 0); 1238 res = __blkdev_get(bdev, mode, 0);
1238 1239
1239 if (whole) { 1240 if (whole) {
1241 struct gendisk *disk = whole->bd_disk;
1242
1240 /* finish claiming */ 1243 /* finish claiming */
1241 mutex_lock(&bdev->bd_mutex); 1244 mutex_lock(&bdev->bd_mutex);
1242 spin_lock(&bdev_lock); 1245 spin_lock(&bdev_lock);
@@ -1263,15 +1266,16 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1263 spin_unlock(&bdev_lock); 1266 spin_unlock(&bdev_lock);
1264 1267
1265 /* 1268 /*
1266 * Block event polling for write claims. Any write 1269 * Block event polling for write claims if requested. Any
1267 * holder makes the write_holder state stick until all 1270 * write holder makes the write_holder state stick until
1268 * are released. This is good enough and tracking 1271 * all are released. This is good enough and tracking
1269 * individual writeable reference is too fragile given 1272 * individual writeable reference is too fragile given the
1270 * the way @mode is used in blkdev_get/put(). 1273 * way @mode is used in blkdev_get/put().
1271 */ 1274 */
1272 if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) { 1275 if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) &&
1276 !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
1273 bdev->bd_write_holder = true; 1277 bdev->bd_write_holder = true;
1274 disk_block_events(bdev->bd_disk); 1278 disk_block_events(disk);
1275 } 1279 }
1276 1280
1277 mutex_unlock(&bdev->bd_mutex); 1281 mutex_unlock(&bdev->bd_mutex);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ba41da59e31b..96fcfa522dab 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -10,6 +10,7 @@
10#include <linux/swap.h> 10#include <linux/swap.h>
11#include <linux/writeback.h> 11#include <linux/writeback.h>
12#include <linux/pagevec.h> 12#include <linux/pagevec.h>
13#include <linux/prefetch.h>
13#include "extent_io.h" 14#include "extent_io.h"
14#include "extent_map.h" 15#include "extent_map.h"
15#include "compat.h" 16#include "compat.h"
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 199a80134312..f340f7c99d09 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -709,7 +709,7 @@ again:
709 WARN_ON(cur->checked); 709 WARN_ON(cur->checked);
710 if (!list_empty(&cur->upper)) { 710 if (!list_empty(&cur->upper)) {
711 /* 711 /*
712 * the backref was added previously when processsing 712 * the backref was added previously when processing
713 * backref of type BTRFS_TREE_BLOCK_REF_KEY 713 * backref of type BTRFS_TREE_BLOCK_REF_KEY
714 */ 714 */
715 BUG_ON(!list_is_singular(&cur->upper)); 715 BUG_ON(!list_is_singular(&cur->upper));
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 7cb0f7f847e4..75c47cd8d086 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -7,6 +7,7 @@ config CIFS
7 select CRYPTO_MD5 7 select CRYPTO_MD5
8 select CRYPTO_HMAC 8 select CRYPTO_HMAC
9 select CRYPTO_ARC4 9 select CRYPTO_ARC4
10 select CRYPTO_DES
10 help 11 help
11 This is the client VFS module for the Common Internet File System 12 This is the client VFS module for the Common Internet File System
12 (CIFS) protocol which is the successor to the Server Message Block 13 (CIFS) protocol which is the successor to the Server Message Block
@@ -152,16 +153,28 @@ config CIFS_ACL
152 Allows to fetch CIFS/NTFS ACL from the server. The DACL blob 153 Allows to fetch CIFS/NTFS ACL from the server. The DACL blob
153 is handed over to the application/caller. 154 is handed over to the application/caller.
154 155
155config CIFS_EXPERIMENTAL 156config CIFS_SMB2
156 bool "CIFS Experimental Features (EXPERIMENTAL)" 157 bool "SMB2 network file system support (EXPERIMENTAL)"
158 depends on EXPERIMENTAL && INET && BROKEN
159 select NLS
160 select KEYS
161 select FSCACHE
162 select DNS_RESOLVER
163
164 help
165 This enables experimental support for the SMB2 (Server Message Block
166 version 2) protocol. The SMB2 protocol is the successor to the
167 popular CIFS and SMB network file sharing protocols. SMB2 is the
168 native file sharing mechanism for recent versions of Windows
169 operating systems (since Vista). SMB2 enablement will eventually
170 allow users better performance, security and features, than would be
171 possible with cifs. Note that smb2 mount options also are simpler
172 (compared to cifs) due to protocol improvements.
173
174 Unless you are a developer or tester, say N.
175
176config CIFS_NFSD_EXPORT
177 bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)"
157 depends on CIFS && EXPERIMENTAL 178 depends on CIFS && EXPERIMENTAL
158 help 179 help
159 Enables cifs features under testing. These features are 180 Allows NFS server to export a CIFS mounted share (nfsd over cifs)
160 experimental and currently include DFS support and directory
161 change notification ie fcntl(F_DNOTIFY), as well as the upcall
162 mechanism which will be used for Kerberos session negotiation
163 and uid remapping. Some of these features also may depend on
164 setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
165 (which is disabled by default). See the file fs/cifs/README
166 for more details. If unsure, say N.
167
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index d87558448e3d..005d524c3a4a 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -4,7 +4,7 @@
4obj-$(CONFIG_CIFS) += cifs.o 4obj-$(CONFIG_CIFS) += cifs.o
5 5
6cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ 6cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
7 link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \ 7 link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
8 cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ 8 cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
9 readdir.o ioctl.o sess.o export.o 9 readdir.o ioctl.o sess.o export.o
10 10
diff --git a/fs/cifs/README b/fs/cifs/README
index 74ab165fc646..4a3ca0e5ca24 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -704,18 +704,6 @@ the start of smb requests and responses can be enabled via:
704 704
705 echo 1 > /proc/fs/cifs/traceSMB 705 echo 1 > /proc/fs/cifs/traceSMB
706 706
707Two other experimental features are under development. To test these
708requires enabling CONFIG_CIFS_EXPERIMENTAL
709
710 cifsacl support needed to retrieve approximated mode bits based on
711 the contents on the CIFS ACL.
712
713 lease support: cifs will check the oplock state before calling into
714 the vfs to see if we can grant a lease on a file.
715
716 DNOTIFY fcntl: needed for support of directory change
717 notification and perhaps later for file leases)
718
719Per share (per client mount) statistics are available in /proc/fs/cifs/Stats 707Per share (per client mount) statistics are available in /proc/fs/cifs/Stats
720if the kernel was configured with cifs statistics enabled. The statistics 708if the kernel was configured with cifs statistics enabled. The statistics
721represent the number of successful (ie non-zero return code from the server) 709represent the number of successful (ie non-zero return code from the server)
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 30d01bc90855..18f4272d9047 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -63,7 +63,7 @@ void cifs_dump_detail(struct smb_hdr *smb)
63 cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d", 63 cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d",
64 smb->Command, smb->Status.CifsError, 64 smb->Command, smb->Status.CifsError,
65 smb->Flags, smb->Flags2, smb->Mid, smb->Pid); 65 smb->Flags, smb->Flags2, smb->Mid, smb->Pid);
66 cERROR(1, "smb buf %p len %d", smb, smbCalcSize_LE(smb)); 66 cERROR(1, "smb buf %p len %d", smb, smbCalcSize(smb));
67} 67}
68 68
69 69
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index ac51cd2d33ae..a9d5692e0c20 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -58,9 +58,7 @@ struct cifs_sb_info {
58 unsigned int mnt_cifs_flags; 58 unsigned int mnt_cifs_flags;
59 int prepathlen; 59 int prepathlen;
60 char *prepath; /* relative path under the share to mount to */ 60 char *prepath; /* relative path under the share to mount to */
61#ifdef CONFIG_CIFS_DFS_UPCALL 61 char *mountdata; /* options received at mount time or via DFS refs */
62 char *mountdata; /* mount options received at mount time */
63#endif
64 struct backing_dev_info bdi; 62 struct backing_dev_info bdi;
65 struct delayed_work prune_tlinks; 63 struct delayed_work prune_tlinks;
66}; 64};
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 644dd882a560..6d02fd560566 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -82,6 +82,9 @@ int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *);
82char *cifs_strndup_from_ucs(const char *src, const int maxlen, 82char *cifs_strndup_from_ucs(const char *src, const int maxlen,
83 const bool is_unicode, 83 const bool is_unicode,
84 const struct nls_table *codepage); 84 const struct nls_table *codepage);
85extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
86 const struct nls_table *cp, int mapChars);
87
85#endif 88#endif
86 89
87/* 90/*
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index beeebf194234..f3c6fb9942ac 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -23,24 +23,16 @@
23 23
24#include <linux/fs.h> 24#include <linux/fs.h>
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/string.h>
27#include <linux/keyctl.h>
28#include <linux/key-type.h>
29#include <keys/user-type.h>
26#include "cifspdu.h" 30#include "cifspdu.h"
27#include "cifsglob.h" 31#include "cifsglob.h"
28#include "cifsacl.h" 32#include "cifsacl.h"
29#include "cifsproto.h" 33#include "cifsproto.h"
30#include "cifs_debug.h" 34#include "cifs_debug.h"
31 35
32
33static struct cifs_wksid wksidarr[NUM_WK_SIDS] = {
34 {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"},
35 {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"},
36 {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"},
37 {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(18), 0, 0, 0, 0} }, "sys"},
38 {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(544), 0, 0, 0} }, "root"},
39 {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(545), 0, 0, 0} }, "users"},
40 {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(546), 0, 0, 0} }, "guest"} }
41;
42
43
44/* security id for everyone/world system group */ 36/* security id for everyone/world system group */
45static const struct cifs_sid sid_everyone = { 37static const struct cifs_sid sid_everyone = {
46 1, 1, {0, 0, 0, 0, 0, 1}, {0} }; 38 1, 1, {0, 0, 0, 0, 0, 1}, {0} };
@@ -50,50 +42,385 @@ static const struct cifs_sid sid_authusers = {
50/* group users */ 42/* group users */
51static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} }; 43static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
52 44
45const struct cred *root_cred;
53 46
54int match_sid(struct cifs_sid *ctsid) 47static void
48shrink_idmap_tree(struct rb_root *root, int nr_to_scan, int *nr_rem,
49 int *nr_del)
55{ 50{
56 int i, j; 51 struct rb_node *node;
57 int num_subauth, num_sat, num_saw; 52 struct rb_node *tmp;
58 struct cifs_sid *cwsid; 53 struct cifs_sid_id *psidid;
54
55 node = rb_first(root);
56 while (node) {
57 tmp = node;
58 node = rb_next(tmp);
59 psidid = rb_entry(tmp, struct cifs_sid_id, rbnode);
60 if (nr_to_scan == 0 || *nr_del == nr_to_scan)
61 ++(*nr_rem);
62 else {
63 if (time_after(jiffies, psidid->time + SID_MAP_EXPIRE)
64 && psidid->refcount == 0) {
65 rb_erase(tmp, root);
66 ++(*nr_del);
67 } else
68 ++(*nr_rem);
69 }
70 }
71}
72
73/*
74 * Run idmap cache shrinker.
75 */
76static int
77cifs_idmap_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
78{
79 int nr_del = 0;
80 int nr_rem = 0;
81 struct rb_root *root;
82
83 root = &uidtree;
84 spin_lock(&siduidlock);
85 shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del);
86 spin_unlock(&siduidlock);
87
88 root = &gidtree;
89 spin_lock(&sidgidlock);
90 shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del);
91 spin_unlock(&sidgidlock);
92
93 return nr_rem;
94}
95
96static struct shrinker cifs_shrinker = {
97 .shrink = cifs_idmap_shrinker,
98 .seeks = DEFAULT_SEEKS,
99};
100
101static int
102cifs_idmap_key_instantiate(struct key *key, const void *data, size_t datalen)
103{
104 char *payload;
105
106 payload = kmalloc(datalen, GFP_KERNEL);
107 if (!payload)
108 return -ENOMEM;
109
110 memcpy(payload, data, datalen);
111 key->payload.data = payload;
112 return 0;
113}
114
115static inline void
116cifs_idmap_key_destroy(struct key *key)
117{
118 kfree(key->payload.data);
119}
59 120
60 if (!ctsid) 121struct key_type cifs_idmap_key_type = {
61 return -1; 122 .name = "cifs.idmap",
123 .instantiate = cifs_idmap_key_instantiate,
124 .destroy = cifs_idmap_key_destroy,
125 .describe = user_describe,
126 .match = user_match,
127};
128
129static void
130sid_to_str(struct cifs_sid *sidptr, char *sidstr)
131{
132 int i;
133 unsigned long saval;
134 char *strptr;
62 135
63 for (i = 0; i < NUM_WK_SIDS; ++i) { 136 strptr = sidstr;
64 cwsid = &(wksidarr[i].cifssid);
65 137
66 /* compare the revision */ 138 sprintf(strptr, "%s", "S");
67 if (ctsid->revision != cwsid->revision) 139 strptr = sidstr + strlen(sidstr);
68 continue;
69 140
70 /* compare all of the six auth values */ 141 sprintf(strptr, "-%d", sidptr->revision);
71 for (j = 0; j < 6; ++j) { 142 strptr = sidstr + strlen(sidstr);
72 if (ctsid->authority[j] != cwsid->authority[j]) 143
73 break; 144 for (i = 0; i < 6; ++i) {
145 if (sidptr->authority[i]) {
146 sprintf(strptr, "-%d", sidptr->authority[i]);
147 strptr = sidstr + strlen(sidstr);
74 } 148 }
75 if (j < 6) 149 }
76 continue; /* all of the auth values did not match */ 150
77 151 for (i = 0; i < sidptr->num_subauth; ++i) {
78 /* compare all of the subauth values if any */ 152 saval = le32_to_cpu(sidptr->sub_auth[i]);
79 num_sat = ctsid->num_subauth; 153 sprintf(strptr, "-%ld", saval);
80 num_saw = cwsid->num_subauth; 154 strptr = sidstr + strlen(sidstr);
81 num_subauth = num_sat < num_saw ? num_sat : num_saw; 155 }
82 if (num_subauth) { 156}
83 for (j = 0; j < num_subauth; ++j) { 157
84 if (ctsid->sub_auth[j] != cwsid->sub_auth[j]) 158static void
85 break; 159id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
86 } 160 struct cifs_sid_id **psidid, char *typestr)
87 if (j < num_subauth) 161{
88 continue; /* all sub_auth values do not match */ 162 int rc;
163 char *strptr;
164 struct rb_node *node = root->rb_node;
165 struct rb_node *parent = NULL;
166 struct rb_node **linkto = &(root->rb_node);
167 struct cifs_sid_id *lsidid;
168
169 while (node) {
170 lsidid = rb_entry(node, struct cifs_sid_id, rbnode);
171 parent = node;
172 rc = compare_sids(sidptr, &((lsidid)->sid));
173 if (rc > 0) {
174 linkto = &(node->rb_left);
175 node = node->rb_left;
176 } else if (rc < 0) {
177 linkto = &(node->rb_right);
178 node = node->rb_right;
179 }
180 }
181
182 memcpy(&(*psidid)->sid, sidptr, sizeof(struct cifs_sid));
183 (*psidid)->time = jiffies - (SID_MAP_RETRY + 1);
184 (*psidid)->refcount = 0;
185
186 sprintf((*psidid)->sidstr, "%s", typestr);
187 strptr = (*psidid)->sidstr + strlen((*psidid)->sidstr);
188 sid_to_str(&(*psidid)->sid, strptr);
189
190 clear_bit(SID_ID_PENDING, &(*psidid)->state);
191 clear_bit(SID_ID_MAPPED, &(*psidid)->state);
192
193 rb_link_node(&(*psidid)->rbnode, parent, linkto);
194 rb_insert_color(&(*psidid)->rbnode, root);
195}
196
197static struct cifs_sid_id *
198id_rb_search(struct rb_root *root, struct cifs_sid *sidptr)
199{
200 int rc;
201 struct rb_node *node = root->rb_node;
202 struct cifs_sid_id *lsidid;
203
204 while (node) {
205 lsidid = rb_entry(node, struct cifs_sid_id, rbnode);
206 rc = compare_sids(sidptr, &((lsidid)->sid));
207 if (rc > 0) {
208 node = node->rb_left;
209 } else if (rc < 0) {
210 node = node->rb_right;
211 } else /* node found */
212 return lsidid;
213 }
214
215 return NULL;
216}
217
218static int
219sidid_pending_wait(void *unused)
220{
221 schedule();
222 return signal_pending(current) ? -ERESTARTSYS : 0;
223}
224
225static int
226sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
227 struct cifs_fattr *fattr, uint sidtype)
228{
229 int rc;
230 unsigned long cid;
231 struct key *idkey;
232 const struct cred *saved_cred;
233 struct cifs_sid_id *psidid, *npsidid;
234 struct rb_root *cidtree;
235 spinlock_t *cidlock;
236
237 if (sidtype == SIDOWNER) {
238 cid = cifs_sb->mnt_uid; /* default uid, in case upcall fails */
239 cidlock = &siduidlock;
240 cidtree = &uidtree;
241 } else if (sidtype == SIDGROUP) {
242 cid = cifs_sb->mnt_gid; /* default gid, in case upcall fails */
243 cidlock = &sidgidlock;
244 cidtree = &gidtree;
245 } else
246 return -ENOENT;
247
248 spin_lock(cidlock);
249 psidid = id_rb_search(cidtree, psid);
250
251 if (!psidid) { /* node does not exist, allocate one & attempt adding */
252 spin_unlock(cidlock);
253 npsidid = kzalloc(sizeof(struct cifs_sid_id), GFP_KERNEL);
254 if (!npsidid)
255 return -ENOMEM;
256
257 npsidid->sidstr = kmalloc(SIDLEN, GFP_KERNEL);
258 if (!npsidid->sidstr) {
259 kfree(npsidid);
260 return -ENOMEM;
261 }
262
263 spin_lock(cidlock);
264 psidid = id_rb_search(cidtree, psid);
265 if (psidid) { /* node happened to get inserted meanwhile */
266 ++psidid->refcount;
267 spin_unlock(cidlock);
268 kfree(npsidid->sidstr);
269 kfree(npsidid);
270 } else {
271 psidid = npsidid;
272 id_rb_insert(cidtree, psid, &psidid,
273 sidtype == SIDOWNER ? "os:" : "gs:");
274 ++psidid->refcount;
275 spin_unlock(cidlock);
89 } 276 }
277 } else {
278 ++psidid->refcount;
279 spin_unlock(cidlock);
280 }
281
282 /*
283 * If we are here, it is safe to access psidid and its fields
284 * since a reference was taken earlier while holding the spinlock.
285 * A reference on the node is put without holding the spinlock
286 * and it is OK to do so in this case, shrinker will not erase
287 * this node until all references are put and we do not access
288 * any fields of the node after a reference is put .
289 */
290 if (test_bit(SID_ID_MAPPED, &psidid->state)) {
291 cid = psidid->id;
292 psidid->time = jiffies; /* update ts for accessing */
293 goto sid_to_id_out;
294 }
90 295
91 cFYI(1, "matching sid: %s\n", wksidarr[i].sidname); 296 if (time_after(psidid->time + SID_MAP_RETRY, jiffies))
92 return 0; /* sids compare/match */ 297 goto sid_to_id_out;
298
299 if (!test_and_set_bit(SID_ID_PENDING, &psidid->state)) {
300 saved_cred = override_creds(root_cred);
301 idkey = request_key(&cifs_idmap_key_type, psidid->sidstr, "");
302 if (IS_ERR(idkey))
303 cFYI(1, "%s: Can't map SID to an id", __func__);
304 else {
305 cid = *(unsigned long *)idkey->payload.value;
306 psidid->id = cid;
307 set_bit(SID_ID_MAPPED, &psidid->state);
308 key_put(idkey);
309 kfree(psidid->sidstr);
310 }
311 revert_creds(saved_cred);
312 psidid->time = jiffies; /* update ts for accessing */
313 clear_bit(SID_ID_PENDING, &psidid->state);
314 wake_up_bit(&psidid->state, SID_ID_PENDING);
315 } else {
316 rc = wait_on_bit(&psidid->state, SID_ID_PENDING,
317 sidid_pending_wait, TASK_INTERRUPTIBLE);
318 if (rc) {
319 cFYI(1, "%s: sidid_pending_wait interrupted %d",
320 __func__, rc);
321 --psidid->refcount; /* decremented without spinlock */
322 return rc;
323 }
324 if (test_bit(SID_ID_MAPPED, &psidid->state))
325 cid = psidid->id;
93 } 326 }
94 327
95 cFYI(1, "No matching sid"); 328sid_to_id_out:
96 return -1; 329 --psidid->refcount; /* decremented without spinlock */
330 if (sidtype == SIDOWNER)
331 fattr->cf_uid = cid;
332 else
333 fattr->cf_gid = cid;
334
335 return 0;
336}
337
338int
339init_cifs_idmap(void)
340{
341 struct cred *cred;
342 struct key *keyring;
343 int ret;
344
345 cFYI(1, "Registering the %s key type\n", cifs_idmap_key_type.name);
346
347 /* create an override credential set with a special thread keyring in
348 * which requests are cached
349 *
350 * this is used to prevent malicious redirections from being installed
351 * with add_key().
352 */
353 cred = prepare_kernel_cred(NULL);
354 if (!cred)
355 return -ENOMEM;
356
357 keyring = key_alloc(&key_type_keyring, ".cifs_idmap", 0, 0, cred,
358 (KEY_POS_ALL & ~KEY_POS_SETATTR) |
359 KEY_USR_VIEW | KEY_USR_READ,
360 KEY_ALLOC_NOT_IN_QUOTA);
361 if (IS_ERR(keyring)) {
362 ret = PTR_ERR(keyring);
363 goto failed_put_cred;
364 }
365
366 ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
367 if (ret < 0)
368 goto failed_put_key;
369
370 ret = register_key_type(&cifs_idmap_key_type);
371 if (ret < 0)
372 goto failed_put_key;
373
374 /* instruct request_key() to use this special keyring as a cache for
375 * the results it looks up */
376 cred->thread_keyring = keyring;
377 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
378 root_cred = cred;
379
380 spin_lock_init(&siduidlock);
381 uidtree = RB_ROOT;
382 spin_lock_init(&sidgidlock);
383 gidtree = RB_ROOT;
384
385 register_shrinker(&cifs_shrinker);
386
387 cFYI(1, "cifs idmap keyring: %d\n", key_serial(keyring));
388 return 0;
389
390failed_put_key:
391 key_put(keyring);
392failed_put_cred:
393 put_cred(cred);
394 return ret;
395}
396
397void
398exit_cifs_idmap(void)
399{
400 key_revoke(root_cred->thread_keyring);
401 unregister_key_type(&cifs_idmap_key_type);
402 put_cred(root_cred);
403 unregister_shrinker(&cifs_shrinker);
404 cFYI(1, "Unregistered %s key type\n", cifs_idmap_key_type.name);
405}
406
407void
408cifs_destroy_idmaptrees(void)
409{
410 struct rb_root *root;
411 struct rb_node *node;
412
413 root = &uidtree;
414 spin_lock(&siduidlock);
415 while ((node = rb_first(root)))
416 rb_erase(node, root);
417 spin_unlock(&siduidlock);
418
419 root = &gidtree;
420 spin_lock(&sidgidlock);
421 while ((node = rb_first(root)))
422 rb_erase(node, root);
423 spin_unlock(&sidgidlock);
97} 424}
98 425
99/* if the two SIDs (roughly equivalent to a UUID for a user or group) are 426/* if the two SIDs (roughly equivalent to a UUID for a user or group) are
@@ -104,16 +431,24 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
104 int num_subauth, num_sat, num_saw; 431 int num_subauth, num_sat, num_saw;
105 432
106 if ((!ctsid) || (!cwsid)) 433 if ((!ctsid) || (!cwsid))
107 return 0; 434 return 1;
108 435
109 /* compare the revision */ 436 /* compare the revision */
110 if (ctsid->revision != cwsid->revision) 437 if (ctsid->revision != cwsid->revision) {
111 return 0; 438 if (ctsid->revision > cwsid->revision)
439 return 1;
440 else
441 return -1;
442 }
112 443
113 /* compare all of the six auth values */ 444 /* compare all of the six auth values */
114 for (i = 0; i < 6; ++i) { 445 for (i = 0; i < 6; ++i) {
115 if (ctsid->authority[i] != cwsid->authority[i]) 446 if (ctsid->authority[i] != cwsid->authority[i]) {
116 return 0; 447 if (ctsid->authority[i] > cwsid->authority[i])
448 return 1;
449 else
450 return -1;
451 }
117 } 452 }
118 453
119 /* compare all of the subauth values if any */ 454 /* compare all of the subauth values if any */
@@ -122,12 +457,16 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
122 num_subauth = num_sat < num_saw ? num_sat : num_saw; 457 num_subauth = num_sat < num_saw ? num_sat : num_saw;
123 if (num_subauth) { 458 if (num_subauth) {
124 for (i = 0; i < num_subauth; ++i) { 459 for (i = 0; i < num_subauth; ++i) {
125 if (ctsid->sub_auth[i] != cwsid->sub_auth[i]) 460 if (ctsid->sub_auth[i] != cwsid->sub_auth[i]) {
126 return 0; 461 if (ctsid->sub_auth[i] > cwsid->sub_auth[i])
462 return 1;
463 else
464 return -1;
465 }
127 } 466 }
128 } 467 }
129 468
130 return 1; /* sids compare/match */ 469 return 0; /* sids compare/match */
131} 470}
132 471
133 472
@@ -382,22 +721,22 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
382#ifdef CONFIG_CIFS_DEBUG2 721#ifdef CONFIG_CIFS_DEBUG2
383 dump_ace(ppace[i], end_of_acl); 722 dump_ace(ppace[i], end_of_acl);
384#endif 723#endif
385 if (compare_sids(&(ppace[i]->sid), pownersid)) 724 if (compare_sids(&(ppace[i]->sid), pownersid) == 0)
386 access_flags_to_mode(ppace[i]->access_req, 725 access_flags_to_mode(ppace[i]->access_req,
387 ppace[i]->type, 726 ppace[i]->type,
388 &fattr->cf_mode, 727 &fattr->cf_mode,
389 &user_mask); 728 &user_mask);
390 if (compare_sids(&(ppace[i]->sid), pgrpsid)) 729 if (compare_sids(&(ppace[i]->sid), pgrpsid) == 0)
391 access_flags_to_mode(ppace[i]->access_req, 730 access_flags_to_mode(ppace[i]->access_req,
392 ppace[i]->type, 731 ppace[i]->type,
393 &fattr->cf_mode, 732 &fattr->cf_mode,
394 &group_mask); 733 &group_mask);
395 if (compare_sids(&(ppace[i]->sid), &sid_everyone)) 734 if (compare_sids(&(ppace[i]->sid), &sid_everyone) == 0)
396 access_flags_to_mode(ppace[i]->access_req, 735 access_flags_to_mode(ppace[i]->access_req,
397 ppace[i]->type, 736 ppace[i]->type,
398 &fattr->cf_mode, 737 &fattr->cf_mode,
399 &other_mask); 738 &other_mask);
400 if (compare_sids(&(ppace[i]->sid), &sid_authusers)) 739 if (compare_sids(&(ppace[i]->sid), &sid_authusers) == 0)
401 access_flags_to_mode(ppace[i]->access_req, 740 access_flags_to_mode(ppace[i]->access_req,
402 ppace[i]->type, 741 ppace[i]->type,
403 &fattr->cf_mode, 742 &fattr->cf_mode,
@@ -475,10 +814,10 @@ static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
475 814
476 815
477/* Convert CIFS ACL to POSIX form */ 816/* Convert CIFS ACL to POSIX form */
478static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len, 817static int parse_sec_desc(struct cifs_sb_info *cifs_sb,
479 struct cifs_fattr *fattr) 818 struct cifs_ntsd *pntsd, int acl_len, struct cifs_fattr *fattr)
480{ 819{
481 int rc; 820 int rc = 0;
482 struct cifs_sid *owner_sid_ptr, *group_sid_ptr; 821 struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
483 struct cifs_acl *dacl_ptr; /* no need for SACL ptr */ 822 struct cifs_acl *dacl_ptr; /* no need for SACL ptr */
484 char *end_of_acl = ((char *)pntsd) + acl_len; 823 char *end_of_acl = ((char *)pntsd) + acl_len;
@@ -500,12 +839,26 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
500 le32_to_cpu(pntsd->sacloffset), dacloffset); 839 le32_to_cpu(pntsd->sacloffset), dacloffset);
501/* cifs_dump_mem("owner_sid: ", owner_sid_ptr, 64); */ 840/* cifs_dump_mem("owner_sid: ", owner_sid_ptr, 64); */
502 rc = parse_sid(owner_sid_ptr, end_of_acl); 841 rc = parse_sid(owner_sid_ptr, end_of_acl);
503 if (rc) 842 if (rc) {
843 cFYI(1, "%s: Error %d parsing Owner SID", __func__, rc);
844 return rc;
845 }
846 rc = sid_to_id(cifs_sb, owner_sid_ptr, fattr, SIDOWNER);
847 if (rc) {
848 cFYI(1, "%s: Error %d mapping Owner SID to uid", __func__, rc);
504 return rc; 849 return rc;
850 }
505 851
506 rc = parse_sid(group_sid_ptr, end_of_acl); 852 rc = parse_sid(group_sid_ptr, end_of_acl);
507 if (rc) 853 if (rc) {
854 cFYI(1, "%s: Error %d mapping Owner SID to gid", __func__, rc);
508 return rc; 855 return rc;
856 }
857 rc = sid_to_id(cifs_sb, group_sid_ptr, fattr, SIDGROUP);
858 if (rc) {
859 cFYI(1, "%s: Error %d mapping Group SID to gid", __func__, rc);
860 return rc;
861 }
509 862
510 if (dacloffset) 863 if (dacloffset)
511 parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr, 864 parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr,
@@ -520,7 +873,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
520 memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr, 873 memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr,
521 sizeof(struct cifs_sid)); */ 874 sizeof(struct cifs_sid)); */
522 875
523 return 0; 876 return rc;
524} 877}
525 878
526 879
@@ -688,7 +1041,7 @@ out:
688} 1041}
689 1042
690/* Set an ACL on the server */ 1043/* Set an ACL on the server */
691static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, 1044int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
692 struct inode *inode, const char *path) 1045 struct inode *inode, const char *path)
693{ 1046{
694 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1047 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -727,7 +1080,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
727 rc = PTR_ERR(pntsd); 1080 rc = PTR_ERR(pntsd);
728 cERROR(1, "%s: error %d getting sec desc", __func__, rc); 1081 cERROR(1, "%s: error %d getting sec desc", __func__, rc);
729 } else { 1082 } else {
730 rc = parse_sec_desc(pntsd, acllen, fattr); 1083 rc = parse_sec_desc(cifs_sb, pntsd, acllen, fattr);
731 kfree(pntsd); 1084 kfree(pntsd);
732 if (rc) 1085 if (rc)
733 cERROR(1, "parse sec desc failed rc = %d", rc); 1086 cERROR(1, "parse sec desc failed rc = %d", rc);
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index c4ae7d036563..5c902c7ce524 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -39,6 +39,15 @@
39#define ACCESS_ALLOWED 0 39#define ACCESS_ALLOWED 0
40#define ACCESS_DENIED 1 40#define ACCESS_DENIED 1
41 41
42#define SIDOWNER 1
43#define SIDGROUP 2
44#define SIDLEN 150 /* S- 1 revision- 6 authorities- max 5 sub authorities */
45
46#define SID_ID_MAPPED 0
47#define SID_ID_PENDING 1
48#define SID_MAP_EXPIRE (3600 * HZ) /* map entry expires after one hour */
49#define SID_MAP_RETRY (300 * HZ) /* wait 5 minutes for next attempt to map */
50
42struct cifs_ntsd { 51struct cifs_ntsd {
43 __le16 revision; /* revision level */ 52 __le16 revision; /* revision level */
44 __le16 type; 53 __le16 type;
@@ -74,7 +83,21 @@ struct cifs_wksid {
74 char sidname[SIDNAMELENGTH]; 83 char sidname[SIDNAMELENGTH];
75} __attribute__((packed)); 84} __attribute__((packed));
76 85
77extern int match_sid(struct cifs_sid *); 86struct cifs_sid_id {
87 unsigned int refcount; /* increment with spinlock, decrement without */
88 unsigned long id;
89 unsigned long time;
90 unsigned long state;
91 char *sidstr;
92 struct rb_node rbnode;
93 struct cifs_sid sid;
94};
95
96#ifdef __KERNEL__
97extern struct key_type cifs_idmap_key_type;
98extern const struct cred *root_cred;
99#endif /* KERNEL */
100
78extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *); 101extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *);
79 102
80#endif /* _CIFSACL_H */ 103#endif /* _CIFSACL_H */
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index d1a016be73ba..45c3f78c8f81 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -60,7 +60,7 @@ static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
60 server->session_key.response, server->session_key.len); 60 server->session_key.response, server->session_key.len);
61 61
62 crypto_shash_update(&server->secmech.sdescmd5->shash, 62 crypto_shash_update(&server->secmech.sdescmd5->shash,
63 cifs_pdu->Protocol, cifs_pdu->smb_buf_length); 63 cifs_pdu->Protocol, be32_to_cpu(cifs_pdu->smb_buf_length));
64 64
65 rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature); 65 rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
66 66
@@ -268,10 +268,11 @@ int setup_ntlm_response(struct cifsSesInfo *ses)
268} 268}
269 269
270#ifdef CONFIG_CIFS_WEAK_PW_HASH 270#ifdef CONFIG_CIFS_WEAK_PW_HASH
271void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, 271int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
272 char *lnm_session_key) 272 char *lnm_session_key)
273{ 273{
274 int i; 274 int i;
275 int rc;
275 char password_with_pad[CIFS_ENCPWD_SIZE]; 276 char password_with_pad[CIFS_ENCPWD_SIZE];
276 277
277 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE); 278 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
@@ -282,7 +283,7 @@ void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
282 memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE); 283 memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE);
283 memcpy(lnm_session_key, password_with_pad, 284 memcpy(lnm_session_key, password_with_pad,
284 CIFS_ENCPWD_SIZE); 285 CIFS_ENCPWD_SIZE);
285 return; 286 return 0;
286 } 287 }
287 288
288 /* calculate old style session key */ 289 /* calculate old style session key */
@@ -299,10 +300,9 @@ void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
299 for (i = 0; i < CIFS_ENCPWD_SIZE; i++) 300 for (i = 0; i < CIFS_ENCPWD_SIZE; i++)
300 password_with_pad[i] = toupper(password_with_pad[i]); 301 password_with_pad[i] = toupper(password_with_pad[i]);
301 302
302 SMBencrypt(password_with_pad, cryptkey, lnm_session_key); 303 rc = SMBencrypt(password_with_pad, cryptkey, lnm_session_key);
303 304
304 /* clear password before we return/free memory */ 305 return rc;
305 memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
306} 306}
307#endif /* CIFS_WEAK_PW_HASH */ 307#endif /* CIFS_WEAK_PW_HASH */
308 308
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5c412b33cd7c..493b74ca5648 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -128,29 +128,22 @@ cifs_read_super(struct super_block *sb, void *data,
128 } 128 }
129 cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages; 129 cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages;
130 130
131#ifdef CONFIG_CIFS_DFS_UPCALL 131 /*
132 /* copy mount params to sb for use in submounts */ 132 * Copy mount params to sb for use in submounts. Better to do
133 /* BB: should we move this after the mount so we 133 * the copy here and deal with the error before cleanup gets
134 * do not have to do the copy on failed mounts? 134 * complicated post-mount.
135 * BB: May be it is better to do simple copy before 135 */
136 * complex operation (mount), and in case of fail
137 * just exit instead of doing mount and attempting
138 * undo it if this copy fails?*/
139 if (data) { 136 if (data) {
140 int len = strlen(data); 137 cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL);
141 cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL);
142 if (cifs_sb->mountdata == NULL) { 138 if (cifs_sb->mountdata == NULL) {
143 bdi_destroy(&cifs_sb->bdi); 139 bdi_destroy(&cifs_sb->bdi);
144 kfree(sb->s_fs_info); 140 kfree(sb->s_fs_info);
145 sb->s_fs_info = NULL; 141 sb->s_fs_info = NULL;
146 return -ENOMEM; 142 return -ENOMEM;
147 } 143 }
148 strncpy(cifs_sb->mountdata, data, len + 1);
149 cifs_sb->mountdata[len] = '\0';
150 } 144 }
151#endif
152 145
153 rc = cifs_mount(sb, cifs_sb, data, devname); 146 rc = cifs_mount(sb, cifs_sb, devname);
154 147
155 if (rc) { 148 if (rc) {
156 if (!silent) 149 if (!silent)
@@ -163,7 +156,7 @@ cifs_read_super(struct super_block *sb, void *data,
163 sb->s_bdi = &cifs_sb->bdi; 156 sb->s_bdi = &cifs_sb->bdi;
164 sb->s_blocksize = CIFS_MAX_MSGSIZE; 157 sb->s_blocksize = CIFS_MAX_MSGSIZE;
165 sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */ 158 sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */
166 inode = cifs_root_iget(sb, ROOT_I); 159 inode = cifs_root_iget(sb);
167 160
168 if (IS_ERR(inode)) { 161 if (IS_ERR(inode)) {
169 rc = PTR_ERR(inode); 162 rc = PTR_ERR(inode);
@@ -184,12 +177,12 @@ cifs_read_super(struct super_block *sb, void *data,
184 else 177 else
185 sb->s_d_op = &cifs_dentry_ops; 178 sb->s_d_op = &cifs_dentry_ops;
186 179
187#ifdef CONFIG_CIFS_EXPERIMENTAL 180#ifdef CIFS_NFSD_EXPORT
188 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { 181 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
189 cFYI(1, "export ops supported"); 182 cFYI(1, "export ops supported");
190 sb->s_export_op = &cifs_export_ops; 183 sb->s_export_op = &cifs_export_ops;
191 } 184 }
192#endif /* EXPERIMENTAL */ 185#endif /* CIFS_NFSD_EXPORT */
193 186
194 return 0; 187 return 0;
195 188
@@ -202,12 +195,10 @@ out_no_root:
202 195
203out_mount_failed: 196out_mount_failed:
204 if (cifs_sb) { 197 if (cifs_sb) {
205#ifdef CONFIG_CIFS_DFS_UPCALL
206 if (cifs_sb->mountdata) { 198 if (cifs_sb->mountdata) {
207 kfree(cifs_sb->mountdata); 199 kfree(cifs_sb->mountdata);
208 cifs_sb->mountdata = NULL; 200 cifs_sb->mountdata = NULL;
209 } 201 }
210#endif
211 unload_nls(cifs_sb->local_nls); 202 unload_nls(cifs_sb->local_nls);
212 bdi_destroy(&cifs_sb->bdi); 203 bdi_destroy(&cifs_sb->bdi);
213 kfree(cifs_sb); 204 kfree(cifs_sb);
@@ -231,12 +222,10 @@ cifs_put_super(struct super_block *sb)
231 rc = cifs_umount(sb, cifs_sb); 222 rc = cifs_umount(sb, cifs_sb);
232 if (rc) 223 if (rc)
233 cERROR(1, "cifs_umount failed with return code %d", rc); 224 cERROR(1, "cifs_umount failed with return code %d", rc);
234#ifdef CONFIG_CIFS_DFS_UPCALL
235 if (cifs_sb->mountdata) { 225 if (cifs_sb->mountdata) {
236 kfree(cifs_sb->mountdata); 226 kfree(cifs_sb->mountdata);
237 cifs_sb->mountdata = NULL; 227 cifs_sb->mountdata = NULL;
238 } 228 }
239#endif
240 229
241 unload_nls(cifs_sb->local_nls); 230 unload_nls(cifs_sb->local_nls);
242 bdi_destroy(&cifs_sb->bdi); 231 bdi_destroy(&cifs_sb->bdi);
@@ -618,16 +607,31 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
618{ 607{
619 /* origin == SEEK_END => we must revalidate the cached file length */ 608 /* origin == SEEK_END => we must revalidate the cached file length */
620 if (origin == SEEK_END) { 609 if (origin == SEEK_END) {
621 int retval; 610 int rc;
622 611 struct inode *inode = file->f_path.dentry->d_inode;
623 /* some applications poll for the file length in this strange 612
624 way so we must seek to end on non-oplocked files by 613 /*
625 setting the revalidate time to zero */ 614 * We need to be sure that all dirty pages are written and the
626 CIFS_I(file->f_path.dentry->d_inode)->time = 0; 615 * server has the newest file length.
627 616 */
628 retval = cifs_revalidate_file(file); 617 if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping &&
629 if (retval < 0) 618 inode->i_mapping->nrpages != 0) {
630 return (loff_t)retval; 619 rc = filemap_fdatawait(inode->i_mapping);
620 if (rc) {
621 mapping_set_error(inode->i_mapping, rc);
622 return rc;
623 }
624 }
625 /*
626 * Some applications poll for the file length in this strange
627 * way so we must seek to end on non-oplocked files by
628 * setting the revalidate time to zero.
629 */
630 CIFS_I(inode)->time = 0;
631
632 rc = cifs_revalidate_file_attr(file);
633 if (rc < 0)
634 return (loff_t)rc;
631 } 635 }
632 return generic_file_llseek_unlocked(file, offset, origin); 636 return generic_file_llseek_unlocked(file, offset, origin);
633} 637}
@@ -760,10 +764,11 @@ const struct file_operations cifs_file_strict_ops = {
760}; 764};
761 765
762const struct file_operations cifs_file_direct_ops = { 766const struct file_operations cifs_file_direct_ops = {
763 /* no aio, no readv - 767 /* BB reevaluate whether they can be done with directio, no cache */
764 BB reevaluate whether they can be done with directio, no cache */ 768 .read = do_sync_read,
765 .read = cifs_user_read, 769 .write = do_sync_write,
766 .write = cifs_user_write, 770 .aio_read = cifs_user_readv,
771 .aio_write = cifs_user_writev,
767 .open = cifs_open, 772 .open = cifs_open,
768 .release = cifs_close, 773 .release = cifs_close,
769 .lock = cifs_lock, 774 .lock = cifs_lock,
@@ -815,10 +820,11 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
815}; 820};
816 821
817const struct file_operations cifs_file_direct_nobrl_ops = { 822const struct file_operations cifs_file_direct_nobrl_ops = {
818 /* no mmap, no aio, no readv - 823 /* BB reevaluate whether they can be done with directio, no cache */
819 BB reevaluate whether they can be done with directio, no cache */ 824 .read = do_sync_read,
820 .read = cifs_user_read, 825 .write = do_sync_write,
821 .write = cifs_user_write, 826 .aio_read = cifs_user_readv,
827 .aio_write = cifs_user_writev,
822 .open = cifs_open, 828 .open = cifs_open,
823 .release = cifs_close, 829 .release = cifs_close,
824 .fsync = cifs_fsync, 830 .fsync = cifs_fsync,
@@ -981,10 +987,10 @@ init_cifs(void)
981 int rc = 0; 987 int rc = 0;
982 cifs_proc_init(); 988 cifs_proc_init();
983 INIT_LIST_HEAD(&cifs_tcp_ses_list); 989 INIT_LIST_HEAD(&cifs_tcp_ses_list);
984#ifdef CONFIG_CIFS_EXPERIMENTAL 990#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
985 INIT_LIST_HEAD(&GlobalDnotifyReqList); 991 INIT_LIST_HEAD(&GlobalDnotifyReqList);
986 INIT_LIST_HEAD(&GlobalDnotifyRsp_Q); 992 INIT_LIST_HEAD(&GlobalDnotifyRsp_Q);
987#endif 993#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
988/* 994/*
989 * Initialize Global counters 995 * Initialize Global counters
990 */ 996 */
@@ -1033,22 +1039,33 @@ init_cifs(void)
1033 if (rc) 1039 if (rc)
1034 goto out_destroy_mids; 1040 goto out_destroy_mids;
1035 1041
1036 rc = register_filesystem(&cifs_fs_type);
1037 if (rc)
1038 goto out_destroy_request_bufs;
1039#ifdef CONFIG_CIFS_UPCALL 1042#ifdef CONFIG_CIFS_UPCALL
1040 rc = register_key_type(&cifs_spnego_key_type); 1043 rc = register_key_type(&cifs_spnego_key_type);
1041 if (rc) 1044 if (rc)
1042 goto out_unregister_filesystem; 1045 goto out_destroy_request_bufs;
1043#endif 1046#endif /* CONFIG_CIFS_UPCALL */
1047
1048#ifdef CONFIG_CIFS_ACL
1049 rc = init_cifs_idmap();
1050 if (rc)
1051 goto out_register_key_type;
1052#endif /* CONFIG_CIFS_ACL */
1053
1054 rc = register_filesystem(&cifs_fs_type);
1055 if (rc)
1056 goto out_init_cifs_idmap;
1044 1057
1045 return 0; 1058 return 0;
1046 1059
1047#ifdef CONFIG_CIFS_UPCALL 1060out_init_cifs_idmap:
1048out_unregister_filesystem: 1061#ifdef CONFIG_CIFS_ACL
1049 unregister_filesystem(&cifs_fs_type); 1062 exit_cifs_idmap();
1063out_register_key_type:
1050#endif 1064#endif
1065#ifdef CONFIG_CIFS_UPCALL
1066 unregister_key_type(&cifs_spnego_key_type);
1051out_destroy_request_bufs: 1067out_destroy_request_bufs:
1068#endif
1052 cifs_destroy_request_bufs(); 1069 cifs_destroy_request_bufs();
1053out_destroy_mids: 1070out_destroy_mids:
1054 cifs_destroy_mids(); 1071 cifs_destroy_mids();
@@ -1070,6 +1087,10 @@ exit_cifs(void)
1070#ifdef CONFIG_CIFS_DFS_UPCALL 1087#ifdef CONFIG_CIFS_DFS_UPCALL
1071 cifs_dfs_release_automount_timer(); 1088 cifs_dfs_release_automount_timer();
1072#endif 1089#endif
1090#ifdef CONFIG_CIFS_ACL
1091 cifs_destroy_idmaptrees();
1092 exit_cifs_idmap();
1093#endif
1073#ifdef CONFIG_CIFS_UPCALL 1094#ifdef CONFIG_CIFS_UPCALL
1074 unregister_key_type(&cifs_spnego_key_type); 1095 unregister_key_type(&cifs_spnego_key_type);
1075#endif 1096#endif
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index a9371b6578c0..64313f778ebf 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -47,7 +47,7 @@ extern void cifs_sb_deactive(struct super_block *sb);
47 47
48/* Functions related to inodes */ 48/* Functions related to inodes */
49extern const struct inode_operations cifs_dir_inode_ops; 49extern const struct inode_operations cifs_dir_inode_ops;
50extern struct inode *cifs_root_iget(struct super_block *, unsigned long); 50extern struct inode *cifs_root_iget(struct super_block *);
51extern int cifs_create(struct inode *, struct dentry *, int, 51extern int cifs_create(struct inode *, struct dentry *, int,
52 struct nameidata *); 52 struct nameidata *);
53extern struct dentry *cifs_lookup(struct inode *, struct dentry *, 53extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
@@ -59,9 +59,11 @@ extern int cifs_mkdir(struct inode *, struct dentry *, int);
59extern int cifs_rmdir(struct inode *, struct dentry *); 59extern int cifs_rmdir(struct inode *, struct dentry *);
60extern int cifs_rename(struct inode *, struct dentry *, struct inode *, 60extern int cifs_rename(struct inode *, struct dentry *, struct inode *,
61 struct dentry *); 61 struct dentry *);
62extern int cifs_revalidate_file_attr(struct file *filp);
63extern int cifs_revalidate_dentry_attr(struct dentry *);
62extern int cifs_revalidate_file(struct file *filp); 64extern int cifs_revalidate_file(struct file *filp);
63extern int cifs_revalidate_dentry(struct dentry *); 65extern int cifs_revalidate_dentry(struct dentry *);
64extern void cifs_invalidate_mapping(struct inode *inode); 66extern int cifs_invalidate_mapping(struct inode *inode);
65extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *); 67extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
66extern int cifs_setattr(struct dentry *, struct iattr *); 68extern int cifs_setattr(struct dentry *, struct iattr *);
67 69
@@ -80,12 +82,12 @@ extern const struct file_operations cifs_file_strict_nobrl_ops;
80extern int cifs_open(struct inode *inode, struct file *file); 82extern int cifs_open(struct inode *inode, struct file *file);
81extern int cifs_close(struct inode *inode, struct file *file); 83extern int cifs_close(struct inode *inode, struct file *file);
82extern int cifs_closedir(struct inode *inode, struct file *file); 84extern int cifs_closedir(struct inode *inode, struct file *file);
83extern ssize_t cifs_user_read(struct file *file, char __user *read_data, 85extern ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
84 size_t read_size, loff_t *poffset); 86 unsigned long nr_segs, loff_t pos);
85extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, 87extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
86 unsigned long nr_segs, loff_t pos); 88 unsigned long nr_segs, loff_t pos);
87extern ssize_t cifs_user_write(struct file *file, const char __user *write_data, 89extern ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
88 size_t write_size, loff_t *poffset); 90 unsigned long nr_segs, loff_t pos);
89extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, 91extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
90 unsigned long nr_segs, loff_t pos); 92 unsigned long nr_segs, loff_t pos);
91extern int cifs_lock(struct file *, int, struct file_lock *); 93extern int cifs_lock(struct file *, int, struct file_lock *);
@@ -123,9 +125,9 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
123extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); 125extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
124extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); 126extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
125 127
126#ifdef CONFIG_CIFS_EXPERIMENTAL 128#ifdef CIFS_NFSD_EXPORT
127extern const struct export_operations cifs_export_ops; 129extern const struct export_operations cifs_export_ops;
128#endif /* EXPERIMENTAL */ 130#endif /* CIFS_NFSD_EXPORT */
129 131
130#define CIFS_VERSION "1.71" 132#define CIFS_VERSION "1.72"
131#endif /* _CIFSFS_H */ 133#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a5d1106fcbde..76b4517e74b0 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -274,7 +274,8 @@ struct cifsSesInfo {
274 int capabilities; 274 int capabilities;
275 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for 275 char serverName[SERVER_NAME_LEN_WITH_NULL * 2]; /* BB make bigger for
276 TCP names - will ipv6 and sctp addresses fit? */ 276 TCP names - will ipv6 and sctp addresses fit? */
277 char *user_name; 277 char *user_name; /* must not be null except during init of sess
278 and after mount option parsing we fill it */
278 char *domainName; 279 char *domainName;
279 char *password; 280 char *password;
280 struct session_key auth_key; 281 struct session_key auth_key;
@@ -780,10 +781,12 @@ GLOBAL_EXTERN spinlock_t cifs_tcp_ses_lock;
780 */ 781 */
781GLOBAL_EXTERN spinlock_t cifs_file_list_lock; 782GLOBAL_EXTERN spinlock_t cifs_file_list_lock;
782 783
784#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
783/* Outstanding dir notify requests */ 785/* Outstanding dir notify requests */
784GLOBAL_EXTERN struct list_head GlobalDnotifyReqList; 786GLOBAL_EXTERN struct list_head GlobalDnotifyReqList;
785/* DirNotify response queue */ 787/* DirNotify response queue */
786GLOBAL_EXTERN struct list_head GlobalDnotifyRsp_Q; 788GLOBAL_EXTERN struct list_head GlobalDnotifyRsp_Q;
789#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
787 790
788/* 791/*
789 * Global transaction id (XID) information 792 * Global transaction id (XID) information
@@ -830,6 +833,11 @@ GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/
830/* reconnect after this many failed echo attempts */ 833/* reconnect after this many failed echo attempts */
831GLOBAL_EXTERN unsigned short echo_retries; 834GLOBAL_EXTERN unsigned short echo_retries;
832 835
836GLOBAL_EXTERN struct rb_root uidtree;
837GLOBAL_EXTERN struct rb_root gidtree;
838GLOBAL_EXTERN spinlock_t siduidlock;
839GLOBAL_EXTERN spinlock_t sidgidlock;
840
833void cifs_oplock_break(struct work_struct *work); 841void cifs_oplock_break(struct work_struct *work);
834void cifs_oplock_break_get(struct cifsFileInfo *cfile); 842void cifs_oplock_break_get(struct cifsFileInfo *cfile);
835void cifs_oplock_break_put(struct cifsFileInfo *cfile); 843void cifs_oplock_break_put(struct cifsFileInfo *cfile);
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b5c8cc5d7a7f..de3aa285de03 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -397,9 +397,9 @@
397#define GETU32(var) (*((__u32 *)var)) /* BB check for endian issues */ 397#define GETU32(var) (*((__u32 *)var)) /* BB check for endian issues */
398 398
399struct smb_hdr { 399struct smb_hdr {
400 __u32 smb_buf_length; /* big endian on wire *//* BB length is only two 400 __be32 smb_buf_length; /* BB length is only two (rarely three) bytes,
401 or three bytes - with one or two byte type preceding it that are 401 with one or two byte "type" preceding it that will be
402 zero - we could mask the type byte off just in case BB */ 402 zero - we could mask the type byte off */
403 __u8 Protocol[4]; 403 __u8 Protocol[4];
404 __u8 Command; 404 __u8 Command;
405 union { 405 union {
@@ -428,43 +428,28 @@ struct smb_hdr {
428 __u8 WordCount; 428 __u8 WordCount;
429} __attribute__((packed)); 429} __attribute__((packed));
430 430
431/* given a pointer to an smb_hdr retrieve a char pointer to the byte count */ 431/* given a pointer to an smb_hdr, retrieve a void pointer to the ByteCount */
432#define BCC(smb_var) ((unsigned char *)(smb_var) + sizeof(struct smb_hdr) + \ 432static inline void *
433 (2 * (smb_var)->WordCount)) 433BCC(struct smb_hdr *smb)
434{
435 return (void *)smb + sizeof(*smb) + 2 * smb->WordCount;
436}
434 437
435/* given a pointer to an smb_hdr retrieve the pointer to the byte area */ 438/* given a pointer to an smb_hdr retrieve the pointer to the byte area */
436#define pByteArea(smb_var) (BCC(smb_var) + 2) 439#define pByteArea(smb_var) (BCC(smb_var) + 2)
437 440
438/* get the converted ByteCount for a SMB packet and return it */
439static inline __u16
440get_bcc(struct smb_hdr *hdr)
441{
442 __u16 *bc_ptr = (__u16 *)BCC(hdr);
443
444 return get_unaligned(bc_ptr);
445}
446
447/* get the unconverted ByteCount for a SMB packet and return it */ 441/* get the unconverted ByteCount for a SMB packet and return it */
448static inline __u16 442static inline __u16
449get_bcc_le(struct smb_hdr *hdr) 443get_bcc(struct smb_hdr *hdr)
450{ 444{
451 __le16 *bc_ptr = (__le16 *)BCC(hdr); 445 __le16 *bc_ptr = (__le16 *)BCC(hdr);
452 446
453 return get_unaligned_le16(bc_ptr); 447 return get_unaligned_le16(bc_ptr);
454} 448}
455 449
456/* set the ByteCount for a SMB packet in host-byte order */
457static inline void
458put_bcc(__u16 count, struct smb_hdr *hdr)
459{
460 __u16 *bc_ptr = (__u16 *)BCC(hdr);
461
462 put_unaligned(count, bc_ptr);
463}
464
465/* set the ByteCount for a SMB packet in little-endian */ 450/* set the ByteCount for a SMB packet in little-endian */
466static inline void 451static inline void
467put_bcc_le(__u16 count, struct smb_hdr *hdr) 452put_bcc(__u16 count, struct smb_hdr *hdr)
468{ 453{
469 __le16 *bc_ptr = (__le16 *)BCC(hdr); 454 __le16 *bc_ptr = (__le16 *)BCC(hdr);
470 455
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 8096f27ad9a8..6e69e06a30b3 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -53,6 +53,9 @@ do { \
53 cFYI(1, "CIFS VFS: leaving %s (xid = %d) rc = %d", \ 53 cFYI(1, "CIFS VFS: leaving %s (xid = %d) rc = %d", \
54 __func__, curr_xid, (int)rc); \ 54 __func__, curr_xid, (int)rc); \
55} while (0) 55} while (0)
56extern int init_cifs_idmap(void);
57extern void exit_cifs_idmap(void);
58extern void cifs_destroy_idmaptrees(void);
56extern char *build_path_from_dentry(struct dentry *); 59extern char *build_path_from_dentry(struct dentry *);
57extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb, 60extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb,
58 struct cifsTconInfo *tcon); 61 struct cifsTconInfo *tcon);
@@ -90,7 +93,6 @@ extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
90extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); 93extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
91extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); 94extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
92extern unsigned int smbCalcSize(struct smb_hdr *ptr); 95extern unsigned int smbCalcSize(struct smb_hdr *ptr);
93extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
94extern int decode_negTokenInit(unsigned char *security_blob, int length, 96extern int decode_negTokenInit(unsigned char *security_blob, int length,
95 struct TCP_Server_Info *server); 97 struct TCP_Server_Info *server);
96extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len); 98extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
@@ -143,8 +145,10 @@ extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
143extern int mode_to_cifs_acl(struct inode *inode, const char *path, __u64); 145extern int mode_to_cifs_acl(struct inode *inode, const char *path, __u64);
144extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *, 146extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *,
145 const char *, u32 *); 147 const char *, u32 *);
148extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *,
149 const char *);
146 150
147extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *, 151extern int cifs_mount(struct super_block *, struct cifs_sb_info *,
148 const char *); 152 const char *);
149extern int cifs_umount(struct super_block *, struct cifs_sb_info *); 153extern int cifs_umount(struct super_block *, struct cifs_sb_info *);
150extern void cifs_dfs_release_automount_timer(void); 154extern void cifs_dfs_release_automount_timer(void);
@@ -304,12 +308,13 @@ extern int CIFSSMBUnixQuerySymLink(const int xid,
304 struct cifsTconInfo *tcon, 308 struct cifsTconInfo *tcon,
305 const unsigned char *searchName, char **syminfo, 309 const unsigned char *searchName, char **syminfo,
306 const struct nls_table *nls_codepage); 310 const struct nls_table *nls_codepage);
311#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL
307extern int CIFSSMBQueryReparseLinkInfo(const int xid, 312extern int CIFSSMBQueryReparseLinkInfo(const int xid,
308 struct cifsTconInfo *tcon, 313 struct cifsTconInfo *tcon,
309 const unsigned char *searchName, 314 const unsigned char *searchName,
310 char *symlinkinfo, const int buflen, __u16 fid, 315 char *symlinkinfo, const int buflen, __u16 fid,
311 const struct nls_table *nls_codepage); 316 const struct nls_table *nls_codepage);
312 317#endif /* temporarily unused until cifs_symlink fixed */
313extern int CIFSSMBOpen(const int xid, struct cifsTconInfo *tcon, 318extern int CIFSSMBOpen(const int xid, struct cifsTconInfo *tcon,
314 const char *fileName, const int disposition, 319 const char *fileName, const int disposition,
315 const int access_flags, const int omode, 320 const int access_flags, const int omode,
@@ -348,8 +353,6 @@ extern int CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon,
348 const unsigned char *searchName, __u64 *inode_number, 353 const unsigned char *searchName, __u64 *inode_number,
349 const struct nls_table *nls_codepage, 354 const struct nls_table *nls_codepage,
350 int remap_special_chars); 355 int remap_special_chars);
351extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
352 const struct nls_table *cp, int mapChars);
353 356
354extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon, 357extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
355 const __u16 netfid, const __u64 len, 358 const __u16 netfid, const __u64 len,
@@ -383,9 +386,15 @@ extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
383extern int calc_seckey(struct cifsSesInfo *); 386extern int calc_seckey(struct cifsSesInfo *);
384 387
385#ifdef CONFIG_CIFS_WEAK_PW_HASH 388#ifdef CONFIG_CIFS_WEAK_PW_HASH
386extern void calc_lanman_hash(const char *password, const char *cryptkey, 389extern int calc_lanman_hash(const char *password, const char *cryptkey,
387 bool encrypt, char *lnm_session_key); 390 bool encrypt, char *lnm_session_key);
388#endif /* CIFS_WEAK_PW_HASH */ 391#endif /* CIFS_WEAK_PW_HASH */
392#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
393extern int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
394 const int notify_subdirs, const __u16 netfid,
395 __u32 filter, struct file *file, int multishot,
396 const struct nls_table *nls_codepage);
397#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
389extern int CIFSSMBCopy(int xid, 398extern int CIFSSMBCopy(int xid,
390 struct cifsTconInfo *source_tcon, 399 struct cifsTconInfo *source_tcon,
391 const char *fromName, 400 const char *fromName,
@@ -393,10 +402,6 @@ extern int CIFSSMBCopy(int xid,
393 const char *toName, const int flags, 402 const char *toName, const int flags,
394 const struct nls_table *nls_codepage, 403 const struct nls_table *nls_codepage,
395 int remap_special_chars); 404 int remap_special_chars);
396extern int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
397 const int notify_subdirs, const __u16 netfid,
398 __u32 filter, struct file *file, int multishot,
399 const struct nls_table *nls_codepage);
400extern ssize_t CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon, 405extern ssize_t CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon,
401 const unsigned char *searchName, 406 const unsigned char *searchName,
402 const unsigned char *ea_name, char *EAData, 407 const unsigned char *ea_name, char *EAData,
@@ -427,9 +432,6 @@ extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr,
427 struct cifs_sb_info *cifs_sb, int xid); 432 struct cifs_sb_info *cifs_sb, int xid);
428extern int mdfour(unsigned char *, unsigned char *, int); 433extern int mdfour(unsigned char *, unsigned char *, int);
429extern int E_md4hash(const unsigned char *passwd, unsigned char *p16); 434extern int E_md4hash(const unsigned char *passwd, unsigned char *p16);
430extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8, 435extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8,
431 unsigned char *p24);
432extern void E_P16(unsigned char *p14, unsigned char *p16);
433extern void E_P24(unsigned char *p21, const unsigned char *c8,
434 unsigned char *p24); 436 unsigned char *p24);
435#endif /* _CIFSPROTO_H */ 437#endif /* _CIFSPROTO_H */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index df959bae6728..83df937b814e 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -339,12 +339,13 @@ static int validate_t2(struct smb_t2_rsp *pSMB)
339 get_unaligned_le16(&pSMB->t2_rsp.DataOffset) > 1024) 339 get_unaligned_le16(&pSMB->t2_rsp.DataOffset) > 1024)
340 goto vt2_err; 340 goto vt2_err;
341 341
342 /* check that bcc is at least as big as parms + data */
343 /* check that bcc is less than negotiated smb buffer */
344 total_size = get_unaligned_le16(&pSMB->t2_rsp.ParameterCount); 342 total_size = get_unaligned_le16(&pSMB->t2_rsp.ParameterCount);
345 if (total_size >= 512) 343 if (total_size >= 512)
346 goto vt2_err; 344 goto vt2_err;
347 345
346 /* check that bcc is at least as big as parms + data, and that it is
347 * less than negotiated smb buffer
348 */
348 total_size += get_unaligned_le16(&pSMB->t2_rsp.DataCount); 349 total_size += get_unaligned_le16(&pSMB->t2_rsp.DataCount);
349 if (total_size > get_bcc(&pSMB->hdr) || 350 if (total_size > get_bcc(&pSMB->hdr) ||
350 total_size >= CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) 351 total_size >= CIFSMaxBufSize + MAX_CIFS_HDR_SIZE)
@@ -357,6 +358,13 @@ vt2_err:
357 return -EINVAL; 358 return -EINVAL;
358} 359}
359 360
361static inline void inc_rfc1001_len(void *pSMB, int count)
362{
363 struct smb_hdr *hdr = (struct smb_hdr *)pSMB;
364
365 be32_add_cpu(&hdr->smb_buf_length, count);
366}
367
360int 368int
361CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) 369CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
362{ 370{
@@ -409,7 +417,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
409 count += strlen(protocols[i].name) + 1; 417 count += strlen(protocols[i].name) + 1;
410 /* null at end of source and target buffers anyway */ 418 /* null at end of source and target buffers anyway */
411 } 419 }
412 pSMB->hdr.smb_buf_length += count; 420 inc_rfc1001_len(pSMB, count);
413 pSMB->ByteCount = cpu_to_le16(count); 421 pSMB->ByteCount = cpu_to_le16(count);
414 422
415 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB, 423 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
@@ -541,10 +549,6 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
541 server->secType = RawNTLMSSP; 549 server->secType = RawNTLMSSP;
542 else if (secFlags & CIFSSEC_MAY_LANMAN) 550 else if (secFlags & CIFSSEC_MAY_LANMAN)
543 server->secType = LANMAN; 551 server->secType = LANMAN;
544/* #ifdef CONFIG_CIFS_EXPERIMENTAL
545 else if (secFlags & CIFSSEC_MAY_PLNTXT)
546 server->secType = ??
547#endif */
548 else { 552 else {
549 rc = -EOPNOTSUPP; 553 rc = -EOPNOTSUPP;
550 cERROR(1, "Invalid security type"); 554 cERROR(1, "Invalid security type");
@@ -578,7 +582,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
578 582
579 if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) && 583 if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) &&
580 (server->capabilities & CAP_EXTENDED_SECURITY)) { 584 (server->capabilities & CAP_EXTENDED_SECURITY)) {
581 count = pSMBr->ByteCount; 585 count = get_bcc(&pSMBr->hdr);
582 if (count < 16) { 586 if (count < 16) {
583 rc = -EIO; 587 rc = -EIO;
584 goto neg_err_exit; 588 goto neg_err_exit;
@@ -732,9 +736,9 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
732 smb->hdr.Tid = 0xffff; 736 smb->hdr.Tid = 0xffff;
733 smb->hdr.WordCount = 1; 737 smb->hdr.WordCount = 1;
734 put_unaligned_le16(1, &smb->EchoCount); 738 put_unaligned_le16(1, &smb->EchoCount);
735 put_bcc_le(1, &smb->hdr); 739 put_bcc(1, &smb->hdr);
736 smb->Data[0] = 'a'; 740 smb->Data[0] = 'a';
737 smb->hdr.smb_buf_length += 3; 741 inc_rfc1001_len(smb, 3);
738 742
739 rc = cifs_call_async(server, (struct smb_hdr *)smb, 743 rc = cifs_call_async(server, (struct smb_hdr *)smb,
740 cifs_echo_callback, server); 744 cifs_echo_callback, server);
@@ -852,7 +856,7 @@ PsxDelete:
852 pSMB->TotalParameterCount = pSMB->ParameterCount; 856 pSMB->TotalParameterCount = pSMB->ParameterCount;
853 pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_UNLINK); 857 pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_UNLINK);
854 pSMB->Reserved4 = 0; 858 pSMB->Reserved4 = 0;
855 pSMB->hdr.smb_buf_length += byte_count; 859 inc_rfc1001_len(pSMB, byte_count);
856 pSMB->ByteCount = cpu_to_le16(byte_count); 860 pSMB->ByteCount = cpu_to_le16(byte_count);
857 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 861 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
858 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 862 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -898,7 +902,7 @@ DelFileRetry:
898 pSMB->SearchAttributes = 902 pSMB->SearchAttributes =
899 cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM); 903 cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM);
900 pSMB->BufferFormat = 0x04; 904 pSMB->BufferFormat = 0x04;
901 pSMB->hdr.smb_buf_length += name_len + 1; 905 inc_rfc1001_len(pSMB, name_len + 1);
902 pSMB->ByteCount = cpu_to_le16(name_len + 1); 906 pSMB->ByteCount = cpu_to_le16(name_len + 1);
903 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 907 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
904 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 908 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -942,7 +946,7 @@ RmDirRetry:
942 } 946 }
943 947
944 pSMB->BufferFormat = 0x04; 948 pSMB->BufferFormat = 0x04;
945 pSMB->hdr.smb_buf_length += name_len + 1; 949 inc_rfc1001_len(pSMB, name_len + 1);
946 pSMB->ByteCount = cpu_to_le16(name_len + 1); 950 pSMB->ByteCount = cpu_to_le16(name_len + 1);
947 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 951 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
948 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 952 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -985,7 +989,7 @@ MkDirRetry:
985 } 989 }
986 990
987 pSMB->BufferFormat = 0x04; 991 pSMB->BufferFormat = 0x04;
988 pSMB->hdr.smb_buf_length += name_len + 1; 992 inc_rfc1001_len(pSMB, name_len + 1);
989 pSMB->ByteCount = cpu_to_le16(name_len + 1); 993 pSMB->ByteCount = cpu_to_le16(name_len + 1);
990 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 994 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
991 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 995 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -1063,7 +1067,7 @@ PsxCreat:
1063 pSMB->TotalParameterCount = pSMB->ParameterCount; 1067 pSMB->TotalParameterCount = pSMB->ParameterCount;
1064 pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_OPEN); 1068 pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_OPEN);
1065 pSMB->Reserved4 = 0; 1069 pSMB->Reserved4 = 0;
1066 pSMB->hdr.smb_buf_length += byte_count; 1070 inc_rfc1001_len(pSMB, byte_count);
1067 pSMB->ByteCount = cpu_to_le16(byte_count); 1071 pSMB->ByteCount = cpu_to_le16(byte_count);
1068 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 1072 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
1069 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 1073 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -1075,7 +1079,7 @@ PsxCreat:
1075 cFYI(1, "copying inode info"); 1079 cFYI(1, "copying inode info");
1076 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 1080 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
1077 1081
1078 if (rc || (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP))) { 1082 if (rc || get_bcc(&pSMBr->hdr) < sizeof(OPEN_PSX_RSP)) {
1079 rc = -EIO; /* bad smb */ 1083 rc = -EIO; /* bad smb */
1080 goto psx_create_err; 1084 goto psx_create_err;
1081 } 1085 }
@@ -1096,7 +1100,7 @@ PsxCreat:
1096 pRetData->Type = cpu_to_le32(-1); /* unknown */ 1100 pRetData->Type = cpu_to_le32(-1); /* unknown */
1097 cFYI(DBG2, "unknown type"); 1101 cFYI(DBG2, "unknown type");
1098 } else { 1102 } else {
1099 if (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP) 1103 if (get_bcc(&pSMBr->hdr) < sizeof(OPEN_PSX_RSP)
1100 + sizeof(FILE_UNIX_BASIC_INFO)) { 1104 + sizeof(FILE_UNIX_BASIC_INFO)) {
1101 cERROR(1, "Open response data too small"); 1105 cERROR(1, "Open response data too small");
1102 pRetData->Type = cpu_to_le32(-1); 1106 pRetData->Type = cpu_to_le32(-1);
@@ -1228,7 +1232,7 @@ OldOpenRetry:
1228 pSMB->Sattr = cpu_to_le16(ATTR_HIDDEN | ATTR_SYSTEM | ATTR_DIRECTORY); 1232 pSMB->Sattr = cpu_to_le16(ATTR_HIDDEN | ATTR_SYSTEM | ATTR_DIRECTORY);
1229 pSMB->OpenFunction = cpu_to_le16(convert_disposition(openDisposition)); 1233 pSMB->OpenFunction = cpu_to_le16(convert_disposition(openDisposition));
1230 count += name_len; 1234 count += name_len;
1231 pSMB->hdr.smb_buf_length += count; 1235 inc_rfc1001_len(pSMB, count);
1232 1236
1233 pSMB->ByteCount = cpu_to_le16(count); 1237 pSMB->ByteCount = cpu_to_le16(count);
1234 /* long_op set to 1 to allow for oplock break timeouts */ 1238 /* long_op set to 1 to allow for oplock break timeouts */
@@ -1341,7 +1345,7 @@ openRetry:
1341 SECURITY_CONTEXT_TRACKING | SECURITY_EFFECTIVE_ONLY; 1345 SECURITY_CONTEXT_TRACKING | SECURITY_EFFECTIVE_ONLY;
1342 1346
1343 count += name_len; 1347 count += name_len;
1344 pSMB->hdr.smb_buf_length += count; 1348 inc_rfc1001_len(pSMB, count);
1345 1349
1346 pSMB->ByteCount = cpu_to_le16(count); 1350 pSMB->ByteCount = cpu_to_le16(count);
1347 /* long_op set to 1 to allow for oplock break timeouts */ 1351 /* long_op set to 1 to allow for oplock break timeouts */
@@ -1426,7 +1430,7 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
1426 } 1430 }
1427 1431
1428 iov[0].iov_base = (char *)pSMB; 1432 iov[0].iov_base = (char *)pSMB;
1429 iov[0].iov_len = pSMB->hdr.smb_buf_length + 4; 1433 iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
1430 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */, 1434 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
1431 &resp_buf_type, CIFS_LOG_ERROR); 1435 &resp_buf_type, CIFS_LOG_ERROR);
1432 cifs_stats_inc(&tcon->num_reads); 1436 cifs_stats_inc(&tcon->num_reads);
@@ -1560,7 +1564,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
1560 1564
1561 pSMB->DataLengthLow = cpu_to_le16(bytes_sent & 0xFFFF); 1565 pSMB->DataLengthLow = cpu_to_le16(bytes_sent & 0xFFFF);
1562 pSMB->DataLengthHigh = cpu_to_le16(bytes_sent >> 16); 1566 pSMB->DataLengthHigh = cpu_to_le16(bytes_sent >> 16);
1563 pSMB->hdr.smb_buf_length += byte_count; 1567 inc_rfc1001_len(pSMB, byte_count);
1564 1568
1565 if (wct == 14) 1569 if (wct == 14)
1566 pSMB->ByteCount = cpu_to_le16(byte_count); 1570 pSMB->ByteCount = cpu_to_le16(byte_count);
@@ -1644,11 +1648,12 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1644 1648
1645 pSMB->DataLengthLow = cpu_to_le16(count & 0xFFFF); 1649 pSMB->DataLengthLow = cpu_to_le16(count & 0xFFFF);
1646 pSMB->DataLengthHigh = cpu_to_le16(count >> 16); 1650 pSMB->DataLengthHigh = cpu_to_le16(count >> 16);
1647 smb_hdr_len = pSMB->hdr.smb_buf_length + 1; /* hdr + 1 byte pad */ 1651 /* header + 1 byte pad */
1652 smb_hdr_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 1;
1648 if (wct == 14) 1653 if (wct == 14)
1649 pSMB->hdr.smb_buf_length += count+1; 1654 inc_rfc1001_len(pSMB, count + 1);
1650 else /* wct == 12 */ 1655 else /* wct == 12 */
1651 pSMB->hdr.smb_buf_length += count+5; /* smb data starts later */ 1656 inc_rfc1001_len(pSMB, count + 5); /* smb data starts later */
1652 if (wct == 14) 1657 if (wct == 14)
1653 pSMB->ByteCount = cpu_to_le16(count + 1); 1658 pSMB->ByteCount = cpu_to_le16(count + 1);
1654 else /* wct == 12 */ /* bigger pad, smaller smb hdr, keep offset ok */ { 1659 else /* wct == 12 */ /* bigger pad, smaller smb hdr, keep offset ok */ {
@@ -1748,7 +1753,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1748 /* oplock break */ 1753 /* oplock break */
1749 count = 0; 1754 count = 0;
1750 } 1755 }
1751 pSMB->hdr.smb_buf_length += count; 1756 inc_rfc1001_len(pSMB, count);
1752 pSMB->ByteCount = cpu_to_le16(count); 1757 pSMB->ByteCount = cpu_to_le16(count);
1753 1758
1754 if (waitFlag) { 1759 if (waitFlag) {
@@ -1839,14 +1844,14 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1839 pSMB->Fid = smb_file_id; 1844 pSMB->Fid = smb_file_id;
1840 pSMB->InformationLevel = cpu_to_le16(SMB_SET_POSIX_LOCK); 1845 pSMB->InformationLevel = cpu_to_le16(SMB_SET_POSIX_LOCK);
1841 pSMB->Reserved4 = 0; 1846 pSMB->Reserved4 = 0;
1842 pSMB->hdr.smb_buf_length += byte_count; 1847 inc_rfc1001_len(pSMB, byte_count);
1843 pSMB->ByteCount = cpu_to_le16(byte_count); 1848 pSMB->ByteCount = cpu_to_le16(byte_count);
1844 if (waitFlag) { 1849 if (waitFlag) {
1845 rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB, 1850 rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB,
1846 (struct smb_hdr *) pSMBr, &bytes_returned); 1851 (struct smb_hdr *) pSMBr, &bytes_returned);
1847 } else { 1852 } else {
1848 iov[0].iov_base = (char *)pSMB; 1853 iov[0].iov_base = (char *)pSMB;
1849 iov[0].iov_len = pSMB->hdr.smb_buf_length + 4; 1854 iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
1850 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */, 1855 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
1851 &resp_buf_type, timeout); 1856 &resp_buf_type, timeout);
1852 pSMB = NULL; /* request buf already freed by SendReceive2. Do 1857 pSMB = NULL; /* request buf already freed by SendReceive2. Do
@@ -1862,7 +1867,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1862 __u16 data_count; 1867 __u16 data_count;
1863 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 1868 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
1864 1869
1865 if (rc || (pSMBr->ByteCount < sizeof(struct cifs_posix_lock))) { 1870 if (rc || get_bcc(&pSMBr->hdr) < sizeof(*parm_data)) {
1866 rc = -EIO; /* bad smb */ 1871 rc = -EIO; /* bad smb */
1867 goto plk_err_exit; 1872 goto plk_err_exit;
1868 } 1873 }
@@ -2012,7 +2017,7 @@ renameRetry:
2012 } 2017 }
2013 2018
2014 count = 1 /* 1st signature byte */ + name_len + name_len2; 2019 count = 1 /* 1st signature byte */ + name_len + name_len2;
2015 pSMB->hdr.smb_buf_length += count; 2020 inc_rfc1001_len(pSMB, count);
2016 pSMB->ByteCount = cpu_to_le16(count); 2021 pSMB->ByteCount = cpu_to_le16(count);
2017 2022
2018 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2023 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2092,7 +2097,7 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
2092 pSMB->InformationLevel = 2097 pSMB->InformationLevel =
2093 cpu_to_le16(SMB_SET_FILE_RENAME_INFORMATION); 2098 cpu_to_le16(SMB_SET_FILE_RENAME_INFORMATION);
2094 pSMB->Reserved4 = 0; 2099 pSMB->Reserved4 = 0;
2095 pSMB->hdr.smb_buf_length += byte_count; 2100 inc_rfc1001_len(pSMB, byte_count);
2096 pSMB->ByteCount = cpu_to_le16(byte_count); 2101 pSMB->ByteCount = cpu_to_le16(byte_count);
2097 rc = SendReceive(xid, pTcon->ses, (struct smb_hdr *) pSMB, 2102 rc = SendReceive(xid, pTcon->ses, (struct smb_hdr *) pSMB,
2098 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2103 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2159,7 +2164,7 @@ copyRetry:
2159 } 2164 }
2160 2165
2161 count = 1 /* 1st signature byte */ + name_len + name_len2; 2166 count = 1 /* 1st signature byte */ + name_len + name_len2;
2162 pSMB->hdr.smb_buf_length += count; 2167 inc_rfc1001_len(pSMB, count);
2163 pSMB->ByteCount = cpu_to_le16(count); 2168 pSMB->ByteCount = cpu_to_le16(count);
2164 2169
2165 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2170 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2249,7 +2254,7 @@ createSymLinkRetry:
2249 pSMB->DataOffset = cpu_to_le16(offset); 2254 pSMB->DataOffset = cpu_to_le16(offset);
2250 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_LINK); 2255 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_LINK);
2251 pSMB->Reserved4 = 0; 2256 pSMB->Reserved4 = 0;
2252 pSMB->hdr.smb_buf_length += byte_count; 2257 inc_rfc1001_len(pSMB, byte_count);
2253 pSMB->ByteCount = cpu_to_le16(byte_count); 2258 pSMB->ByteCount = cpu_to_le16(byte_count);
2254 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2259 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2255 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2260 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2335,7 +2340,7 @@ createHardLinkRetry:
2335 pSMB->DataOffset = cpu_to_le16(offset); 2340 pSMB->DataOffset = cpu_to_le16(offset);
2336 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_HLINK); 2341 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_HLINK);
2337 pSMB->Reserved4 = 0; 2342 pSMB->Reserved4 = 0;
2338 pSMB->hdr.smb_buf_length += byte_count; 2343 inc_rfc1001_len(pSMB, byte_count);
2339 pSMB->ByteCount = cpu_to_le16(byte_count); 2344 pSMB->ByteCount = cpu_to_le16(byte_count);
2340 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2345 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2341 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2346 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2406,7 +2411,7 @@ winCreateHardLinkRetry:
2406 } 2411 }
2407 2412
2408 count = 1 /* string type byte */ + name_len + name_len2; 2413 count = 1 /* string type byte */ + name_len + name_len2;
2409 pSMB->hdr.smb_buf_length += count; 2414 inc_rfc1001_len(pSMB, count);
2410 pSMB->ByteCount = cpu_to_le16(count); 2415 pSMB->ByteCount = cpu_to_le16(count);
2411 2416
2412 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2417 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2477,7 +2482,7 @@ querySymLinkRetry:
2477 pSMB->ParameterCount = pSMB->TotalParameterCount; 2482 pSMB->ParameterCount = pSMB->TotalParameterCount;
2478 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_LINK); 2483 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_LINK);
2479 pSMB->Reserved4 = 0; 2484 pSMB->Reserved4 = 0;
2480 pSMB->hdr.smb_buf_length += byte_count; 2485 inc_rfc1001_len(pSMB, byte_count);
2481 pSMB->ByteCount = cpu_to_le16(byte_count); 2486 pSMB->ByteCount = cpu_to_le16(byte_count);
2482 2487
2483 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2488 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2489,7 +2494,7 @@ querySymLinkRetry:
2489 2494
2490 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 2495 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
2491 /* BB also check enough total bytes returned */ 2496 /* BB also check enough total bytes returned */
2492 if (rc || (pSMBr->ByteCount < 2)) 2497 if (rc || get_bcc(&pSMBr->hdr) < 2)
2493 rc = -EIO; 2498 rc = -EIO;
2494 else { 2499 else {
2495 bool is_unicode; 2500 bool is_unicode;
@@ -2516,7 +2521,17 @@ querySymLinkRetry:
2516 return rc; 2521 return rc;
2517} 2522}
2518 2523
2519#ifdef CONFIG_CIFS_EXPERIMENTAL 2524#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL
2525/*
2526 * Recent Windows versions now create symlinks more frequently
2527 * and they use the "reparse point" mechanism below. We can of course
2528 * do symlinks nicely to Samba and other servers which support the
2529 * CIFS Unix Extensions and we can also do SFU symlinks and "client only"
2530 * "MF" symlinks optionally, but for recent Windows we really need to
2531 * reenable the code below and fix the cifs_symlink callers to handle this.
2532 * In the interim this code has been moved to its own config option so
2533 * it is not compiled in by default until callers fixed up and more tested.
2534 */
2520int 2535int
2521CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon, 2536CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2522 const unsigned char *searchName, 2537 const unsigned char *searchName,
@@ -2561,14 +2576,14 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2561 } else { /* decode response */ 2576 } else { /* decode response */
2562 __u32 data_offset = le32_to_cpu(pSMBr->DataOffset); 2577 __u32 data_offset = le32_to_cpu(pSMBr->DataOffset);
2563 __u32 data_count = le32_to_cpu(pSMBr->DataCount); 2578 __u32 data_count = le32_to_cpu(pSMBr->DataCount);
2564 if ((pSMBr->ByteCount < 2) || (data_offset > 512)) { 2579 if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) {
2565 /* BB also check enough total bytes returned */ 2580 /* BB also check enough total bytes returned */
2566 rc = -EIO; /* bad smb */ 2581 rc = -EIO; /* bad smb */
2567 goto qreparse_out; 2582 goto qreparse_out;
2568 } 2583 }
2569 if (data_count && (data_count < 2048)) { 2584 if (data_count && (data_count < 2048)) {
2570 char *end_of_smb = 2 /* sizeof byte count */ + 2585 char *end_of_smb = 2 /* sizeof byte count */ +
2571 pSMBr->ByteCount + (char *)&pSMBr->ByteCount; 2586 get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount;
2572 2587
2573 struct reparse_data *reparse_buf = 2588 struct reparse_data *reparse_buf =
2574 (struct reparse_data *) 2589 (struct reparse_data *)
@@ -2618,7 +2633,7 @@ qreparse_out:
2618 2633
2619 return rc; 2634 return rc;
2620} 2635}
2621#endif /* CIFS_EXPERIMENTAL */ 2636#endif /* CIFS_SYMLINK_EXPERIMENTAL */ /* BB temporarily unused */
2622 2637
2623#ifdef CONFIG_CIFS_POSIX 2638#ifdef CONFIG_CIFS_POSIX
2624 2639
@@ -2814,7 +2829,7 @@ queryAclRetry:
2814 pSMB->ParameterCount = pSMB->TotalParameterCount; 2829 pSMB->ParameterCount = pSMB->TotalParameterCount;
2815 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_ACL); 2830 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_ACL);
2816 pSMB->Reserved4 = 0; 2831 pSMB->Reserved4 = 0;
2817 pSMB->hdr.smb_buf_length += byte_count; 2832 inc_rfc1001_len(pSMB, byte_count);
2818 pSMB->ByteCount = cpu_to_le16(byte_count); 2833 pSMB->ByteCount = cpu_to_le16(byte_count);
2819 2834
2820 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2835 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2826,8 +2841,8 @@ queryAclRetry:
2826 /* decode response */ 2841 /* decode response */
2827 2842
2828 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 2843 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
2829 if (rc || (pSMBr->ByteCount < 2))
2830 /* BB also check enough total bytes returned */ 2844 /* BB also check enough total bytes returned */
2845 if (rc || get_bcc(&pSMBr->hdr) < 2)
2831 rc = -EIO; /* bad smb */ 2846 rc = -EIO; /* bad smb */
2832 else { 2847 else {
2833 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 2848 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -2908,7 +2923,7 @@ setAclRetry:
2908 pSMB->ParameterCount = cpu_to_le16(params); 2923 pSMB->ParameterCount = cpu_to_le16(params);
2909 pSMB->TotalParameterCount = pSMB->ParameterCount; 2924 pSMB->TotalParameterCount = pSMB->ParameterCount;
2910 pSMB->Reserved4 = 0; 2925 pSMB->Reserved4 = 0;
2911 pSMB->hdr.smb_buf_length += byte_count; 2926 inc_rfc1001_len(pSMB, byte_count);
2912 pSMB->ByteCount = cpu_to_le16(byte_count); 2927 pSMB->ByteCount = cpu_to_le16(byte_count);
2913 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2928 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2914 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2929 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2966,7 +2981,7 @@ GetExtAttrRetry:
2966 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_ATTR_FLAGS); 2981 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_ATTR_FLAGS);
2967 pSMB->Pad = 0; 2982 pSMB->Pad = 0;
2968 pSMB->Fid = netfid; 2983 pSMB->Fid = netfid;
2969 pSMB->hdr.smb_buf_length += byte_count; 2984 inc_rfc1001_len(pSMB, byte_count);
2970 pSMB->t2.ByteCount = cpu_to_le16(byte_count); 2985 pSMB->t2.ByteCount = cpu_to_le16(byte_count);
2971 2986
2972 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2987 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2976,8 +2991,8 @@ GetExtAttrRetry:
2976 } else { 2991 } else {
2977 /* decode response */ 2992 /* decode response */
2978 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 2993 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
2979 if (rc || (pSMBr->ByteCount < 2))
2980 /* BB also check enough total bytes returned */ 2994 /* BB also check enough total bytes returned */
2995 if (rc || get_bcc(&pSMBr->hdr) < 2)
2981 /* If rc should we check for EOPNOSUPP and 2996 /* If rc should we check for EOPNOSUPP and
2982 disable the srvino flag? or in caller? */ 2997 disable the srvino flag? or in caller? */
2983 rc = -EIO; /* bad smb */ 2998 rc = -EIO; /* bad smb */
@@ -3052,6 +3067,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
3052 char *end_of_smb; 3067 char *end_of_smb;
3053 __u32 data_count, data_offset, parm_count, parm_offset; 3068 __u32 data_count, data_offset, parm_count, parm_offset;
3054 struct smb_com_ntransact_rsp *pSMBr; 3069 struct smb_com_ntransact_rsp *pSMBr;
3070 u16 bcc;
3055 3071
3056 *pdatalen = 0; 3072 *pdatalen = 0;
3057 *pparmlen = 0; 3073 *pparmlen = 0;
@@ -3061,8 +3077,8 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
3061 3077
3062 pSMBr = (struct smb_com_ntransact_rsp *)buf; 3078 pSMBr = (struct smb_com_ntransact_rsp *)buf;
3063 3079
3064 /* ByteCount was converted from little endian in SendReceive */ 3080 bcc = get_bcc(&pSMBr->hdr);
3065 end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount + 3081 end_of_smb = 2 /* sizeof byte count */ + bcc +
3066 (char *)&pSMBr->ByteCount; 3082 (char *)&pSMBr->ByteCount;
3067 3083
3068 data_offset = le32_to_cpu(pSMBr->DataOffset); 3084 data_offset = le32_to_cpu(pSMBr->DataOffset);
@@ -3088,7 +3104,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
3088 *ppdata, data_count, (data_count + *ppdata), 3104 *ppdata, data_count, (data_count + *ppdata),
3089 end_of_smb, pSMBr); 3105 end_of_smb, pSMBr);
3090 return -EINVAL; 3106 return -EINVAL;
3091 } else if (parm_count + data_count > pSMBr->ByteCount) { 3107 } else if (parm_count + data_count > bcc) {
3092 cFYI(1, "parm count and data count larger than SMB"); 3108 cFYI(1, "parm count and data count larger than SMB");
3093 return -EINVAL; 3109 return -EINVAL;
3094 } 3110 }
@@ -3124,9 +3140,9 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
3124 pSMB->AclFlags = cpu_to_le32(CIFS_ACL_OWNER | CIFS_ACL_GROUP | 3140 pSMB->AclFlags = cpu_to_le32(CIFS_ACL_OWNER | CIFS_ACL_GROUP |
3125 CIFS_ACL_DACL); 3141 CIFS_ACL_DACL);
3126 pSMB->ByteCount = cpu_to_le16(11); /* 3 bytes pad + 8 bytes parm */ 3142 pSMB->ByteCount = cpu_to_le16(11); /* 3 bytes pad + 8 bytes parm */
3127 pSMB->hdr.smb_buf_length += 11; 3143 inc_rfc1001_len(pSMB, 11);
3128 iov[0].iov_base = (char *)pSMB; 3144 iov[0].iov_base = (char *)pSMB;
3129 iov[0].iov_len = pSMB->hdr.smb_buf_length + 4; 3145 iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
3130 3146
3131 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type, 3147 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type,
3132 0); 3148 0);
@@ -3235,10 +3251,9 @@ setCifsAclRetry:
3235 memcpy((char *) &pSMBr->hdr.Protocol + data_offset, 3251 memcpy((char *) &pSMBr->hdr.Protocol + data_offset,
3236 (char *) pntsd, 3252 (char *) pntsd,
3237 acllen); 3253 acllen);
3238 pSMB->hdr.smb_buf_length += (byte_count + data_count); 3254 inc_rfc1001_len(pSMB, byte_count + data_count);
3239
3240 } else 3255 } else
3241 pSMB->hdr.smb_buf_length += byte_count; 3256 inc_rfc1001_len(pSMB, byte_count);
3242 3257
3243 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3258 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3244 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3259 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3289,7 +3304,7 @@ QInfRetry:
3289 } 3304 }
3290 pSMB->BufferFormat = 0x04; 3305 pSMB->BufferFormat = 0x04;
3291 name_len++; /* account for buffer type byte */ 3306 name_len++; /* account for buffer type byte */
3292 pSMB->hdr.smb_buf_length += (__u16) name_len; 3307 inc_rfc1001_len(pSMB, (__u16)name_len);
3293 pSMB->ByteCount = cpu_to_le16(name_len); 3308 pSMB->ByteCount = cpu_to_le16(name_len);
3294 3309
3295 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3310 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3364,7 +3379,7 @@ QFileInfoRetry:
3364 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO); 3379 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO);
3365 pSMB->Pad = 0; 3380 pSMB->Pad = 0;
3366 pSMB->Fid = netfid; 3381 pSMB->Fid = netfid;
3367 pSMB->hdr.smb_buf_length += byte_count; 3382 inc_rfc1001_len(pSMB, byte_count);
3368 3383
3369 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3384 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3370 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3385 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3375,7 +3390,7 @@ QFileInfoRetry:
3375 3390
3376 if (rc) /* BB add auto retry on EOPNOTSUPP? */ 3391 if (rc) /* BB add auto retry on EOPNOTSUPP? */
3377 rc = -EIO; 3392 rc = -EIO;
3378 else if (pSMBr->ByteCount < 40) 3393 else if (get_bcc(&pSMBr->hdr) < 40)
3379 rc = -EIO; /* bad smb */ 3394 rc = -EIO; /* bad smb */
3380 else if (pFindData) { 3395 else if (pFindData) {
3381 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 3396 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -3451,7 +3466,7 @@ QPathInfoRetry:
3451 else 3466 else
3452 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO); 3467 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO);
3453 pSMB->Reserved4 = 0; 3468 pSMB->Reserved4 = 0;
3454 pSMB->hdr.smb_buf_length += byte_count; 3469 inc_rfc1001_len(pSMB, byte_count);
3455 pSMB->ByteCount = cpu_to_le16(byte_count); 3470 pSMB->ByteCount = cpu_to_le16(byte_count);
3456 3471
3457 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3472 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3463,9 +3478,9 @@ QPathInfoRetry:
3463 3478
3464 if (rc) /* BB add auto retry on EOPNOTSUPP? */ 3479 if (rc) /* BB add auto retry on EOPNOTSUPP? */
3465 rc = -EIO; 3480 rc = -EIO;
3466 else if (!legacy && (pSMBr->ByteCount < 40)) 3481 else if (!legacy && get_bcc(&pSMBr->hdr) < 40)
3467 rc = -EIO; /* bad smb */ 3482 rc = -EIO; /* bad smb */
3468 else if (legacy && (pSMBr->ByteCount < 24)) 3483 else if (legacy && get_bcc(&pSMBr->hdr) < 24)
3469 rc = -EIO; /* 24 or 26 expected but we do not read 3484 rc = -EIO; /* 24 or 26 expected but we do not read
3470 last field */ 3485 last field */
3471 else if (pFindData) { 3486 else if (pFindData) {
@@ -3532,7 +3547,7 @@ UnixQFileInfoRetry:
3532 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC); 3547 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
3533 pSMB->Pad = 0; 3548 pSMB->Pad = 0;
3534 pSMB->Fid = netfid; 3549 pSMB->Fid = netfid;
3535 pSMB->hdr.smb_buf_length += byte_count; 3550 inc_rfc1001_len(pSMB, byte_count);
3536 3551
3537 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3552 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3538 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3553 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3541,7 +3556,7 @@ UnixQFileInfoRetry:
3541 } else { /* decode response */ 3556 } else { /* decode response */
3542 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3557 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
3543 3558
3544 if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) { 3559 if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) {
3545 cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n" 3560 cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n"
3546 "Unix Extensions can be disabled on mount " 3561 "Unix Extensions can be disabled on mount "
3547 "by specifying the nosfu mount option."); 3562 "by specifying the nosfu mount option.");
@@ -3617,7 +3632,7 @@ UnixQPathInfoRetry:
3617 pSMB->ParameterCount = pSMB->TotalParameterCount; 3632 pSMB->ParameterCount = pSMB->TotalParameterCount;
3618 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC); 3633 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
3619 pSMB->Reserved4 = 0; 3634 pSMB->Reserved4 = 0;
3620 pSMB->hdr.smb_buf_length += byte_count; 3635 inc_rfc1001_len(pSMB, byte_count);
3621 pSMB->ByteCount = cpu_to_le16(byte_count); 3636 pSMB->ByteCount = cpu_to_le16(byte_count);
3622 3637
3623 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3638 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3627,7 +3642,7 @@ UnixQPathInfoRetry:
3627 } else { /* decode response */ 3642 } else { /* decode response */
3628 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3643 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
3629 3644
3630 if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) { 3645 if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) {
3631 cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n" 3646 cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n"
3632 "Unix Extensions can be disabled on mount " 3647 "Unix Extensions can be disabled on mount "
3633 "by specifying the nosfu mount option."); 3648 "by specifying the nosfu mount option.");
@@ -3731,7 +3746,7 @@ findFirstRetry:
3731 3746
3732 /* BB what should we set StorageType to? Does it matter? BB */ 3747 /* BB what should we set StorageType to? Does it matter? BB */
3733 pSMB->SearchStorageType = 0; 3748 pSMB->SearchStorageType = 0;
3734 pSMB->hdr.smb_buf_length += byte_count; 3749 inc_rfc1001_len(pSMB, byte_count);
3735 pSMB->ByteCount = cpu_to_le16(byte_count); 3750 pSMB->ByteCount = cpu_to_le16(byte_count);
3736 3751
3737 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3752 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3860,7 +3875,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
3860 byte_count = params + 1 /* pad */ ; 3875 byte_count = params + 1 /* pad */ ;
3861 pSMB->TotalParameterCount = cpu_to_le16(params); 3876 pSMB->TotalParameterCount = cpu_to_le16(params);
3862 pSMB->ParameterCount = pSMB->TotalParameterCount; 3877 pSMB->ParameterCount = pSMB->TotalParameterCount;
3863 pSMB->hdr.smb_buf_length += byte_count; 3878 inc_rfc1001_len(pSMB, byte_count);
3864 pSMB->ByteCount = cpu_to_le16(byte_count); 3879 pSMB->ByteCount = cpu_to_le16(byte_count);
3865 3880
3866 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3881 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4022,7 +4037,7 @@ GetInodeNumberRetry:
4022 pSMB->ParameterCount = pSMB->TotalParameterCount; 4037 pSMB->ParameterCount = pSMB->TotalParameterCount;
4023 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_INTERNAL_INFO); 4038 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_INTERNAL_INFO);
4024 pSMB->Reserved4 = 0; 4039 pSMB->Reserved4 = 0;
4025 pSMB->hdr.smb_buf_length += byte_count; 4040 inc_rfc1001_len(pSMB, byte_count);
4026 pSMB->ByteCount = cpu_to_le16(byte_count); 4041 pSMB->ByteCount = cpu_to_le16(byte_count);
4027 4042
4028 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4043 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4032,8 +4047,8 @@ GetInodeNumberRetry:
4032 } else { 4047 } else {
4033 /* decode response */ 4048 /* decode response */
4034 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4049 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4035 if (rc || (pSMBr->ByteCount < 2))
4036 /* BB also check enough total bytes returned */ 4050 /* BB also check enough total bytes returned */
4051 if (rc || get_bcc(&pSMBr->hdr) < 2)
4037 /* If rc should we check for EOPNOSUPP and 4052 /* If rc should we check for EOPNOSUPP and
4038 disable the srvino flag? or in caller? */ 4053 disable the srvino flag? or in caller? */
4039 rc = -EIO; /* bad smb */ 4054 rc = -EIO; /* bad smb */
@@ -4246,7 +4261,7 @@ getDFSRetry:
4246 pSMB->ParameterCount = cpu_to_le16(params); 4261 pSMB->ParameterCount = cpu_to_le16(params);
4247 pSMB->TotalParameterCount = pSMB->ParameterCount; 4262 pSMB->TotalParameterCount = pSMB->ParameterCount;
4248 pSMB->MaxReferralLevel = cpu_to_le16(3); 4263 pSMB->MaxReferralLevel = cpu_to_le16(3);
4249 pSMB->hdr.smb_buf_length += byte_count; 4264 inc_rfc1001_len(pSMB, byte_count);
4250 pSMB->ByteCount = cpu_to_le16(byte_count); 4265 pSMB->ByteCount = cpu_to_le16(byte_count);
4251 4266
4252 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB, 4267 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
@@ -4258,13 +4273,13 @@ getDFSRetry:
4258 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4273 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4259 4274
4260 /* BB Also check if enough total bytes returned? */ 4275 /* BB Also check if enough total bytes returned? */
4261 if (rc || (pSMBr->ByteCount < 17)) { 4276 if (rc || get_bcc(&pSMBr->hdr) < 17) {
4262 rc = -EIO; /* bad smb */ 4277 rc = -EIO; /* bad smb */
4263 goto GetDFSRefExit; 4278 goto GetDFSRefExit;
4264 } 4279 }
4265 4280
4266 cFYI(1, "Decoding GetDFSRefer response BCC: %d Offset %d", 4281 cFYI(1, "Decoding GetDFSRefer response BCC: %d Offset %d",
4267 pSMBr->ByteCount, 4282 get_bcc(&pSMBr->hdr),
4268 le16_to_cpu(pSMBr->t2.DataOffset)); 4283 le16_to_cpu(pSMBr->t2.DataOffset));
4269 4284
4270 /* parse returned result into more usable form */ 4285 /* parse returned result into more usable form */
@@ -4320,7 +4335,7 @@ oldQFSInfoRetry:
4320 pSMB->Reserved3 = 0; 4335 pSMB->Reserved3 = 0;
4321 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4336 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4322 pSMB->InformationLevel = cpu_to_le16(SMB_INFO_ALLOCATION); 4337 pSMB->InformationLevel = cpu_to_le16(SMB_INFO_ALLOCATION);
4323 pSMB->hdr.smb_buf_length += byte_count; 4338 inc_rfc1001_len(pSMB, byte_count);
4324 pSMB->ByteCount = cpu_to_le16(byte_count); 4339 pSMB->ByteCount = cpu_to_le16(byte_count);
4325 4340
4326 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4341 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4330,12 +4345,12 @@ oldQFSInfoRetry:
4330 } else { /* decode response */ 4345 } else { /* decode response */
4331 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4346 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4332 4347
4333 if (rc || (pSMBr->ByteCount < 18)) 4348 if (rc || get_bcc(&pSMBr->hdr) < 18)
4334 rc = -EIO; /* bad smb */ 4349 rc = -EIO; /* bad smb */
4335 else { 4350 else {
4336 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4351 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
4337 cFYI(1, "qfsinf resp BCC: %d Offset %d", 4352 cFYI(1, "qfsinf resp BCC: %d Offset %d",
4338 pSMBr->ByteCount, data_offset); 4353 get_bcc(&pSMBr->hdr), data_offset);
4339 4354
4340 response_data = (FILE_SYSTEM_ALLOC_INFO *) 4355 response_data = (FILE_SYSTEM_ALLOC_INFO *)
4341 (((char *) &pSMBr->hdr.Protocol) + data_offset); 4356 (((char *) &pSMBr->hdr.Protocol) + data_offset);
@@ -4399,7 +4414,7 @@ QFSInfoRetry:
4399 pSMB->Reserved3 = 0; 4414 pSMB->Reserved3 = 0;
4400 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4415 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4401 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_SIZE_INFO); 4416 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_SIZE_INFO);
4402 pSMB->hdr.smb_buf_length += byte_count; 4417 inc_rfc1001_len(pSMB, byte_count);
4403 pSMB->ByteCount = cpu_to_le16(byte_count); 4418 pSMB->ByteCount = cpu_to_le16(byte_count);
4404 4419
4405 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4420 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4409,7 +4424,7 @@ QFSInfoRetry:
4409 } else { /* decode response */ 4424 } else { /* decode response */
4410 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4425 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4411 4426
4412 if (rc || (pSMBr->ByteCount < 24)) 4427 if (rc || get_bcc(&pSMBr->hdr) < 24)
4413 rc = -EIO; /* bad smb */ 4428 rc = -EIO; /* bad smb */
4414 else { 4429 else {
4415 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4430 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4479,7 +4494,7 @@ QFSAttributeRetry:
4479 pSMB->Reserved3 = 0; 4494 pSMB->Reserved3 = 0;
4480 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4495 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4481 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_ATTRIBUTE_INFO); 4496 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_ATTRIBUTE_INFO);
4482 pSMB->hdr.smb_buf_length += byte_count; 4497 inc_rfc1001_len(pSMB, byte_count);
4483 pSMB->ByteCount = cpu_to_le16(byte_count); 4498 pSMB->ByteCount = cpu_to_le16(byte_count);
4484 4499
4485 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4500 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4489,7 +4504,7 @@ QFSAttributeRetry:
4489 } else { /* decode response */ 4504 } else { /* decode response */
4490 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4505 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4491 4506
4492 if (rc || (pSMBr->ByteCount < 13)) { 4507 if (rc || get_bcc(&pSMBr->hdr) < 13) {
4493 /* BB also check if enough bytes returned */ 4508 /* BB also check if enough bytes returned */
4494 rc = -EIO; /* bad smb */ 4509 rc = -EIO; /* bad smb */
4495 } else { 4510 } else {
@@ -4550,7 +4565,7 @@ QFSDeviceRetry:
4550 pSMB->Reserved3 = 0; 4565 pSMB->Reserved3 = 0;
4551 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4566 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4552 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_DEVICE_INFO); 4567 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_DEVICE_INFO);
4553 pSMB->hdr.smb_buf_length += byte_count; 4568 inc_rfc1001_len(pSMB, byte_count);
4554 pSMB->ByteCount = cpu_to_le16(byte_count); 4569 pSMB->ByteCount = cpu_to_le16(byte_count);
4555 4570
4556 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4571 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4560,7 +4575,8 @@ QFSDeviceRetry:
4560 } else { /* decode response */ 4575 } else { /* decode response */
4561 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4576 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4562 4577
4563 if (rc || (pSMBr->ByteCount < sizeof(FILE_SYSTEM_DEVICE_INFO))) 4578 if (rc || get_bcc(&pSMBr->hdr) <
4579 sizeof(FILE_SYSTEM_DEVICE_INFO))
4564 rc = -EIO; /* bad smb */ 4580 rc = -EIO; /* bad smb */
4565 else { 4581 else {
4566 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4582 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4619,7 +4635,7 @@ QFSUnixRetry:
4619 pSMB->Reserved3 = 0; 4635 pSMB->Reserved3 = 0;
4620 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4636 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4621 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_CIFS_UNIX_INFO); 4637 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_CIFS_UNIX_INFO);
4622 pSMB->hdr.smb_buf_length += byte_count; 4638 inc_rfc1001_len(pSMB, byte_count);
4623 pSMB->ByteCount = cpu_to_le16(byte_count); 4639 pSMB->ByteCount = cpu_to_le16(byte_count);
4624 4640
4625 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4641 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4629,7 +4645,7 @@ QFSUnixRetry:
4629 } else { /* decode response */ 4645 } else { /* decode response */
4630 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4646 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4631 4647
4632 if (rc || (pSMBr->ByteCount < 13)) { 4648 if (rc || get_bcc(&pSMBr->hdr) < 13) {
4633 rc = -EIO; /* bad smb */ 4649 rc = -EIO; /* bad smb */
4634 } else { 4650 } else {
4635 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4651 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4702,7 +4718,7 @@ SETFSUnixRetry:
4702 pSMB->ClientUnixMinor = cpu_to_le16(CIFS_UNIX_MINOR_VERSION); 4718 pSMB->ClientUnixMinor = cpu_to_le16(CIFS_UNIX_MINOR_VERSION);
4703 pSMB->ClientUnixCap = cpu_to_le64(cap); 4719 pSMB->ClientUnixCap = cpu_to_le64(cap);
4704 4720
4705 pSMB->hdr.smb_buf_length += byte_count; 4721 inc_rfc1001_len(pSMB, byte_count);
4706 pSMB->ByteCount = cpu_to_le16(byte_count); 4722 pSMB->ByteCount = cpu_to_le16(byte_count);
4707 4723
4708 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4724 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4764,7 +4780,7 @@ QFSPosixRetry:
4764 pSMB->Reserved3 = 0; 4780 pSMB->Reserved3 = 0;
4765 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4781 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4766 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_FS_INFO); 4782 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_FS_INFO);
4767 pSMB->hdr.smb_buf_length += byte_count; 4783 inc_rfc1001_len(pSMB, byte_count);
4768 pSMB->ByteCount = cpu_to_le16(byte_count); 4784 pSMB->ByteCount = cpu_to_le16(byte_count);
4769 4785
4770 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4786 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4774,7 +4790,7 @@ QFSPosixRetry:
4774 } else { /* decode response */ 4790 } else { /* decode response */
4775 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4791 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4776 4792
4777 if (rc || (pSMBr->ByteCount < 13)) { 4793 if (rc || get_bcc(&pSMBr->hdr) < 13) {
4778 rc = -EIO; /* bad smb */ 4794 rc = -EIO; /* bad smb */
4779 } else { 4795 } else {
4780 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4796 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4890,7 +4906,7 @@ SetEOFRetry:
4890 pSMB->ParameterCount = cpu_to_le16(params); 4906 pSMB->ParameterCount = cpu_to_le16(params);
4891 pSMB->TotalParameterCount = pSMB->ParameterCount; 4907 pSMB->TotalParameterCount = pSMB->ParameterCount;
4892 pSMB->Reserved4 = 0; 4908 pSMB->Reserved4 = 0;
4893 pSMB->hdr.smb_buf_length += byte_count; 4909 inc_rfc1001_len(pSMB, byte_count);
4894 parm_data->FileSize = cpu_to_le64(size); 4910 parm_data->FileSize = cpu_to_le64(size);
4895 pSMB->ByteCount = cpu_to_le16(byte_count); 4911 pSMB->ByteCount = cpu_to_le16(byte_count);
4896 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4912 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4969,7 +4985,7 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
4969 cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO); 4985 cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO);
4970 } 4986 }
4971 pSMB->Reserved4 = 0; 4987 pSMB->Reserved4 = 0;
4972 pSMB->hdr.smb_buf_length += byte_count; 4988 inc_rfc1001_len(pSMB, byte_count);
4973 pSMB->ByteCount = cpu_to_le16(byte_count); 4989 pSMB->ByteCount = cpu_to_le16(byte_count);
4974 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 4990 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
4975 if (rc) { 4991 if (rc) {
@@ -5037,7 +5053,7 @@ CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
5037 else 5053 else
5038 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO); 5054 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO);
5039 pSMB->Reserved4 = 0; 5055 pSMB->Reserved4 = 0;
5040 pSMB->hdr.smb_buf_length += byte_count; 5056 inc_rfc1001_len(pSMB, byte_count);
5041 pSMB->ByteCount = cpu_to_le16(byte_count); 5057 pSMB->ByteCount = cpu_to_le16(byte_count);
5042 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO)); 5058 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
5043 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 5059 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
@@ -5096,7 +5112,7 @@ CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon,
5096 pSMB->Fid = fid; 5112 pSMB->Fid = fid;
5097 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_DISPOSITION_INFO); 5113 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_DISPOSITION_INFO);
5098 pSMB->Reserved4 = 0; 5114 pSMB->Reserved4 = 0;
5099 pSMB->hdr.smb_buf_length += byte_count; 5115 inc_rfc1001_len(pSMB, byte_count);
5100 pSMB->ByteCount = cpu_to_le16(byte_count); 5116 pSMB->ByteCount = cpu_to_le16(byte_count);
5101 *data_offset = delete_file ? 1 : 0; 5117 *data_offset = delete_file ? 1 : 0;
5102 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 5118 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
@@ -5169,7 +5185,7 @@ SetTimesRetry:
5169 else 5185 else
5170 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO); 5186 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO);
5171 pSMB->Reserved4 = 0; 5187 pSMB->Reserved4 = 0;
5172 pSMB->hdr.smb_buf_length += byte_count; 5188 inc_rfc1001_len(pSMB, byte_count);
5173 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO)); 5189 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
5174 pSMB->ByteCount = cpu_to_le16(byte_count); 5190 pSMB->ByteCount = cpu_to_le16(byte_count);
5175 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5191 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -5221,7 +5237,7 @@ SetAttrLgcyRetry:
5221 } 5237 }
5222 pSMB->attr = cpu_to_le16(dos_attrs); 5238 pSMB->attr = cpu_to_le16(dos_attrs);
5223 pSMB->BufferFormat = 0x04; 5239 pSMB->BufferFormat = 0x04;
5224 pSMB->hdr.smb_buf_length += name_len + 1; 5240 inc_rfc1001_len(pSMB, name_len + 1);
5225 pSMB->ByteCount = cpu_to_le16(name_len + 1); 5241 pSMB->ByteCount = cpu_to_le16(name_len + 1);
5226 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5242 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5227 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5243 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -5326,7 +5342,7 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
5326 pSMB->Fid = fid; 5342 pSMB->Fid = fid;
5327 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC); 5343 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
5328 pSMB->Reserved4 = 0; 5344 pSMB->Reserved4 = 0;
5329 pSMB->hdr.smb_buf_length += byte_count; 5345 inc_rfc1001_len(pSMB, byte_count);
5330 pSMB->ByteCount = cpu_to_le16(byte_count); 5346 pSMB->ByteCount = cpu_to_le16(byte_count);
5331 5347
5332 cifs_fill_unix_set_info(data_offset, args); 5348 cifs_fill_unix_set_info(data_offset, args);
@@ -5402,7 +5418,7 @@ setPermsRetry:
5402 pSMB->TotalDataCount = pSMB->DataCount; 5418 pSMB->TotalDataCount = pSMB->DataCount;
5403 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC); 5419 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
5404 pSMB->Reserved4 = 0; 5420 pSMB->Reserved4 = 0;
5405 pSMB->hdr.smb_buf_length += byte_count; 5421 inc_rfc1001_len(pSMB, byte_count);
5406 5422
5407 cifs_fill_unix_set_info(data_offset, args); 5423 cifs_fill_unix_set_info(data_offset, args);
5408 5424
@@ -5418,79 +5434,6 @@ setPermsRetry:
5418 return rc; 5434 return rc;
5419} 5435}
5420 5436
5421int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
5422 const int notify_subdirs, const __u16 netfid,
5423 __u32 filter, struct file *pfile, int multishot,
5424 const struct nls_table *nls_codepage)
5425{
5426 int rc = 0;
5427 struct smb_com_transaction_change_notify_req *pSMB = NULL;
5428 struct smb_com_ntransaction_change_notify_rsp *pSMBr = NULL;
5429 struct dir_notify_req *dnotify_req;
5430 int bytes_returned;
5431
5432 cFYI(1, "In CIFSSMBNotify for file handle %d", (int)netfid);
5433 rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
5434 (void **) &pSMBr);
5435 if (rc)
5436 return rc;
5437
5438 pSMB->TotalParameterCount = 0 ;
5439 pSMB->TotalDataCount = 0;
5440 pSMB->MaxParameterCount = cpu_to_le32(2);
5441 /* BB find exact data count max from sess structure BB */
5442 pSMB->MaxDataCount = 0; /* same in little endian or be */
5443/* BB VERIFY verify which is correct for above BB */
5444 pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
5445 MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
5446
5447 pSMB->MaxSetupCount = 4;
5448 pSMB->Reserved = 0;
5449 pSMB->ParameterOffset = 0;
5450 pSMB->DataCount = 0;
5451 pSMB->DataOffset = 0;
5452 pSMB->SetupCount = 4; /* single byte does not need le conversion */
5453 pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_NOTIFY_CHANGE);
5454 pSMB->ParameterCount = pSMB->TotalParameterCount;
5455 if (notify_subdirs)
5456 pSMB->WatchTree = 1; /* one byte - no le conversion needed */
5457 pSMB->Reserved2 = 0;
5458 pSMB->CompletionFilter = cpu_to_le32(filter);
5459 pSMB->Fid = netfid; /* file handle always le */
5460 pSMB->ByteCount = 0;
5461
5462 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5463 (struct smb_hdr *)pSMBr, &bytes_returned,
5464 CIFS_ASYNC_OP);
5465 if (rc) {
5466 cFYI(1, "Error in Notify = %d", rc);
5467 } else {
5468 /* Add file to outstanding requests */
5469 /* BB change to kmem cache alloc */
5470 dnotify_req = kmalloc(
5471 sizeof(struct dir_notify_req),
5472 GFP_KERNEL);
5473 if (dnotify_req) {
5474 dnotify_req->Pid = pSMB->hdr.Pid;
5475 dnotify_req->PidHigh = pSMB->hdr.PidHigh;
5476 dnotify_req->Mid = pSMB->hdr.Mid;
5477 dnotify_req->Tid = pSMB->hdr.Tid;
5478 dnotify_req->Uid = pSMB->hdr.Uid;
5479 dnotify_req->netfid = netfid;
5480 dnotify_req->pfile = pfile;
5481 dnotify_req->filter = filter;
5482 dnotify_req->multishot = multishot;
5483 spin_lock(&GlobalMid_Lock);
5484 list_add_tail(&dnotify_req->lhead,
5485 &GlobalDnotifyReqList);
5486 spin_unlock(&GlobalMid_Lock);
5487 } else
5488 rc = -ENOMEM;
5489 }
5490 cifs_buf_release(pSMB);
5491 return rc;
5492}
5493
5494#ifdef CONFIG_CIFS_XATTR 5437#ifdef CONFIG_CIFS_XATTR
5495/* 5438/*
5496 * Do a path-based QUERY_ALL_EAS call and parse the result. This is a common 5439 * Do a path-based QUERY_ALL_EAS call and parse the result. This is a common
@@ -5560,7 +5503,7 @@ QAllEAsRetry:
5560 pSMB->ParameterCount = pSMB->TotalParameterCount; 5503 pSMB->ParameterCount = pSMB->TotalParameterCount;
5561 pSMB->InformationLevel = cpu_to_le16(SMB_INFO_QUERY_ALL_EAS); 5504 pSMB->InformationLevel = cpu_to_le16(SMB_INFO_QUERY_ALL_EAS);
5562 pSMB->Reserved4 = 0; 5505 pSMB->Reserved4 = 0;
5563 pSMB->hdr.smb_buf_length += byte_count; 5506 inc_rfc1001_len(pSMB, byte_count);
5564 pSMB->ByteCount = cpu_to_le16(byte_count); 5507 pSMB->ByteCount = cpu_to_le16(byte_count);
5565 5508
5566 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5509 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -5576,7 +5519,7 @@ QAllEAsRetry:
5576 of these trans2 responses */ 5519 of these trans2 responses */
5577 5520
5578 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 5521 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
5579 if (rc || (pSMBr->ByteCount < 4)) { 5522 if (rc || get_bcc(&pSMBr->hdr) < 4) {
5580 rc = -EIO; /* bad smb */ 5523 rc = -EIO; /* bad smb */
5581 goto QAllEAsOut; 5524 goto QAllEAsOut;
5582 } 5525 }
@@ -5773,7 +5716,7 @@ SetEARetry:
5773 pSMB->ParameterCount = cpu_to_le16(params); 5716 pSMB->ParameterCount = cpu_to_le16(params);
5774 pSMB->TotalParameterCount = pSMB->ParameterCount; 5717 pSMB->TotalParameterCount = pSMB->ParameterCount;
5775 pSMB->Reserved4 = 0; 5718 pSMB->Reserved4 = 0;
5776 pSMB->hdr.smb_buf_length += byte_count; 5719 inc_rfc1001_len(pSMB, byte_count);
5777 pSMB->ByteCount = cpu_to_le16(byte_count); 5720 pSMB->ByteCount = cpu_to_le16(byte_count);
5778 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5721 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5779 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5722 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -5787,5 +5730,99 @@ SetEARetry:
5787 5730
5788 return rc; 5731 return rc;
5789} 5732}
5790
5791#endif 5733#endif
5734
5735#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* BB unused temporarily */
5736/*
5737 * Years ago the kernel added a "dnotify" function for Samba server,
5738 * to allow network clients (such as Windows) to display updated
5739 * lists of files in directory listings automatically when
5740 * files are added by one user when another user has the
5741 * same directory open on their desktop. The Linux cifs kernel
5742 * client hooked into the kernel side of this interface for
5743 * the same reason, but ironically when the VFS moved from
5744 * "dnotify" to "inotify" it became harder to plug in Linux
5745 * network file system clients (the most obvious use case
5746 * for notify interfaces is when multiple users can update
5747 * the contents of the same directory - exactly what network
5748 * file systems can do) although the server (Samba) could
5749 * still use it. For the short term we leave the worker
5750 * function ifdeffed out (below) until inotify is fixed
5751 * in the VFS to make it easier to plug in network file
5752 * system clients. If inotify turns out to be permanently
5753 * incompatible for network fs clients, we could instead simply
5754 * expose this config flag by adding a future cifs (and smb2) notify ioctl.
5755 */
5756int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
5757 const int notify_subdirs, const __u16 netfid,
5758 __u32 filter, struct file *pfile, int multishot,
5759 const struct nls_table *nls_codepage)
5760{
5761 int rc = 0;
5762 struct smb_com_transaction_change_notify_req *pSMB = NULL;
5763 struct smb_com_ntransaction_change_notify_rsp *pSMBr = NULL;
5764 struct dir_notify_req *dnotify_req;
5765 int bytes_returned;
5766
5767 cFYI(1, "In CIFSSMBNotify for file handle %d", (int)netfid);
5768 rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
5769 (void **) &pSMBr);
5770 if (rc)
5771 return rc;
5772
5773 pSMB->TotalParameterCount = 0 ;
5774 pSMB->TotalDataCount = 0;
5775 pSMB->MaxParameterCount = cpu_to_le32(2);
5776 /* BB find exact data count max from sess structure BB */
5777 pSMB->MaxDataCount = 0; /* same in little endian or be */
5778/* BB VERIFY verify which is correct for above BB */
5779 pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
5780 MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
5781
5782 pSMB->MaxSetupCount = 4;
5783 pSMB->Reserved = 0;
5784 pSMB->ParameterOffset = 0;
5785 pSMB->DataCount = 0;
5786 pSMB->DataOffset = 0;
5787 pSMB->SetupCount = 4; /* single byte does not need le conversion */
5788 pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_NOTIFY_CHANGE);
5789 pSMB->ParameterCount = pSMB->TotalParameterCount;
5790 if (notify_subdirs)
5791 pSMB->WatchTree = 1; /* one byte - no le conversion needed */
5792 pSMB->Reserved2 = 0;
5793 pSMB->CompletionFilter = cpu_to_le32(filter);
5794 pSMB->Fid = netfid; /* file handle always le */
5795 pSMB->ByteCount = 0;
5796
5797 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5798 (struct smb_hdr *)pSMBr, &bytes_returned,
5799 CIFS_ASYNC_OP);
5800 if (rc) {
5801 cFYI(1, "Error in Notify = %d", rc);
5802 } else {
5803 /* Add file to outstanding requests */
5804 /* BB change to kmem cache alloc */
5805 dnotify_req = kmalloc(
5806 sizeof(struct dir_notify_req),
5807 GFP_KERNEL);
5808 if (dnotify_req) {
5809 dnotify_req->Pid = pSMB->hdr.Pid;
5810 dnotify_req->PidHigh = pSMB->hdr.PidHigh;
5811 dnotify_req->Mid = pSMB->hdr.Mid;
5812 dnotify_req->Tid = pSMB->hdr.Tid;
5813 dnotify_req->Uid = pSMB->hdr.Uid;
5814 dnotify_req->netfid = netfid;
5815 dnotify_req->pfile = pfile;
5816 dnotify_req->filter = filter;
5817 dnotify_req->multishot = multishot;
5818 spin_lock(&GlobalMid_Lock);
5819 list_add_tail(&dnotify_req->lhead,
5820 &GlobalDnotifyReqList);
5821 spin_unlock(&GlobalMid_Lock);
5822 } else
5823 rc = -ENOMEM;
5824 }
5825 cifs_buf_release(pSMB);
5826 return rc;
5827}
5828#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 277262a8e82f..da284e3cb653 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -102,6 +102,7 @@ struct smb_vol {
102 bool fsc:1; /* enable fscache */ 102 bool fsc:1; /* enable fscache */
103 bool mfsymlinks:1; /* use Minshall+French Symlinks */ 103 bool mfsymlinks:1; /* use Minshall+French Symlinks */
104 bool multiuser:1; 104 bool multiuser:1;
105 bool use_smb2:1; /* force smb2 use on mount instead of cifs */
105 unsigned int rsize; 106 unsigned int rsize;
106 unsigned int wsize; 107 unsigned int wsize;
107 bool sockopt_tcp_nodelay:1; 108 bool sockopt_tcp_nodelay:1;
@@ -316,19 +317,19 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
316 put_unaligned_le16(total_in_buf, &pSMBt->t2_rsp.DataCount); 317 put_unaligned_le16(total_in_buf, &pSMBt->t2_rsp.DataCount);
317 318
318 /* fix up the BCC */ 319 /* fix up the BCC */
319 byte_count = get_bcc_le(pTargetSMB); 320 byte_count = get_bcc(pTargetSMB);
320 byte_count += total_in_buf2; 321 byte_count += total_in_buf2;
321 /* is the result too big for the field? */ 322 /* is the result too big for the field? */
322 if (byte_count > USHRT_MAX) 323 if (byte_count > USHRT_MAX)
323 return -EPROTO; 324 return -EPROTO;
324 put_bcc_le(byte_count, pTargetSMB); 325 put_bcc(byte_count, pTargetSMB);
325 326
326 byte_count = pTargetSMB->smb_buf_length; 327 byte_count = be32_to_cpu(pTargetSMB->smb_buf_length);
327 byte_count += total_in_buf2; 328 byte_count += total_in_buf2;
328 /* don't allow buffer to overflow */ 329 /* don't allow buffer to overflow */
329 if (byte_count > CIFSMaxBufSize) 330 if (byte_count > CIFSMaxBufSize)
330 return -ENOBUFS; 331 return -ENOBUFS;
331 pTargetSMB->smb_buf_length = byte_count; 332 pTargetSMB->smb_buf_length = cpu_to_be32(byte_count);
332 333
333 memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2); 334 memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2);
334 335
@@ -495,8 +496,7 @@ incomplete_rcv:
495 /* Note that FC 1001 length is big endian on the wire, 496 /* Note that FC 1001 length is big endian on the wire,
496 but we convert it here so it is always manipulated 497 but we convert it here so it is always manipulated
497 as host byte order */ 498 as host byte order */
498 pdu_length = be32_to_cpu((__force __be32)smb_buffer->smb_buf_length); 499 pdu_length = be32_to_cpu(smb_buffer->smb_buf_length);
499 smb_buffer->smb_buf_length = pdu_length;
500 500
501 cFYI(1, "rfc1002 length 0x%x", pdu_length+4); 501 cFYI(1, "rfc1002 length 0x%x", pdu_length+4);
502 502
@@ -735,7 +735,7 @@ multi_t2_fnd:
735 sock_release(csocket); 735 sock_release(csocket);
736 server->ssocket = NULL; 736 server->ssocket = NULL;
737 } 737 }
738 /* buffer usuallly freed in free_mid - need to free it here on exit */ 738 /* buffer usually freed in free_mid - need to free it here on exit */
739 cifs_buf_release(bigbuf); 739 cifs_buf_release(bigbuf);
740 if (smallbuf) /* no sense logging a debug message if NULL */ 740 if (smallbuf) /* no sense logging a debug message if NULL */
741 cifs_small_buf_release(smallbuf); 741 cifs_small_buf_release(smallbuf);
@@ -818,10 +818,11 @@ extract_hostname(const char *unc)
818} 818}
819 819
820static int 820static int
821cifs_parse_mount_options(char *options, const char *devname, 821cifs_parse_mount_options(const char *mountdata, const char *devname,
822 struct smb_vol *vol) 822 struct smb_vol *vol)
823{ 823{
824 char *value, *data, *end; 824 char *value, *data, *end;
825 char *mountdata_copy, *options;
825 unsigned int temp_len, i, j; 826 unsigned int temp_len, i, j;
826 char separator[2]; 827 char separator[2];
827 short int override_uid = -1; 828 short int override_uid = -1;
@@ -861,9 +862,14 @@ cifs_parse_mount_options(char *options, const char *devname,
861 862
862 vol->actimeo = CIFS_DEF_ACTIMEO; 863 vol->actimeo = CIFS_DEF_ACTIMEO;
863 864
864 if (!options) 865 if (!mountdata)
865 return 1; 866 goto cifs_parse_mount_err;
867
868 mountdata_copy = kstrndup(mountdata, PAGE_SIZE, GFP_KERNEL);
869 if (!mountdata_copy)
870 goto cifs_parse_mount_err;
866 871
872 options = mountdata_copy;
867 end = options + strlen(options); 873 end = options + strlen(options);
868 if (strncmp(options, "sep=", 4) == 0) { 874 if (strncmp(options, "sep=", 4) == 0) {
869 if (options[4] != 0) { 875 if (options[4] != 0) {
@@ -889,17 +895,22 @@ cifs_parse_mount_options(char *options, const char *devname,
889 if (!value) { 895 if (!value) {
890 printk(KERN_WARNING 896 printk(KERN_WARNING
891 "CIFS: invalid or missing username\n"); 897 "CIFS: invalid or missing username\n");
892 return 1; /* needs_arg; */ 898 goto cifs_parse_mount_err;
893 } else if (!*value) { 899 } else if (!*value) {
894 /* null user, ie anonymous, authentication */ 900 /* null user, ie anonymous, authentication */
895 vol->nullauth = 1; 901 vol->nullauth = 1;
896 } 902 }
897 if (strnlen(value, MAX_USERNAME_SIZE) < 903 if (strnlen(value, MAX_USERNAME_SIZE) <
898 MAX_USERNAME_SIZE) { 904 MAX_USERNAME_SIZE) {
899 vol->username = value; 905 vol->username = kstrdup(value, GFP_KERNEL);
906 if (!vol->username) {
907 printk(KERN_WARNING "CIFS: no memory "
908 "for username\n");
909 goto cifs_parse_mount_err;
910 }
900 } else { 911 } else {
901 printk(KERN_WARNING "CIFS: username too long\n"); 912 printk(KERN_WARNING "CIFS: username too long\n");
902 return 1; 913 goto cifs_parse_mount_err;
903 } 914 }
904 } else if (strnicmp(data, "pass", 4) == 0) { 915 } else if (strnicmp(data, "pass", 4) == 0) {
905 if (!value) { 916 if (!value) {
@@ -963,7 +974,7 @@ cifs_parse_mount_options(char *options, const char *devname,
963 if (vol->password == NULL) { 974 if (vol->password == NULL) {
964 printk(KERN_WARNING "CIFS: no memory " 975 printk(KERN_WARNING "CIFS: no memory "
965 "for password\n"); 976 "for password\n");
966 return 1; 977 goto cifs_parse_mount_err;
967 } 978 }
968 for (i = 0, j = 0; i < temp_len; i++, j++) { 979 for (i = 0, j = 0; i < temp_len; i++, j++) {
969 vol->password[j] = value[i]; 980 vol->password[j] = value[i];
@@ -979,7 +990,7 @@ cifs_parse_mount_options(char *options, const char *devname,
979 if (vol->password == NULL) { 990 if (vol->password == NULL) {
980 printk(KERN_WARNING "CIFS: no memory " 991 printk(KERN_WARNING "CIFS: no memory "
981 "for password\n"); 992 "for password\n");
982 return 1; 993 goto cifs_parse_mount_err;
983 } 994 }
984 strcpy(vol->password, value); 995 strcpy(vol->password, value);
985 } 996 }
@@ -989,11 +1000,16 @@ cifs_parse_mount_options(char *options, const char *devname,
989 vol->UNCip = NULL; 1000 vol->UNCip = NULL;
990 } else if (strnlen(value, INET6_ADDRSTRLEN) < 1001 } else if (strnlen(value, INET6_ADDRSTRLEN) <
991 INET6_ADDRSTRLEN) { 1002 INET6_ADDRSTRLEN) {
992 vol->UNCip = value; 1003 vol->UNCip = kstrdup(value, GFP_KERNEL);
1004 if (!vol->UNCip) {
1005 printk(KERN_WARNING "CIFS: no memory "
1006 "for UNC IP\n");
1007 goto cifs_parse_mount_err;
1008 }
993 } else { 1009 } else {
994 printk(KERN_WARNING "CIFS: ip address " 1010 printk(KERN_WARNING "CIFS: ip address "
995 "too long\n"); 1011 "too long\n");
996 return 1; 1012 goto cifs_parse_mount_err;
997 } 1013 }
998 } else if (strnicmp(data, "sec", 3) == 0) { 1014 } else if (strnicmp(data, "sec", 3) == 0) {
999 if (!value || !*value) { 1015 if (!value || !*value) {
@@ -1006,7 +1022,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1006 /* vol->secFlg |= CIFSSEC_MUST_SEAL | 1022 /* vol->secFlg |= CIFSSEC_MUST_SEAL |
1007 CIFSSEC_MAY_KRB5; */ 1023 CIFSSEC_MAY_KRB5; */
1008 cERROR(1, "Krb5 cifs privacy not supported"); 1024 cERROR(1, "Krb5 cifs privacy not supported");
1009 return 1; 1025 goto cifs_parse_mount_err;
1010 } else if (strnicmp(value, "krb5", 4) == 0) { 1026 } else if (strnicmp(value, "krb5", 4) == 0) {
1011 vol->secFlg |= CIFSSEC_MAY_KRB5; 1027 vol->secFlg |= CIFSSEC_MAY_KRB5;
1012 } else if (strnicmp(value, "ntlmsspi", 8) == 0) { 1028 } else if (strnicmp(value, "ntlmsspi", 8) == 0) {
@@ -1036,7 +1052,23 @@ cifs_parse_mount_options(char *options, const char *devname,
1036 vol->nullauth = 1; 1052 vol->nullauth = 1;
1037 } else { 1053 } else {
1038 cERROR(1, "bad security option: %s", value); 1054 cERROR(1, "bad security option: %s", value);
1039 return 1; 1055 goto cifs_parse_mount_err;
1056 }
1057 } else if (strnicmp(data, "vers", 3) == 0) {
1058 if (!value || !*value) {
1059 cERROR(1, "no protocol version specified"
1060 " after vers= mount option");
1061 } else if ((strnicmp(value, "cifs", 4) == 0) ||
1062 (strnicmp(value, "1", 1) == 0)) {
1063 /* this is the default */
1064 continue;
1065 } else if ((strnicmp(value, "smb2", 4) == 0) ||
1066 (strnicmp(value, "2", 1) == 0)) {
1067#ifdef CONFIG_CIFS_SMB2
1068 vol->use_smb2 = true;
1069#else
1070 cERROR(1, "smb2 support not enabled");
1071#endif /* CONFIG_CIFS_SMB2 */
1040 } 1072 }
1041 } else if ((strnicmp(data, "unc", 3) == 0) 1073 } else if ((strnicmp(data, "unc", 3) == 0)
1042 || (strnicmp(data, "target", 6) == 0) 1074 || (strnicmp(data, "target", 6) == 0)
@@ -1044,12 +1076,12 @@ cifs_parse_mount_options(char *options, const char *devname,
1044 if (!value || !*value) { 1076 if (!value || !*value) {
1045 printk(KERN_WARNING "CIFS: invalid path to " 1077 printk(KERN_WARNING "CIFS: invalid path to "
1046 "network resource\n"); 1078 "network resource\n");
1047 return 1; /* needs_arg; */ 1079 goto cifs_parse_mount_err;
1048 } 1080 }
1049 if ((temp_len = strnlen(value, 300)) < 300) { 1081 if ((temp_len = strnlen(value, 300)) < 300) {
1050 vol->UNC = kmalloc(temp_len+1, GFP_KERNEL); 1082 vol->UNC = kmalloc(temp_len+1, GFP_KERNEL);
1051 if (vol->UNC == NULL) 1083 if (vol->UNC == NULL)
1052 return 1; 1084 goto cifs_parse_mount_err;
1053 strcpy(vol->UNC, value); 1085 strcpy(vol->UNC, value);
1054 if (strncmp(vol->UNC, "//", 2) == 0) { 1086 if (strncmp(vol->UNC, "//", 2) == 0) {
1055 vol->UNC[0] = '\\'; 1087 vol->UNC[0] = '\\';
@@ -1058,27 +1090,32 @@ cifs_parse_mount_options(char *options, const char *devname,
1058 printk(KERN_WARNING 1090 printk(KERN_WARNING
1059 "CIFS: UNC Path does not begin " 1091 "CIFS: UNC Path does not begin "
1060 "with // or \\\\ \n"); 1092 "with // or \\\\ \n");
1061 return 1; 1093 goto cifs_parse_mount_err;
1062 } 1094 }
1063 } else { 1095 } else {
1064 printk(KERN_WARNING "CIFS: UNC name too long\n"); 1096 printk(KERN_WARNING "CIFS: UNC name too long\n");
1065 return 1; 1097 goto cifs_parse_mount_err;
1066 } 1098 }
1067 } else if ((strnicmp(data, "domain", 3) == 0) 1099 } else if ((strnicmp(data, "domain", 3) == 0)
1068 || (strnicmp(data, "workgroup", 5) == 0)) { 1100 || (strnicmp(data, "workgroup", 5) == 0)) {
1069 if (!value || !*value) { 1101 if (!value || !*value) {
1070 printk(KERN_WARNING "CIFS: invalid domain name\n"); 1102 printk(KERN_WARNING "CIFS: invalid domain name\n");
1071 return 1; /* needs_arg; */ 1103 goto cifs_parse_mount_err;
1072 } 1104 }
1073 /* BB are there cases in which a comma can be valid in 1105 /* BB are there cases in which a comma can be valid in
1074 a domain name and need special handling? */ 1106 a domain name and need special handling? */
1075 if (strnlen(value, 256) < 256) { 1107 if (strnlen(value, 256) < 256) {
1076 vol->domainname = value; 1108 vol->domainname = kstrdup(value, GFP_KERNEL);
1109 if (!vol->domainname) {
1110 printk(KERN_WARNING "CIFS: no memory "
1111 "for domainname\n");
1112 goto cifs_parse_mount_err;
1113 }
1077 cFYI(1, "Domain name set"); 1114 cFYI(1, "Domain name set");
1078 } else { 1115 } else {
1079 printk(KERN_WARNING "CIFS: domain name too " 1116 printk(KERN_WARNING "CIFS: domain name too "
1080 "long\n"); 1117 "long\n");
1081 return 1; 1118 goto cifs_parse_mount_err;
1082 } 1119 }
1083 } else if (strnicmp(data, "srcaddr", 7) == 0) { 1120 } else if (strnicmp(data, "srcaddr", 7) == 0) {
1084 vol->srcaddr.ss_family = AF_UNSPEC; 1121 vol->srcaddr.ss_family = AF_UNSPEC;
@@ -1086,7 +1123,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1086 if (!value || !*value) { 1123 if (!value || !*value) {
1087 printk(KERN_WARNING "CIFS: srcaddr value" 1124 printk(KERN_WARNING "CIFS: srcaddr value"
1088 " not specified.\n"); 1125 " not specified.\n");
1089 return 1; /* needs_arg; */ 1126 goto cifs_parse_mount_err;
1090 } 1127 }
1091 i = cifs_convert_address((struct sockaddr *)&vol->srcaddr, 1128 i = cifs_convert_address((struct sockaddr *)&vol->srcaddr,
1092 value, strlen(value)); 1129 value, strlen(value));
@@ -1094,20 +1131,20 @@ cifs_parse_mount_options(char *options, const char *devname,
1094 printk(KERN_WARNING "CIFS: Could not parse" 1131 printk(KERN_WARNING "CIFS: Could not parse"
1095 " srcaddr: %s\n", 1132 " srcaddr: %s\n",
1096 value); 1133 value);
1097 return 1; 1134 goto cifs_parse_mount_err;
1098 } 1135 }
1099 } else if (strnicmp(data, "prefixpath", 10) == 0) { 1136 } else if (strnicmp(data, "prefixpath", 10) == 0) {
1100 if (!value || !*value) { 1137 if (!value || !*value) {
1101 printk(KERN_WARNING 1138 printk(KERN_WARNING
1102 "CIFS: invalid path prefix\n"); 1139 "CIFS: invalid path prefix\n");
1103 return 1; /* needs_argument */ 1140 goto cifs_parse_mount_err;
1104 } 1141 }
1105 if ((temp_len = strnlen(value, 1024)) < 1024) { 1142 if ((temp_len = strnlen(value, 1024)) < 1024) {
1106 if (value[0] != '/') 1143 if (value[0] != '/')
1107 temp_len++; /* missing leading slash */ 1144 temp_len++; /* missing leading slash */
1108 vol->prepath = kmalloc(temp_len+1, GFP_KERNEL); 1145 vol->prepath = kmalloc(temp_len+1, GFP_KERNEL);
1109 if (vol->prepath == NULL) 1146 if (vol->prepath == NULL)
1110 return 1; 1147 goto cifs_parse_mount_err;
1111 if (value[0] != '/') { 1148 if (value[0] != '/') {
1112 vol->prepath[0] = '/'; 1149 vol->prepath[0] = '/';
1113 strcpy(vol->prepath+1, value); 1150 strcpy(vol->prepath+1, value);
@@ -1116,24 +1153,33 @@ cifs_parse_mount_options(char *options, const char *devname,
1116 cFYI(1, "prefix path %s", vol->prepath); 1153 cFYI(1, "prefix path %s", vol->prepath);
1117 } else { 1154 } else {
1118 printk(KERN_WARNING "CIFS: prefix too long\n"); 1155 printk(KERN_WARNING "CIFS: prefix too long\n");
1119 return 1; 1156 goto cifs_parse_mount_err;
1120 } 1157 }
1121 } else if (strnicmp(data, "iocharset", 9) == 0) { 1158 } else if (strnicmp(data, "iocharset", 9) == 0) {
1122 if (!value || !*value) { 1159 if (!value || !*value) {
1123 printk(KERN_WARNING "CIFS: invalid iocharset " 1160 printk(KERN_WARNING "CIFS: invalid iocharset "
1124 "specified\n"); 1161 "specified\n");
1125 return 1; /* needs_arg; */ 1162 goto cifs_parse_mount_err;
1126 } 1163 }
1127 if (strnlen(value, 65) < 65) { 1164 if (strnlen(value, 65) < 65) {
1128 if (strnicmp(value, "default", 7)) 1165 if (strnicmp(value, "default", 7)) {
1129 vol->iocharset = value; 1166 vol->iocharset = kstrdup(value,
1167 GFP_KERNEL);
1168
1169 if (!vol->iocharset) {
1170 printk(KERN_WARNING "CIFS: no "
1171 "memory for"
1172 "charset\n");
1173 goto cifs_parse_mount_err;
1174 }
1175 }
1130 /* if iocharset not set then load_nls_default 1176 /* if iocharset not set then load_nls_default
1131 is used by caller */ 1177 is used by caller */
1132 cFYI(1, "iocharset set to %s", value); 1178 cFYI(1, "iocharset set to %s", value);
1133 } else { 1179 } else {
1134 printk(KERN_WARNING "CIFS: iocharset name " 1180 printk(KERN_WARNING "CIFS: iocharset name "
1135 "too long.\n"); 1181 "too long.\n");
1136 return 1; 1182 goto cifs_parse_mount_err;
1137 } 1183 }
1138 } else if (!strnicmp(data, "uid", 3) && value && *value) { 1184 } else if (!strnicmp(data, "uid", 3) && value && *value) {
1139 vol->linux_uid = simple_strtoul(value, &value, 0); 1185 vol->linux_uid = simple_strtoul(value, &value, 0);
@@ -1246,7 +1292,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1246 if (vol->actimeo > CIFS_MAX_ACTIMEO) { 1292 if (vol->actimeo > CIFS_MAX_ACTIMEO) {
1247 cERROR(1, "CIFS: attribute cache" 1293 cERROR(1, "CIFS: attribute cache"
1248 "timeout too large"); 1294 "timeout too large");
1249 return 1; 1295 goto cifs_parse_mount_err;
1250 } 1296 }
1251 } 1297 }
1252 } else if (strnicmp(data, "credentials", 4) == 0) { 1298 } else if (strnicmp(data, "credentials", 4) == 0) {
@@ -1390,7 +1436,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1390#ifndef CONFIG_CIFS_FSCACHE 1436#ifndef CONFIG_CIFS_FSCACHE
1391 cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE" 1437 cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE"
1392 "kernel config option set"); 1438 "kernel config option set");
1393 return 1; 1439 goto cifs_parse_mount_err;
1394#endif 1440#endif
1395 vol->fsc = true; 1441 vol->fsc = true;
1396 } else if (strnicmp(data, "mfsymlinks", 10) == 0) { 1442 } else if (strnicmp(data, "mfsymlinks", 10) == 0) {
@@ -1405,12 +1451,12 @@ cifs_parse_mount_options(char *options, const char *devname,
1405 if (devname == NULL) { 1451 if (devname == NULL) {
1406 printk(KERN_WARNING "CIFS: Missing UNC name for mount " 1452 printk(KERN_WARNING "CIFS: Missing UNC name for mount "
1407 "target\n"); 1453 "target\n");
1408 return 1; 1454 goto cifs_parse_mount_err;
1409 } 1455 }
1410 if ((temp_len = strnlen(devname, 300)) < 300) { 1456 if ((temp_len = strnlen(devname, 300)) < 300) {
1411 vol->UNC = kmalloc(temp_len+1, GFP_KERNEL); 1457 vol->UNC = kmalloc(temp_len+1, GFP_KERNEL);
1412 if (vol->UNC == NULL) 1458 if (vol->UNC == NULL)
1413 return 1; 1459 goto cifs_parse_mount_err;
1414 strcpy(vol->UNC, devname); 1460 strcpy(vol->UNC, devname);
1415 if (strncmp(vol->UNC, "//", 2) == 0) { 1461 if (strncmp(vol->UNC, "//", 2) == 0) {
1416 vol->UNC[0] = '\\'; 1462 vol->UNC[0] = '\\';
@@ -1418,21 +1464,21 @@ cifs_parse_mount_options(char *options, const char *devname,
1418 } else if (strncmp(vol->UNC, "\\\\", 2) != 0) { 1464 } else if (strncmp(vol->UNC, "\\\\", 2) != 0) {
1419 printk(KERN_WARNING "CIFS: UNC Path does not " 1465 printk(KERN_WARNING "CIFS: UNC Path does not "
1420 "begin with // or \\\\ \n"); 1466 "begin with // or \\\\ \n");
1421 return 1; 1467 goto cifs_parse_mount_err;
1422 } 1468 }
1423 value = strpbrk(vol->UNC+2, "/\\"); 1469 value = strpbrk(vol->UNC+2, "/\\");
1424 if (value) 1470 if (value)
1425 *value = '\\'; 1471 *value = '\\';
1426 } else { 1472 } else {
1427 printk(KERN_WARNING "CIFS: UNC name too long\n"); 1473 printk(KERN_WARNING "CIFS: UNC name too long\n");
1428 return 1; 1474 goto cifs_parse_mount_err;
1429 } 1475 }
1430 } 1476 }
1431 1477
1432 if (vol->multiuser && !(vol->secFlg & CIFSSEC_MAY_KRB5)) { 1478 if (vol->multiuser && !(vol->secFlg & CIFSSEC_MAY_KRB5)) {
1433 cERROR(1, "Multiuser mounts currently require krb5 " 1479 cERROR(1, "Multiuser mounts currently require krb5 "
1434 "authentication!"); 1480 "authentication!");
1435 return 1; 1481 goto cifs_parse_mount_err;
1436 } 1482 }
1437 1483
1438 if (vol->UNCip == NULL) 1484 if (vol->UNCip == NULL)
@@ -1450,7 +1496,12 @@ cifs_parse_mount_options(char *options, const char *devname,
1450 printk(KERN_NOTICE "CIFS: ignoring forcegid mount option " 1496 printk(KERN_NOTICE "CIFS: ignoring forcegid mount option "
1451 "specified with no gid= option.\n"); 1497 "specified with no gid= option.\n");
1452 1498
1499 kfree(mountdata_copy);
1453 return 0; 1500 return 0;
1501
1502cifs_parse_mount_err:
1503 kfree(mountdata_copy);
1504 return 1;
1454} 1505}
1455 1506
1456/** Returns true if srcaddr isn't specified and rhs isn't 1507/** Returns true if srcaddr isn't specified and rhs isn't
@@ -2280,7 +2331,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
2280 smb_buf = (struct smb_hdr *)ses_init_buf; 2331 smb_buf = (struct smb_hdr *)ses_init_buf;
2281 2332
2282 /* sizeof RFC1002_SESSION_REQUEST with no scope */ 2333 /* sizeof RFC1002_SESSION_REQUEST with no scope */
2283 smb_buf->smb_buf_length = 0x81000044; 2334 smb_buf->smb_buf_length = cpu_to_be32(0x81000044);
2284 rc = smb_send(server, smb_buf, 0x44); 2335 rc = smb_send(server, smb_buf, 0x44);
2285 kfree(ses_init_buf); 2336 kfree(ses_init_buf);
2286 /* 2337 /*
@@ -2691,8 +2742,12 @@ cleanup_volume_info(struct smb_vol **pvolume_info)
2691 return; 2742 return;
2692 2743
2693 volume_info = *pvolume_info; 2744 volume_info = *pvolume_info;
2745 kfree(volume_info->username);
2694 kzfree(volume_info->password); 2746 kzfree(volume_info->password);
2695 kfree(volume_info->UNC); 2747 kfree(volume_info->UNC);
2748 kfree(volume_info->UNCip);
2749 kfree(volume_info->domainname);
2750 kfree(volume_info->iocharset);
2696 kfree(volume_info->prepath); 2751 kfree(volume_info->prepath);
2697 kfree(volume_info); 2752 kfree(volume_info);
2698 *pvolume_info = NULL; 2753 *pvolume_info = NULL;
@@ -2729,11 +2784,65 @@ build_unc_path_to_root(const struct smb_vol *volume_info,
2729 full_path[unc_len + cifs_sb->prepathlen] = 0; /* add trailing null */ 2784 full_path[unc_len + cifs_sb->prepathlen] = 0; /* add trailing null */
2730 return full_path; 2785 return full_path;
2731} 2786}
2787
2788/*
2789 * Perform a dfs referral query for a share and (optionally) prefix
2790 *
2791 * If a referral is found, cifs_sb->mountdata will be (re-)allocated
2792 * to a string containing updated options for the submount. Otherwise it
2793 * will be left untouched.
2794 *
2795 * Returns the rc from get_dfs_path to the caller, which can be used to
2796 * determine whether there were referrals.
2797 */
2798static int
2799expand_dfs_referral(int xid, struct cifsSesInfo *pSesInfo,
2800 struct smb_vol *volume_info, struct cifs_sb_info *cifs_sb,
2801 int check_prefix)
2802{
2803 int rc;
2804 unsigned int num_referrals = 0;
2805 struct dfs_info3_param *referrals = NULL;
2806 char *full_path = NULL, *ref_path = NULL, *mdata = NULL;
2807
2808 full_path = build_unc_path_to_root(volume_info, cifs_sb);
2809 if (IS_ERR(full_path))
2810 return PTR_ERR(full_path);
2811
2812 /* For DFS paths, skip the first '\' of the UNC */
2813 ref_path = check_prefix ? full_path + 1 : volume_info->UNC + 1;
2814
2815 rc = get_dfs_path(xid, pSesInfo , ref_path, cifs_sb->local_nls,
2816 &num_referrals, &referrals,
2817 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
2818
2819 if (!rc && num_referrals > 0) {
2820 char *fake_devname = NULL;
2821
2822 mdata = cifs_compose_mount_options(cifs_sb->mountdata,
2823 full_path + 1, referrals,
2824 &fake_devname);
2825
2826 free_dfs_info_array(referrals, num_referrals);
2827 kfree(fake_devname);
2828
2829 if (cifs_sb->mountdata != NULL)
2830 kfree(cifs_sb->mountdata);
2831
2832 if (IS_ERR(mdata)) {
2833 rc = PTR_ERR(mdata);
2834 mdata = NULL;
2835 }
2836 cifs_sb->mountdata = mdata;
2837 }
2838 kfree(full_path);
2839 return rc;
2840}
2732#endif 2841#endif
2733 2842
2734int 2843int
2735cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, 2844cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2736 char *mount_data_global, const char *devname) 2845 const char *devname)
2737{ 2846{
2738 int rc; 2847 int rc;
2739 int xid; 2848 int xid;
@@ -2742,13 +2851,20 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2742 struct cifsTconInfo *tcon; 2851 struct cifsTconInfo *tcon;
2743 struct TCP_Server_Info *srvTcp; 2852 struct TCP_Server_Info *srvTcp;
2744 char *full_path; 2853 char *full_path;
2745 char *mount_data = mount_data_global;
2746 struct tcon_link *tlink; 2854 struct tcon_link *tlink;
2747#ifdef CONFIG_CIFS_DFS_UPCALL 2855#ifdef CONFIG_CIFS_DFS_UPCALL
2748 struct dfs_info3_param *referrals = NULL;
2749 unsigned int num_referrals = 0;
2750 int referral_walks_count = 0; 2856 int referral_walks_count = 0;
2751try_mount_again: 2857try_mount_again:
2858 /* cleanup activities if we're chasing a referral */
2859 if (referral_walks_count) {
2860 if (tcon)
2861 cifs_put_tcon(tcon);
2862 else if (pSesInfo)
2863 cifs_put_smb_ses(pSesInfo);
2864
2865 cleanup_volume_info(&volume_info);
2866 FreeXid(xid);
2867 }
2752#endif 2868#endif
2753 rc = 0; 2869 rc = 0;
2754 tcon = NULL; 2870 tcon = NULL;
@@ -2765,7 +2881,8 @@ try_mount_again:
2765 goto out; 2881 goto out;
2766 } 2882 }
2767 2883
2768 if (cifs_parse_mount_options(mount_data, devname, volume_info)) { 2884 if (cifs_parse_mount_options(cifs_sb->mountdata, devname,
2885 volume_info)) {
2769 rc = -EINVAL; 2886 rc = -EINVAL;
2770 goto out; 2887 goto out;
2771 } 2888 }
@@ -2861,6 +2978,24 @@ try_mount_again:
2861 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE)); 2978 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE));
2862 2979
2863remote_path_check: 2980remote_path_check:
2981#ifdef CONFIG_CIFS_DFS_UPCALL
2982 /*
2983 * Perform an unconditional check for whether there are DFS
2984 * referrals for this path without prefix, to provide support
2985 * for DFS referrals from w2k8 servers which don't seem to respond
2986 * with PATH_NOT_COVERED to requests that include the prefix.
2987 * Chase the referral if found, otherwise continue normally.
2988 */
2989 if (referral_walks_count == 0) {
2990 int refrc = expand_dfs_referral(xid, pSesInfo, volume_info,
2991 cifs_sb, false);
2992 if (!refrc) {
2993 referral_walks_count++;
2994 goto try_mount_again;
2995 }
2996 }
2997#endif
2998
2864 /* check if a whole path (including prepath) is not remote */ 2999 /* check if a whole path (including prepath) is not remote */
2865 if (!rc && tcon) { 3000 if (!rc && tcon) {
2866 /* build_path_to_root works only when we have a valid tcon */ 3001 /* build_path_to_root works only when we have a valid tcon */
@@ -2894,46 +3029,15 @@ remote_path_check:
2894 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0) 3029 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0)
2895 convert_delimiter(cifs_sb->prepath, 3030 convert_delimiter(cifs_sb->prepath,
2896 CIFS_DIR_SEP(cifs_sb)); 3031 CIFS_DIR_SEP(cifs_sb));
2897 full_path = build_unc_path_to_root(volume_info, cifs_sb);
2898 if (IS_ERR(full_path)) {
2899 rc = PTR_ERR(full_path);
2900 goto mount_fail_check;
2901 }
2902
2903 cFYI(1, "Getting referral for: %s", full_path);
2904 rc = get_dfs_path(xid, pSesInfo , full_path + 1,
2905 cifs_sb->local_nls, &num_referrals, &referrals,
2906 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
2907 if (!rc && num_referrals > 0) {
2908 char *fake_devname = NULL;
2909
2910 if (mount_data != mount_data_global)
2911 kfree(mount_data);
2912 3032
2913 mount_data = cifs_compose_mount_options( 3033 rc = expand_dfs_referral(xid, pSesInfo, volume_info, cifs_sb,
2914 cifs_sb->mountdata, full_path + 1, 3034 true);
2915 referrals, &fake_devname);
2916 3035
2917 free_dfs_info_array(referrals, num_referrals); 3036 if (!rc) {
2918 kfree(fake_devname);
2919 kfree(full_path);
2920
2921 if (IS_ERR(mount_data)) {
2922 rc = PTR_ERR(mount_data);
2923 mount_data = NULL;
2924 goto mount_fail_check;
2925 }
2926
2927 if (tcon)
2928 cifs_put_tcon(tcon);
2929 else if (pSesInfo)
2930 cifs_put_smb_ses(pSesInfo);
2931
2932 cleanup_volume_info(&volume_info);
2933 referral_walks_count++; 3037 referral_walks_count++;
2934 FreeXid(xid);
2935 goto try_mount_again; 3038 goto try_mount_again;
2936 } 3039 }
3040 goto mount_fail_check;
2937#else /* No DFS support, return error on mount */ 3041#else /* No DFS support, return error on mount */
2938 rc = -EOPNOTSUPP; 3042 rc = -EOPNOTSUPP;
2939#endif 3043#endif
@@ -2966,8 +3070,6 @@ remote_path_check:
2966mount_fail_check: 3070mount_fail_check:
2967 /* on error free sesinfo and tcon struct if needed */ 3071 /* on error free sesinfo and tcon struct if needed */
2968 if (rc) { 3072 if (rc) {
2969 if (mount_data != mount_data_global)
2970 kfree(mount_data);
2971 /* If find_unc succeeded then rc == 0 so we can not end */ 3073 /* If find_unc succeeded then rc == 0 so we can not end */
2972 /* up accidentally freeing someone elses tcon struct */ 3074 /* up accidentally freeing someone elses tcon struct */
2973 if (tcon) 3075 if (tcon)
@@ -3083,7 +3185,8 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3083 bcc_ptr += strlen("?????"); 3185 bcc_ptr += strlen("?????");
3084 bcc_ptr += 1; 3186 bcc_ptr += 1;
3085 count = bcc_ptr - &pSMB->Password[0]; 3187 count = bcc_ptr - &pSMB->Password[0];
3086 pSMB->hdr.smb_buf_length += count; 3188 pSMB->hdr.smb_buf_length = cpu_to_be32(be32_to_cpu(
3189 pSMB->hdr.smb_buf_length) + count);
3087 pSMB->ByteCount = cpu_to_le16(count); 3190 pSMB->ByteCount = cpu_to_le16(count);
3088 3191
3089 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length, 3192 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length,
@@ -3258,7 +3361,9 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid)
3258 struct cifsSesInfo *ses; 3361 struct cifsSesInfo *ses;
3259 struct cifsTconInfo *tcon = NULL; 3362 struct cifsTconInfo *tcon = NULL;
3260 struct smb_vol *vol_info; 3363 struct smb_vol *vol_info;
3261 char username[MAX_USERNAME_SIZE + 1]; 3364 char username[28]; /* big enough for "krb50x" + hex of ULONG_MAX 6+16 */
3365 /* We used to have this as MAX_USERNAME which is */
3366 /* way too big now (256 instead of 32) */
3262 3367
3263 vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL); 3368 vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL);
3264 if (vol_info == NULL) { 3369 if (vol_info == NULL) {
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 993f82045bf6..55d87ac52000 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -45,7 +45,7 @@
45#include "cifs_debug.h" 45#include "cifs_debug.h"
46#include "cifsfs.h" 46#include "cifsfs.h"
47 47
48#ifdef CONFIG_CIFS_EXPERIMENTAL 48#ifdef CIFS_NFSD_EXPORT
49static struct dentry *cifs_get_parent(struct dentry *dentry) 49static struct dentry *cifs_get_parent(struct dentry *dentry)
50{ 50{
51 /* BB need to add code here eventually to enable export via NFSD */ 51 /* BB need to add code here eventually to enable export via NFSD */
@@ -63,5 +63,5 @@ const struct export_operations cifs_export_ops = {
63 .encode_fs = */ 63 .encode_fs = */
64}; 64};
65 65
66#endif /* EXPERIMENTAL */ 66#endif /* CIFS_NFSD_EXPORT */
67 67
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index faf59529e847..c672afef0c09 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -857,95 +857,6 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
857 cifsi->server_eof = end_of_write; 857 cifsi->server_eof = end_of_write;
858} 858}
859 859
860ssize_t cifs_user_write(struct file *file, const char __user *write_data,
861 size_t write_size, loff_t *poffset)
862{
863 struct inode *inode = file->f_path.dentry->d_inode;
864 int rc = 0;
865 unsigned int bytes_written = 0;
866 unsigned int total_written;
867 struct cifs_sb_info *cifs_sb;
868 struct cifsTconInfo *pTcon;
869 int xid;
870 struct cifsFileInfo *open_file;
871 struct cifsInodeInfo *cifsi = CIFS_I(inode);
872
873 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
874
875 /* cFYI(1, " write %d bytes to offset %lld of %s", write_size,
876 *poffset, file->f_path.dentry->d_name.name); */
877
878 if (file->private_data == NULL)
879 return -EBADF;
880
881 open_file = file->private_data;
882 pTcon = tlink_tcon(open_file->tlink);
883
884 rc = generic_write_checks(file, poffset, &write_size, 0);
885 if (rc)
886 return rc;
887
888 xid = GetXid();
889
890 for (total_written = 0; write_size > total_written;
891 total_written += bytes_written) {
892 rc = -EAGAIN;
893 while (rc == -EAGAIN) {
894 if (file->private_data == NULL) {
895 /* file has been closed on us */
896 FreeXid(xid);
897 /* if we have gotten here we have written some data
898 and blocked, and the file has been freed on us while
899 we blocked so return what we managed to write */
900 return total_written;
901 }
902 if (open_file->invalidHandle) {
903 /* we could deadlock if we called
904 filemap_fdatawait from here so tell
905 reopen_file not to flush data to server
906 now */
907 rc = cifs_reopen_file(open_file, false);
908 if (rc != 0)
909 break;
910 }
911
912 rc = CIFSSMBWrite(xid, pTcon,
913 open_file->netfid,
914 min_t(const int, cifs_sb->wsize,
915 write_size - total_written),
916 *poffset, &bytes_written,
917 NULL, write_data + total_written, 0);
918 }
919 if (rc || (bytes_written == 0)) {
920 if (total_written)
921 break;
922 else {
923 FreeXid(xid);
924 return rc;
925 }
926 } else {
927 cifs_update_eof(cifsi, *poffset, bytes_written);
928 *poffset += bytes_written;
929 }
930 }
931
932 cifs_stats_bytes_written(pTcon, total_written);
933
934/* Do not update local mtime - server will set its actual value on write
935 * inode->i_ctime = inode->i_mtime =
936 * current_fs_time(inode->i_sb);*/
937 if (total_written > 0) {
938 spin_lock(&inode->i_lock);
939 if (*poffset > inode->i_size)
940 i_size_write(inode, *poffset);
941 spin_unlock(&inode->i_lock);
942 }
943 mark_inode_dirty_sync(inode);
944
945 FreeXid(xid);
946 return total_written;
947}
948
949static ssize_t cifs_write(struct cifsFileInfo *open_file, 860static ssize_t cifs_write(struct cifsFileInfo *open_file,
950 const char *write_data, size_t write_size, 861 const char *write_data, size_t write_size,
951 loff_t *poffset) 862 loff_t *poffset)
@@ -1420,9 +1331,10 @@ retry_write:
1420 return rc; 1331 return rc;
1421} 1332}
1422 1333
1423static int cifs_writepage(struct page *page, struct writeback_control *wbc) 1334static int
1335cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
1424{ 1336{
1425 int rc = -EFAULT; 1337 int rc;
1426 int xid; 1338 int xid;
1427 1339
1428 xid = GetXid(); 1340 xid = GetXid();
@@ -1442,15 +1354,29 @@ static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1442 * to fail to update with the state of the page correctly. 1354 * to fail to update with the state of the page correctly.
1443 */ 1355 */
1444 set_page_writeback(page); 1356 set_page_writeback(page);
1357retry_write:
1445 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE); 1358 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1446 SetPageUptodate(page); /* BB add check for error and Clearuptodate? */ 1359 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
1447 unlock_page(page); 1360 goto retry_write;
1361 else if (rc == -EAGAIN)
1362 redirty_page_for_writepage(wbc, page);
1363 else if (rc != 0)
1364 SetPageError(page);
1365 else
1366 SetPageUptodate(page);
1448 end_page_writeback(page); 1367 end_page_writeback(page);
1449 page_cache_release(page); 1368 page_cache_release(page);
1450 FreeXid(xid); 1369 FreeXid(xid);
1451 return rc; 1370 return rc;
1452} 1371}
1453 1372
1373static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1374{
1375 int rc = cifs_writepage_locked(page, wbc);
1376 unlock_page(page);
1377 return rc;
1378}
1379
1454static int cifs_write_end(struct file *file, struct address_space *mapping, 1380static int cifs_write_end(struct file *file, struct address_space *mapping,
1455 loff_t pos, unsigned len, unsigned copied, 1381 loff_t pos, unsigned len, unsigned copied,
1456 struct page *page, void *fsdata) 1382 struct page *page, void *fsdata)
@@ -1519,8 +1445,13 @@ int cifs_strict_fsync(struct file *file, int datasync)
1519 cFYI(1, "Sync file - name: %s datasync: 0x%x", 1445 cFYI(1, "Sync file - name: %s datasync: 0x%x",
1520 file->f_path.dentry->d_name.name, datasync); 1446 file->f_path.dentry->d_name.name, datasync);
1521 1447
1522 if (!CIFS_I(inode)->clientCanCacheRead) 1448 if (!CIFS_I(inode)->clientCanCacheRead) {
1523 cifs_invalidate_mapping(inode); 1449 rc = cifs_invalidate_mapping(inode);
1450 if (rc) {
1451 cFYI(1, "rc: %d during invalidate phase", rc);
1452 rc = 0; /* don't care about it in fsync */
1453 }
1454 }
1524 1455
1525 tcon = tlink_tcon(smbfile->tlink); 1456 tcon = tlink_tcon(smbfile->tlink);
1526 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) 1457 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
@@ -1726,7 +1657,7 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
1726 return total_written; 1657 return total_written;
1727} 1658}
1728 1659
1729static ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, 1660ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
1730 unsigned long nr_segs, loff_t pos) 1661 unsigned long nr_segs, loff_t pos)
1731{ 1662{
1732 ssize_t written; 1663 ssize_t written;
@@ -1849,17 +1780,7 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
1849 return total_read; 1780 return total_read;
1850} 1781}
1851 1782
1852ssize_t cifs_user_read(struct file *file, char __user *read_data, 1783ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
1853 size_t read_size, loff_t *poffset)
1854{
1855 struct iovec iov;
1856 iov.iov_base = read_data;
1857 iov.iov_len = read_size;
1858
1859 return cifs_iovec_read(file, &iov, 1, poffset);
1860}
1861
1862static ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
1863 unsigned long nr_segs, loff_t pos) 1784 unsigned long nr_segs, loff_t pos)
1864{ 1785{
1865 ssize_t read; 1786 ssize_t read;
@@ -1987,8 +1908,11 @@ int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
1987 1908
1988 xid = GetXid(); 1909 xid = GetXid();
1989 1910
1990 if (!CIFS_I(inode)->clientCanCacheRead) 1911 if (!CIFS_I(inode)->clientCanCacheRead) {
1991 cifs_invalidate_mapping(inode); 1912 rc = cifs_invalidate_mapping(inode);
1913 if (rc)
1914 return rc;
1915 }
1992 1916
1993 rc = generic_file_mmap(file, vma); 1917 rc = generic_file_mmap(file, vma);
1994 if (rc == 0) 1918 if (rc == 0)
@@ -2415,6 +2339,27 @@ static void cifs_invalidate_page(struct page *page, unsigned long offset)
2415 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode); 2339 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
2416} 2340}
2417 2341
2342static int cifs_launder_page(struct page *page)
2343{
2344 int rc = 0;
2345 loff_t range_start = page_offset(page);
2346 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
2347 struct writeback_control wbc = {
2348 .sync_mode = WB_SYNC_ALL,
2349 .nr_to_write = 0,
2350 .range_start = range_start,
2351 .range_end = range_end,
2352 };
2353
2354 cFYI(1, "Launder page: %p", page);
2355
2356 if (clear_page_dirty_for_io(page))
2357 rc = cifs_writepage_locked(page, &wbc);
2358
2359 cifs_fscache_invalidate_page(page, page->mapping->host);
2360 return rc;
2361}
2362
2418void cifs_oplock_break(struct work_struct *work) 2363void cifs_oplock_break(struct work_struct *work)
2419{ 2364{
2420 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, 2365 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
@@ -2486,7 +2431,7 @@ const struct address_space_operations cifs_addr_ops = {
2486 .set_page_dirty = __set_page_dirty_nobuffers, 2431 .set_page_dirty = __set_page_dirty_nobuffers,
2487 .releasepage = cifs_release_page, 2432 .releasepage = cifs_release_page,
2488 .invalidatepage = cifs_invalidate_page, 2433 .invalidatepage = cifs_invalidate_page,
2489 /* .direct_IO = */ 2434 .launder_page = cifs_launder_page,
2490}; 2435};
2491 2436
2492/* 2437/*
@@ -2503,5 +2448,5 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
2503 .set_page_dirty = __set_page_dirty_nobuffers, 2448 .set_page_dirty = __set_page_dirty_nobuffers,
2504 .releasepage = cifs_release_page, 2449 .releasepage = cifs_release_page,
2505 .invalidatepage = cifs_invalidate_page, 2450 .invalidatepage = cifs_invalidate_page,
2506 /* .direct_IO = */ 2451 .launder_page = cifs_launder_page,
2507}; 2452};
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 8852470b4fbb..de02ed5e25c2 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -878,7 +878,7 @@ retry_iget5_locked:
878} 878}
879 879
880/* gets root inode */ 880/* gets root inode */
881struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) 881struct inode *cifs_root_iget(struct super_block *sb)
882{ 882{
883 int xid; 883 int xid;
884 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 884 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
@@ -1683,71 +1683,70 @@ cifs_inode_needs_reval(struct inode *inode)
1683/* 1683/*
1684 * Zap the cache. Called when invalid_mapping flag is set. 1684 * Zap the cache. Called when invalid_mapping flag is set.
1685 */ 1685 */
1686void 1686int
1687cifs_invalidate_mapping(struct inode *inode) 1687cifs_invalidate_mapping(struct inode *inode)
1688{ 1688{
1689 int rc; 1689 int rc = 0;
1690 struct cifsInodeInfo *cifs_i = CIFS_I(inode); 1690 struct cifsInodeInfo *cifs_i = CIFS_I(inode);
1691 1691
1692 cifs_i->invalid_mapping = false; 1692 cifs_i->invalid_mapping = false;
1693 1693
1694 /* write back any cached data */
1695 if (inode->i_mapping && inode->i_mapping->nrpages != 0) { 1694 if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
1696 rc = filemap_write_and_wait(inode->i_mapping); 1695 rc = invalidate_inode_pages2(inode->i_mapping);
1697 mapping_set_error(inode->i_mapping, rc); 1696 if (rc) {
1697 cERROR(1, "%s: could not invalidate inode %p", __func__,
1698 inode);
1699 cifs_i->invalid_mapping = true;
1700 }
1698 } 1701 }
1699 invalidate_remote_inode(inode); 1702
1700 cifs_fscache_reset_inode_cookie(inode); 1703 cifs_fscache_reset_inode_cookie(inode);
1704 return rc;
1701} 1705}
1702 1706
1703int cifs_revalidate_file(struct file *filp) 1707int cifs_revalidate_file_attr(struct file *filp)
1704{ 1708{
1705 int rc = 0; 1709 int rc = 0;
1706 struct inode *inode = filp->f_path.dentry->d_inode; 1710 struct inode *inode = filp->f_path.dentry->d_inode;
1707 struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data; 1711 struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data;
1708 1712
1709 if (!cifs_inode_needs_reval(inode)) 1713 if (!cifs_inode_needs_reval(inode))
1710 goto check_inval; 1714 return rc;
1711 1715
1712 if (tlink_tcon(cfile->tlink)->unix_ext) 1716 if (tlink_tcon(cfile->tlink)->unix_ext)
1713 rc = cifs_get_file_info_unix(filp); 1717 rc = cifs_get_file_info_unix(filp);
1714 else 1718 else
1715 rc = cifs_get_file_info(filp); 1719 rc = cifs_get_file_info(filp);
1716 1720
1717check_inval:
1718 if (CIFS_I(inode)->invalid_mapping)
1719 cifs_invalidate_mapping(inode);
1720
1721 return rc; 1721 return rc;
1722} 1722}
1723 1723
1724/* revalidate a dentry's inode attributes */ 1724int cifs_revalidate_dentry_attr(struct dentry *dentry)
1725int cifs_revalidate_dentry(struct dentry *dentry)
1726{ 1725{
1727 int xid; 1726 int xid;
1728 int rc = 0; 1727 int rc = 0;
1729 char *full_path = NULL;
1730 struct inode *inode = dentry->d_inode; 1728 struct inode *inode = dentry->d_inode;
1731 struct super_block *sb = dentry->d_sb; 1729 struct super_block *sb = dentry->d_sb;
1730 char *full_path = NULL;
1732 1731
1733 if (inode == NULL) 1732 if (inode == NULL)
1734 return -ENOENT; 1733 return -ENOENT;
1735 1734
1736 xid = GetXid();
1737
1738 if (!cifs_inode_needs_reval(inode)) 1735 if (!cifs_inode_needs_reval(inode))
1739 goto check_inval; 1736 return rc;
1737
1738 xid = GetXid();
1740 1739
1741 /* can not safely grab the rename sem here if rename calls revalidate 1740 /* can not safely grab the rename sem here if rename calls revalidate
1742 since that would deadlock */ 1741 since that would deadlock */
1743 full_path = build_path_from_dentry(dentry); 1742 full_path = build_path_from_dentry(dentry);
1744 if (full_path == NULL) { 1743 if (full_path == NULL) {
1745 rc = -ENOMEM; 1744 rc = -ENOMEM;
1746 goto check_inval; 1745 goto out;
1747 } 1746 }
1748 1747
1749 cFYI(1, "Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld " 1748 cFYI(1, "Update attributes: %s inode 0x%p count %d dentry: 0x%p d_time "
1750 "jiffies %ld", full_path, inode, inode->i_count.counter, 1749 "%ld jiffies %ld", full_path, inode, inode->i_count.counter,
1751 dentry, dentry->d_time, jiffies); 1750 dentry, dentry->d_time, jiffies);
1752 1751
1753 if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext) 1752 if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext)
@@ -1756,41 +1755,83 @@ int cifs_revalidate_dentry(struct dentry *dentry)
1756 rc = cifs_get_inode_info(&inode, full_path, NULL, sb, 1755 rc = cifs_get_inode_info(&inode, full_path, NULL, sb,
1757 xid, NULL); 1756 xid, NULL);
1758 1757
1759check_inval: 1758out:
1760 if (CIFS_I(inode)->invalid_mapping)
1761 cifs_invalidate_mapping(inode);
1762
1763 kfree(full_path); 1759 kfree(full_path);
1764 FreeXid(xid); 1760 FreeXid(xid);
1765 return rc; 1761 return rc;
1766} 1762}
1767 1763
1764int cifs_revalidate_file(struct file *filp)
1765{
1766 int rc;
1767 struct inode *inode = filp->f_path.dentry->d_inode;
1768
1769 rc = cifs_revalidate_file_attr(filp);
1770 if (rc)
1771 return rc;
1772
1773 if (CIFS_I(inode)->invalid_mapping)
1774 rc = cifs_invalidate_mapping(inode);
1775 return rc;
1776}
1777
1778/* revalidate a dentry's inode attributes */
1779int cifs_revalidate_dentry(struct dentry *dentry)
1780{
1781 int rc;
1782 struct inode *inode = dentry->d_inode;
1783
1784 rc = cifs_revalidate_dentry_attr(dentry);
1785 if (rc)
1786 return rc;
1787
1788 if (CIFS_I(inode)->invalid_mapping)
1789 rc = cifs_invalidate_mapping(inode);
1790 return rc;
1791}
1792
1768int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry, 1793int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1769 struct kstat *stat) 1794 struct kstat *stat)
1770{ 1795{
1771 struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb); 1796 struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb);
1772 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); 1797 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
1773 int err = cifs_revalidate_dentry(dentry); 1798 struct inode *inode = dentry->d_inode;
1774 1799 int rc;
1775 if (!err) {
1776 generic_fillattr(dentry->d_inode, stat);
1777 stat->blksize = CIFS_MAX_MSGSIZE;
1778 stat->ino = CIFS_I(dentry->d_inode)->uniqueid;
1779 1800
1780 /* 1801 /*
1781 * If on a multiuser mount without unix extensions, and the 1802 * We need to be sure that all dirty pages are written and the server
1782 * admin hasn't overridden them, set the ownership to the 1803 * has actual ctime, mtime and file length.
1783 * fsuid/fsgid of the current process. 1804 */
1784 */ 1805 if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping &&
1785 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) && 1806 inode->i_mapping->nrpages != 0) {
1786 !tcon->unix_ext) { 1807 rc = filemap_fdatawait(inode->i_mapping);
1787 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)) 1808 if (rc) {
1788 stat->uid = current_fsuid(); 1809 mapping_set_error(inode->i_mapping, rc);
1789 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)) 1810 return rc;
1790 stat->gid = current_fsgid();
1791 } 1811 }
1792 } 1812 }
1793 return err; 1813
1814 rc = cifs_revalidate_dentry_attr(dentry);
1815 if (rc)
1816 return rc;
1817
1818 generic_fillattr(inode, stat);
1819 stat->blksize = CIFS_MAX_MSGSIZE;
1820 stat->ino = CIFS_I(inode)->uniqueid;
1821
1822 /*
1823 * If on a multiuser mount without unix extensions, and the admin hasn't
1824 * overridden them, set the ownership to the fsuid/fsgid of the current
1825 * process.
1826 */
1827 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) &&
1828 !tcon->unix_ext) {
1829 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID))
1830 stat->uid = current_fsuid();
1831 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID))
1832 stat->gid = current_fsgid();
1833 }
1834 return rc;
1794} 1835}
1795 1836
1796static int cifs_truncate_page(struct address_space *mapping, loff_t from) 1837static int cifs_truncate_page(struct address_space *mapping, loff_t from)
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 0c684ae4c071..907531ac5888 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -304,12 +304,10 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
304 304
305 memset(temp, 0, 256); /* bigger than MAX_CIFS_HDR_SIZE */ 305 memset(temp, 0, 256); /* bigger than MAX_CIFS_HDR_SIZE */
306 306
307 buffer->smb_buf_length = 307 buffer->smb_buf_length = cpu_to_be32(
308 (2 * word_count) + sizeof(struct smb_hdr) - 308 (2 * word_count) + sizeof(struct smb_hdr) -
309 4 /* RFC 1001 length field does not count */ + 309 4 /* RFC 1001 length field does not count */ +
310 2 /* for bcc field itself */ ; 310 2 /* for bcc field itself */) ;
311 /* Note that this is the only network field that has to be converted
312 to big endian and it is done just before we send it */
313 311
314 buffer->Protocol[0] = 0xFF; 312 buffer->Protocol[0] = 0xFF;
315 buffer->Protocol[1] = 'S'; 313 buffer->Protocol[1] = 'S';
@@ -424,7 +422,7 @@ check_smb_hdr(struct smb_hdr *smb, __u16 mid)
424int 422int
425checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length) 423checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
426{ 424{
427 __u32 len = smb->smb_buf_length; 425 __u32 len = be32_to_cpu(smb->smb_buf_length);
428 __u32 clc_len; /* calculated length */ 426 __u32 clc_len; /* calculated length */
429 cFYI(0, "checkSMB Length: 0x%x, smb_buf_length: 0x%x", length, len); 427 cFYI(0, "checkSMB Length: 0x%x, smb_buf_length: 0x%x", length, len);
430 428
@@ -464,7 +462,7 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
464 462
465 if (check_smb_hdr(smb, mid)) 463 if (check_smb_hdr(smb, mid))
466 return 1; 464 return 1;
467 clc_len = smbCalcSize_LE(smb); 465 clc_len = smbCalcSize(smb);
468 466
469 if (4 + len != length) { 467 if (4 + len != length) {
470 cERROR(1, "Length read does not match RFC1001 length %d", 468 cERROR(1, "Length read does not match RFC1001 length %d",
@@ -521,7 +519,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
521 (struct smb_com_transaction_change_notify_rsp *)buf; 519 (struct smb_com_transaction_change_notify_rsp *)buf;
522 struct file_notify_information *pnotify; 520 struct file_notify_information *pnotify;
523 __u32 data_offset = 0; 521 __u32 data_offset = 0;
524 if (get_bcc_le(buf) > sizeof(struct file_notify_information)) { 522 if (get_bcc(buf) > sizeof(struct file_notify_information)) {
525 data_offset = le32_to_cpu(pSMBr->DataOffset); 523 data_offset = le32_to_cpu(pSMBr->DataOffset);
526 524
527 pnotify = (struct file_notify_information *) 525 pnotify = (struct file_notify_information *)
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 79f641eeda30..79b71c2c7c9d 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -919,13 +919,6 @@ smbCalcSize(struct smb_hdr *ptr)
919 2 /* size of the bcc field */ + get_bcc(ptr)); 919 2 /* size of the bcc field */ + get_bcc(ptr));
920} 920}
921 921
922unsigned int
923smbCalcSize_LE(struct smb_hdr *ptr)
924{
925 return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) +
926 2 /* size of the bcc field */ + get_bcc_le(ptr));
927}
928
929/* The following are taken from fs/ntfs/util.c */ 922/* The following are taken from fs/ntfs/util.c */
930 923
931#define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000) 924#define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000)
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 645114ad0a10..7dd462100378 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -621,7 +621,7 @@ ssetup_ntlmssp_authenticate:
621 and rest of bcc area. This allows us to avoid 621 and rest of bcc area. This allows us to avoid
622 a large buffer 17K allocation */ 622 a large buffer 17K allocation */
623 iov[0].iov_base = (char *)pSMB; 623 iov[0].iov_base = (char *)pSMB;
624 iov[0].iov_len = smb_buf->smb_buf_length + 4; 624 iov[0].iov_len = be32_to_cpu(smb_buf->smb_buf_length) + 4;
625 625
626 /* setting this here allows the code at the end of the function 626 /* setting this here allows the code at the end of the function
627 to free the request buffer if there's an error */ 627 to free the request buffer if there's an error */
@@ -656,7 +656,7 @@ ssetup_ntlmssp_authenticate:
656 * to use challenge/response method (i.e. Password bit is 1). 656 * to use challenge/response method (i.e. Password bit is 1).
657 */ 657 */
658 658
659 calc_lanman_hash(ses->password, ses->server->cryptkey, 659 rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
660 ses->server->secMode & SECMODE_PW_ENCRYPT ? 660 ses->server->secMode & SECMODE_PW_ENCRYPT ?
661 true : false, lnm_session_key); 661 true : false, lnm_session_key);
662 662
@@ -859,9 +859,10 @@ ssetup_ntlmssp_authenticate:
859 iov[2].iov_len = (long) bcc_ptr - (long) str_area; 859 iov[2].iov_len = (long) bcc_ptr - (long) str_area;
860 860
861 count = iov[1].iov_len + iov[2].iov_len; 861 count = iov[1].iov_len + iov[2].iov_len;
862 smb_buf->smb_buf_length += count; 862 smb_buf->smb_buf_length =
863 cpu_to_be32(be32_to_cpu(smb_buf->smb_buf_length) + count);
863 864
864 put_bcc_le(count, smb_buf); 865 put_bcc(count, smb_buf);
865 866
866 rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type, 867 rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type,
867 CIFS_LOG_ERROR); 868 CIFS_LOG_ERROR);
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
deleted file mode 100644
index 04721485925d..000000000000
--- a/fs/cifs/smbdes.c
+++ /dev/null
@@ -1,418 +0,0 @@
1/*
2 Unix SMB/Netbios implementation.
3 Version 1.9.
4
5 a partial implementation of DES designed for use in the
6 SMB authentication protocol
7
8 Copyright (C) Andrew Tridgell 1998
9 Modified by Steve French (sfrench@us.ibm.com) 2002,2004
10
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24*/
25
26/* NOTES:
27
28 This code makes no attempt to be fast! In fact, it is a very
29 slow implementation
30
31 This code is NOT a complete DES implementation. It implements only
32 the minimum necessary for SMB authentication, as used by all SMB
33 products (including every copy of Microsoft Windows95 ever sold)
34
35 In particular, it can only do a unchained forward DES pass. This
36 means it is not possible to use this code for encryption/decryption
37 of data, instead it is only useful as a "hash" algorithm.
38
39 There is no entry point into this code that allows normal DES operation.
40
41 I believe this means that this code does not come under ITAR
42 regulations but this is NOT a legal opinion. If you are concerned
43 about the applicability of ITAR regulations to this code then you
44 should confirm it for yourself (and maybe let me know if you come
45 up with a different answer to the one above)
46*/
47#include <linux/slab.h>
48#define uchar unsigned char
49
50static uchar perm1[56] = { 57, 49, 41, 33, 25, 17, 9,
51 1, 58, 50, 42, 34, 26, 18,
52 10, 2, 59, 51, 43, 35, 27,
53 19, 11, 3, 60, 52, 44, 36,
54 63, 55, 47, 39, 31, 23, 15,
55 7, 62, 54, 46, 38, 30, 22,
56 14, 6, 61, 53, 45, 37, 29,
57 21, 13, 5, 28, 20, 12, 4
58};
59
60static uchar perm2[48] = { 14, 17, 11, 24, 1, 5,
61 3, 28, 15, 6, 21, 10,
62 23, 19, 12, 4, 26, 8,
63 16, 7, 27, 20, 13, 2,
64 41, 52, 31, 37, 47, 55,
65 30, 40, 51, 45, 33, 48,
66 44, 49, 39, 56, 34, 53,
67 46, 42, 50, 36, 29, 32
68};
69
70static uchar perm3[64] = { 58, 50, 42, 34, 26, 18, 10, 2,
71 60, 52, 44, 36, 28, 20, 12, 4,
72 62, 54, 46, 38, 30, 22, 14, 6,
73 64, 56, 48, 40, 32, 24, 16, 8,
74 57, 49, 41, 33, 25, 17, 9, 1,
75 59, 51, 43, 35, 27, 19, 11, 3,
76 61, 53, 45, 37, 29, 21, 13, 5,
77 63, 55, 47, 39, 31, 23, 15, 7
78};
79
80static uchar perm4[48] = { 32, 1, 2, 3, 4, 5,
81 4, 5, 6, 7, 8, 9,
82 8, 9, 10, 11, 12, 13,
83 12, 13, 14, 15, 16, 17,
84 16, 17, 18, 19, 20, 21,
85 20, 21, 22, 23, 24, 25,
86 24, 25, 26, 27, 28, 29,
87 28, 29, 30, 31, 32, 1
88};
89
90static uchar perm5[32] = { 16, 7, 20, 21,
91 29, 12, 28, 17,
92 1, 15, 23, 26,
93 5, 18, 31, 10,
94 2, 8, 24, 14,
95 32, 27, 3, 9,
96 19, 13, 30, 6,
97 22, 11, 4, 25
98};
99
100static uchar perm6[64] = { 40, 8, 48, 16, 56, 24, 64, 32,
101 39, 7, 47, 15, 55, 23, 63, 31,
102 38, 6, 46, 14, 54, 22, 62, 30,
103 37, 5, 45, 13, 53, 21, 61, 29,
104 36, 4, 44, 12, 52, 20, 60, 28,
105 35, 3, 43, 11, 51, 19, 59, 27,
106 34, 2, 42, 10, 50, 18, 58, 26,
107 33, 1, 41, 9, 49, 17, 57, 25
108};
109
110static uchar sc[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
111
112static uchar sbox[8][4][16] = {
113 {{14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7},
114 {0, 15, 7, 4, 14, 2, 13, 1, 10, 6, 12, 11, 9, 5, 3, 8},
115 {4, 1, 14, 8, 13, 6, 2, 11, 15, 12, 9, 7, 3, 10, 5, 0},
116 {15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13} },
117
118 {{15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10},
119 {3, 13, 4, 7, 15, 2, 8, 14, 12, 0, 1, 10, 6, 9, 11, 5},
120 {0, 14, 7, 11, 10, 4, 13, 1, 5, 8, 12, 6, 9, 3, 2, 15},
121 {13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9} },
122
123 {{10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8},
124 {13, 7, 0, 9, 3, 4, 6, 10, 2, 8, 5, 14, 12, 11, 15, 1},
125 {13, 6, 4, 9, 8, 15, 3, 0, 11, 1, 2, 12, 5, 10, 14, 7},
126 {1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12} },
127
128 {{7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15},
129 {13, 8, 11, 5, 6, 15, 0, 3, 4, 7, 2, 12, 1, 10, 14, 9},
130 {10, 6, 9, 0, 12, 11, 7, 13, 15, 1, 3, 14, 5, 2, 8, 4},
131 {3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14} },
132
133 {{2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9},
134 {14, 11, 2, 12, 4, 7, 13, 1, 5, 0, 15, 10, 3, 9, 8, 6},
135 {4, 2, 1, 11, 10, 13, 7, 8, 15, 9, 12, 5, 6, 3, 0, 14},
136 {11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3} },
137
138 {{12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11},
139 {10, 15, 4, 2, 7, 12, 9, 5, 6, 1, 13, 14, 0, 11, 3, 8},
140 {9, 14, 15, 5, 2, 8, 12, 3, 7, 0, 4, 10, 1, 13, 11, 6},
141 {4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13} },
142
143 {{4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1},
144 {13, 0, 11, 7, 4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6},
145 {1, 4, 11, 13, 12, 3, 7, 14, 10, 15, 6, 8, 0, 5, 9, 2},
146 {6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12} },
147
148 {{13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7},
149 {1, 15, 13, 8, 10, 3, 7, 4, 12, 5, 6, 11, 0, 14, 9, 2},
150 {7, 11, 4, 1, 9, 12, 14, 2, 0, 6, 10, 13, 15, 3, 5, 8},
151 {2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11} }
152};
153
154static void
155permute(char *out, char *in, uchar *p, int n)
156{
157 int i;
158 for (i = 0; i < n; i++)
159 out[i] = in[p[i] - 1];
160}
161
162static void
163lshift(char *d, int count, int n)
164{
165 char out[64];
166 int i;
167 for (i = 0; i < n; i++)
168 out[i] = d[(i + count) % n];
169 for (i = 0; i < n; i++)
170 d[i] = out[i];
171}
172
173static void
174concat(char *out, char *in1, char *in2, int l1, int l2)
175{
176 while (l1--)
177 *out++ = *in1++;
178 while (l2--)
179 *out++ = *in2++;
180}
181
182static void
183xor(char *out, char *in1, char *in2, int n)
184{
185 int i;
186 for (i = 0; i < n; i++)
187 out[i] = in1[i] ^ in2[i];
188}
189
190static void
191dohash(char *out, char *in, char *key, int forw)
192{
193 int i, j, k;
194 char *pk1;
195 char c[28];
196 char d[28];
197 char *cd;
198 char (*ki)[48];
199 char *pd1;
200 char l[32], r[32];
201 char *rl;
202
203 /* Have to reduce stack usage */
204 pk1 = kmalloc(56+56+64+64, GFP_KERNEL);
205 if (pk1 == NULL)
206 return;
207
208 ki = kmalloc(16*48, GFP_KERNEL);
209 if (ki == NULL) {
210 kfree(pk1);
211 return;
212 }
213
214 cd = pk1 + 56;
215 pd1 = cd + 56;
216 rl = pd1 + 64;
217
218 permute(pk1, key, perm1, 56);
219
220 for (i = 0; i < 28; i++)
221 c[i] = pk1[i];
222 for (i = 0; i < 28; i++)
223 d[i] = pk1[i + 28];
224
225 for (i = 0; i < 16; i++) {
226 lshift(c, sc[i], 28);
227 lshift(d, sc[i], 28);
228
229 concat(cd, c, d, 28, 28);
230 permute(ki[i], cd, perm2, 48);
231 }
232
233 permute(pd1, in, perm3, 64);
234
235 for (j = 0; j < 32; j++) {
236 l[j] = pd1[j];
237 r[j] = pd1[j + 32];
238 }
239
240 for (i = 0; i < 16; i++) {
241 char *er; /* er[48] */
242 char *erk; /* erk[48] */
243 char b[8][6];
244 char *cb; /* cb[32] */
245 char *pcb; /* pcb[32] */
246 char *r2; /* r2[32] */
247
248 er = kmalloc(48+48+32+32+32, GFP_KERNEL);
249 if (er == NULL) {
250 kfree(pk1);
251 kfree(ki);
252 return;
253 }
254 erk = er+48;
255 cb = erk+48;
256 pcb = cb+32;
257 r2 = pcb+32;
258
259 permute(er, r, perm4, 48);
260
261 xor(erk, er, ki[forw ? i : 15 - i], 48);
262
263 for (j = 0; j < 8; j++)
264 for (k = 0; k < 6; k++)
265 b[j][k] = erk[j * 6 + k];
266
267 for (j = 0; j < 8; j++) {
268 int m, n;
269 m = (b[j][0] << 1) | b[j][5];
270
271 n = (b[j][1] << 3) | (b[j][2] << 2) | (b[j][3] <<
272 1) | b[j][4];
273
274 for (k = 0; k < 4; k++)
275 b[j][k] =
276 (sbox[j][m][n] & (1 << (3 - k))) ? 1 : 0;
277 }
278
279 for (j = 0; j < 8; j++)
280 for (k = 0; k < 4; k++)
281 cb[j * 4 + k] = b[j][k];
282 permute(pcb, cb, perm5, 32);
283
284 xor(r2, l, pcb, 32);
285
286 for (j = 0; j < 32; j++)
287 l[j] = r[j];
288
289 for (j = 0; j < 32; j++)
290 r[j] = r2[j];
291
292 kfree(er);
293 }
294
295 concat(rl, r, l, 32, 32);
296
297 permute(out, rl, perm6, 64);
298 kfree(pk1);
299 kfree(ki);
300}
301
302static void
303str_to_key(unsigned char *str, unsigned char *key)
304{
305 int i;
306
307 key[0] = str[0] >> 1;
308 key[1] = ((str[0] & 0x01) << 6) | (str[1] >> 2);
309 key[2] = ((str[1] & 0x03) << 5) | (str[2] >> 3);
310 key[3] = ((str[2] & 0x07) << 4) | (str[3] >> 4);
311 key[4] = ((str[3] & 0x0F) << 3) | (str[4] >> 5);
312 key[5] = ((str[4] & 0x1F) << 2) | (str[5] >> 6);
313 key[6] = ((str[5] & 0x3F) << 1) | (str[6] >> 7);
314 key[7] = str[6] & 0x7F;
315 for (i = 0; i < 8; i++)
316 key[i] = (key[i] << 1);
317}
318
319static void
320smbhash(unsigned char *out, const unsigned char *in, unsigned char *key,
321 int forw)
322{
323 int i;
324 char *outb; /* outb[64] */
325 char *inb; /* inb[64] */
326 char *keyb; /* keyb[64] */
327 unsigned char key2[8];
328
329 outb = kmalloc(64 * 3, GFP_KERNEL);
330 if (outb == NULL)
331 return;
332
333 inb = outb + 64;
334 keyb = inb + 64;
335
336 str_to_key(key, key2);
337
338 for (i = 0; i < 64; i++) {
339 inb[i] = (in[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
340 keyb[i] = (key2[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
341 outb[i] = 0;
342 }
343
344 dohash(outb, inb, keyb, forw);
345
346 for (i = 0; i < 8; i++)
347 out[i] = 0;
348
349 for (i = 0; i < 64; i++) {
350 if (outb[i])
351 out[i / 8] |= (1 << (7 - (i % 8)));
352 }
353 kfree(outb);
354}
355
356void
357E_P16(unsigned char *p14, unsigned char *p16)
358{
359 unsigned char sp8[8] =
360 { 0x4b, 0x47, 0x53, 0x21, 0x40, 0x23, 0x24, 0x25 };
361 smbhash(p16, sp8, p14, 1);
362 smbhash(p16 + 8, sp8, p14 + 7, 1);
363}
364
365void
366E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
367{
368 smbhash(p24, c8, p21, 1);
369 smbhash(p24 + 8, c8, p21 + 7, 1);
370 smbhash(p24 + 16, c8, p21 + 14, 1);
371}
372
373#if 0 /* currently unused */
374static void
375D_P16(unsigned char *p14, unsigned char *in, unsigned char *out)
376{
377 smbhash(out, in, p14, 0);
378 smbhash(out + 8, in + 8, p14 + 7, 0);
379}
380
381static void
382E_old_pw_hash(unsigned char *p14, unsigned char *in, unsigned char *out)
383{
384 smbhash(out, in, p14, 1);
385 smbhash(out + 8, in + 8, p14 + 7, 1);
386}
387/* these routines are currently unneeded, but may be
388 needed later */
389void
390cred_hash1(unsigned char *out, unsigned char *in, unsigned char *key)
391{
392 unsigned char buf[8];
393
394 smbhash(buf, in, key, 1);
395 smbhash(out, buf, key + 9, 1);
396}
397
398void
399cred_hash2(unsigned char *out, unsigned char *in, unsigned char *key)
400{
401 unsigned char buf[8];
402 static unsigned char key2[8];
403
404 smbhash(buf, in, key, 1);
405 key2[0] = key[7];
406 smbhash(out, buf, key2, 1);
407}
408
409void
410cred_hash3(unsigned char *out, unsigned char *in, unsigned char *key, int forw)
411{
412 static unsigned char key2[8];
413
414 smbhash(out, in, key, forw);
415 key2[0] = key[7];
416 smbhash(out + 8, in + 8, key2, forw);
417}
418#endif /* unneeded routines */
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index b5041c849981..1525d5e662b6 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -47,6 +47,88 @@
47#define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8) 47#define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8)
48#define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val))) 48#define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val)))
49 49
50static void
51str_to_key(unsigned char *str, unsigned char *key)
52{
53 int i;
54
55 key[0] = str[0] >> 1;
56 key[1] = ((str[0] & 0x01) << 6) | (str[1] >> 2);
57 key[2] = ((str[1] & 0x03) << 5) | (str[2] >> 3);
58 key[3] = ((str[2] & 0x07) << 4) | (str[3] >> 4);
59 key[4] = ((str[3] & 0x0F) << 3) | (str[4] >> 5);
60 key[5] = ((str[4] & 0x1F) << 2) | (str[5] >> 6);
61 key[6] = ((str[5] & 0x3F) << 1) | (str[6] >> 7);
62 key[7] = str[6] & 0x7F;
63 for (i = 0; i < 8; i++)
64 key[i] = (key[i] << 1);
65}
66
67static int
68smbhash(unsigned char *out, const unsigned char *in, unsigned char *key)
69{
70 int rc;
71 unsigned char key2[8];
72 struct crypto_blkcipher *tfm_des;
73 struct scatterlist sgin, sgout;
74 struct blkcipher_desc desc;
75
76 str_to_key(key, key2);
77
78 tfm_des = crypto_alloc_blkcipher("ecb(des)", 0, CRYPTO_ALG_ASYNC);
79 if (IS_ERR(tfm_des)) {
80 rc = PTR_ERR(tfm_des);
81 cERROR(1, "could not allocate des crypto API\n");
82 goto smbhash_err;
83 }
84
85 desc.tfm = tfm_des;
86
87 crypto_blkcipher_setkey(tfm_des, key2, 8);
88
89 sg_init_one(&sgin, in, 8);
90 sg_init_one(&sgout, out, 8);
91
92 rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, 8);
93 if (rc) {
94 cERROR(1, "could not encrypt crypt key rc: %d\n", rc);
95 crypto_free_blkcipher(tfm_des);
96 goto smbhash_err;
97 }
98
99smbhash_err:
100 return rc;
101}
102
103static int
104E_P16(unsigned char *p14, unsigned char *p16)
105{
106 int rc;
107 unsigned char sp8[8] =
108 { 0x4b, 0x47, 0x53, 0x21, 0x40, 0x23, 0x24, 0x25 };
109
110 rc = smbhash(p16, sp8, p14);
111 if (rc)
112 return rc;
113 rc = smbhash(p16 + 8, sp8, p14 + 7);
114 return rc;
115}
116
117static int
118E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
119{
120 int rc;
121
122 rc = smbhash(p24, c8, p21);
123 if (rc)
124 return rc;
125 rc = smbhash(p24 + 8, c8, p21 + 7);
126 if (rc)
127 return rc;
128 rc = smbhash(p24 + 16, c8, p21 + 14);
129 return rc;
130}
131
50/* produce a md4 message digest from data of length n bytes */ 132/* produce a md4 message digest from data of length n bytes */
51int 133int
52mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len) 134mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len)
@@ -87,40 +169,30 @@ mdfour_err:
87 return rc; 169 return rc;
88} 170}
89 171
90/* Does the des encryption from the NT or LM MD4 hash. */
91static void
92SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8,
93 unsigned char p24[24])
94{
95 unsigned char p21[21];
96
97 memset(p21, '\0', 21);
98
99 memcpy(p21, passwd, 16);
100 E_P24(p21, c8, p24);
101}
102
103/* 172/*
104 This implements the X/Open SMB password encryption 173 This implements the X/Open SMB password encryption
105 It takes a password, a 8 byte "crypt key" and puts 24 bytes of 174 It takes a password, a 8 byte "crypt key" and puts 24 bytes of
106 encrypted password into p24 */ 175 encrypted password into p24 */
107/* Note that password must be uppercased and null terminated */ 176/* Note that password must be uppercased and null terminated */
108void 177int
109SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24) 178SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24)
110{ 179{
111 unsigned char p14[15], p21[21]; 180 int rc;
181 unsigned char p14[14], p16[16], p21[21];
112 182
113 memset(p21, '\0', 21);
114 memset(p14, '\0', 14); 183 memset(p14, '\0', 14);
115 strncpy((char *) p14, (char *) passwd, 14); 184 memset(p16, '\0', 16);
185 memset(p21, '\0', 21);
116 186
117/* strupper((char *)p14); *//* BB at least uppercase the easy range */ 187 memcpy(p14, passwd, 14);
118 E_P16(p14, p21); 188 rc = E_P16(p14, p16);
189 if (rc)
190 return rc;
119 191
120 SMBOWFencrypt(p21, c8, p24); 192 memcpy(p21, p16, 16);
193 rc = E_P24(p21, c8, p24);
121 194
122 memset(p14, 0, 15); 195 return rc;
123 memset(p21, 0, 21);
124} 196}
125 197
126/* Routines for Windows NT MD4 Hash functions. */ 198/* Routines for Windows NT MD4 Hash functions. */
@@ -279,16 +351,18 @@ int
279SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24) 351SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24)
280{ 352{
281 int rc; 353 int rc;
282 unsigned char p21[21]; 354 unsigned char p16[16], p21[21];
283 355
356 memset(p16, '\0', 16);
284 memset(p21, '\0', 21); 357 memset(p21, '\0', 21);
285 358
286 rc = E_md4hash(passwd, p21); 359 rc = E_md4hash(passwd, p16);
287 if (rc) { 360 if (rc) {
288 cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc); 361 cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
289 return rc; 362 return rc;
290 } 363 }
291 SMBOWFencrypt(p21, c8, p24); 364 memcpy(p21, p16, 16);
365 rc = E_P24(p21, c8, p24);
292 return rc; 366 return rc;
293} 367}
294 368
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 46d8756f2b24..f2513fb8c391 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -129,7 +129,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
129 unsigned int len = iov[0].iov_len; 129 unsigned int len = iov[0].iov_len;
130 unsigned int total_len; 130 unsigned int total_len;
131 int first_vec = 0; 131 int first_vec = 0;
132 unsigned int smb_buf_length = smb_buffer->smb_buf_length; 132 unsigned int smb_buf_length = be32_to_cpu(smb_buffer->smb_buf_length);
133 struct socket *ssocket = server->ssocket; 133 struct socket *ssocket = server->ssocket;
134 134
135 if (ssocket == NULL) 135 if (ssocket == NULL)
@@ -144,17 +144,10 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
144 else 144 else
145 smb_msg.msg_flags = MSG_NOSIGNAL; 145 smb_msg.msg_flags = MSG_NOSIGNAL;
146 146
147 /* smb header is converted in header_assemble. bcc and rest of SMB word
148 area, and byte area if necessary, is converted to littleendian in
149 cifssmb.c and RFC1001 len is converted to bigendian in smb_send
150 Flags2 is converted in SendReceive */
151
152
153 total_len = 0; 147 total_len = 0;
154 for (i = 0; i < n_vec; i++) 148 for (i = 0; i < n_vec; i++)
155 total_len += iov[i].iov_len; 149 total_len += iov[i].iov_len;
156 150
157 smb_buffer->smb_buf_length = cpu_to_be32(smb_buffer->smb_buf_length);
158 cFYI(1, "Sending smb: total_len %d", total_len); 151 cFYI(1, "Sending smb: total_len %d", total_len);
159 dump_smb(smb_buffer, len); 152 dump_smb(smb_buffer, len);
160 153
@@ -243,7 +236,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
243 236
244 /* Don't want to modify the buffer as a 237 /* Don't want to modify the buffer as a
245 side effect of this call. */ 238 side effect of this call. */
246 smb_buffer->smb_buf_length = smb_buf_length; 239 smb_buffer->smb_buf_length = cpu_to_be32(smb_buf_length);
247 240
248 return rc; 241 return rc;
249} 242}
@@ -387,7 +380,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
387#ifdef CONFIG_CIFS_STATS2 380#ifdef CONFIG_CIFS_STATS2
388 atomic_inc(&server->inSend); 381 atomic_inc(&server->inSend);
389#endif 382#endif
390 rc = smb_send(server, in_buf, in_buf->smb_buf_length); 383 rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
391#ifdef CONFIG_CIFS_STATS2 384#ifdef CONFIG_CIFS_STATS2
392 atomic_dec(&server->inSend); 385 atomic_dec(&server->inSend);
393 mid->when_sent = jiffies; 386 mid->when_sent = jiffies;
@@ -422,7 +415,7 @@ SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses,
422 int resp_buf_type; 415 int resp_buf_type;
423 416
424 iov[0].iov_base = (char *)in_buf; 417 iov[0].iov_base = (char *)in_buf;
425 iov[0].iov_len = in_buf->smb_buf_length + 4; 418 iov[0].iov_len = be32_to_cpu(in_buf->smb_buf_length) + 4;
426 flags |= CIFS_NO_RESP; 419 flags |= CIFS_NO_RESP;
427 rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags); 420 rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags);
428 cFYI(DBG2, "SendRcvNoRsp flags %d rc %d", flags, rc); 421 cFYI(DBG2, "SendRcvNoRsp flags %d rc %d", flags, rc);
@@ -488,10 +481,10 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
488 int rc = 0; 481 int rc = 0;
489 482
490 /* -4 for RFC1001 length and +2 for BCC field */ 483 /* -4 for RFC1001 length and +2 for BCC field */
491 in_buf->smb_buf_length = sizeof(struct smb_hdr) - 4 + 2; 484 in_buf->smb_buf_length = cpu_to_be32(sizeof(struct smb_hdr) - 4 + 2);
492 in_buf->Command = SMB_COM_NT_CANCEL; 485 in_buf->Command = SMB_COM_NT_CANCEL;
493 in_buf->WordCount = 0; 486 in_buf->WordCount = 0;
494 put_bcc_le(0, in_buf); 487 put_bcc(0, in_buf);
495 488
496 mutex_lock(&server->srv_mutex); 489 mutex_lock(&server->srv_mutex);
497 rc = cifs_sign_smb(in_buf, server, &mid->sequence_number); 490 rc = cifs_sign_smb(in_buf, server, &mid->sequence_number);
@@ -499,7 +492,7 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
499 mutex_unlock(&server->srv_mutex); 492 mutex_unlock(&server->srv_mutex);
500 return rc; 493 return rc;
501 } 494 }
502 rc = smb_send(server, in_buf, in_buf->smb_buf_length); 495 rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
503 mutex_unlock(&server->srv_mutex); 496 mutex_unlock(&server->srv_mutex);
504 497
505 cFYI(1, "issued NT_CANCEL for mid %u, rc = %d", 498 cFYI(1, "issued NT_CANCEL for mid %u, rc = %d",
@@ -612,7 +605,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
612 return rc; 605 return rc;
613 } 606 }
614 607
615 receive_len = midQ->resp_buf->smb_buf_length; 608 receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
616 609
617 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { 610 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
618 cERROR(1, "Frame too large received. Length: %d Xid: %d", 611 cERROR(1, "Frame too large received. Length: %d Xid: %d",
@@ -651,11 +644,6 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
651 rc = map_smb_to_linux_error(midQ->resp_buf, 644 rc = map_smb_to_linux_error(midQ->resp_buf,
652 flags & CIFS_LOG_ERROR); 645 flags & CIFS_LOG_ERROR);
653 646
654 /* convert ByteCount if necessary */
655 if (receive_len >= sizeof(struct smb_hdr) - 4
656 /* do not count RFC1001 header */ +
657 (2 * midQ->resp_buf->WordCount) + 2 /* bcc */ )
658 put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf);
659 if ((flags & CIFS_NO_RESP) == 0) 647 if ((flags & CIFS_NO_RESP) == 0)
660 midQ->resp_buf = NULL; /* mark it so buf will 648 midQ->resp_buf = NULL; /* mark it so buf will
661 not be freed by 649 not be freed by
@@ -698,9 +686,10 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
698 to the same server. We may make this configurable later or 686 to the same server. We may make this configurable later or
699 use ses->maxReq */ 687 use ses->maxReq */
700 688
701 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { 689 if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize +
690 MAX_CIFS_HDR_SIZE - 4) {
702 cERROR(1, "Illegal length, greater than maximum frame, %d", 691 cERROR(1, "Illegal length, greater than maximum frame, %d",
703 in_buf->smb_buf_length); 692 be32_to_cpu(in_buf->smb_buf_length));
704 return -EIO; 693 return -EIO;
705 } 694 }
706 695
@@ -733,7 +722,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
733#ifdef CONFIG_CIFS_STATS2 722#ifdef CONFIG_CIFS_STATS2
734 atomic_inc(&ses->server->inSend); 723 atomic_inc(&ses->server->inSend);
735#endif 724#endif
736 rc = smb_send(ses->server, in_buf, in_buf->smb_buf_length); 725 rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
737#ifdef CONFIG_CIFS_STATS2 726#ifdef CONFIG_CIFS_STATS2
738 atomic_dec(&ses->server->inSend); 727 atomic_dec(&ses->server->inSend);
739 midQ->when_sent = jiffies; 728 midQ->when_sent = jiffies;
@@ -768,7 +757,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
768 return rc; 757 return rc;
769 } 758 }
770 759
771 receive_len = midQ->resp_buf->smb_buf_length; 760 receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
772 761
773 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { 762 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
774 cERROR(1, "Frame too large received. Length: %d Xid: %d", 763 cERROR(1, "Frame too large received. Length: %d Xid: %d",
@@ -781,7 +770,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
781 770
782 if (midQ->resp_buf && out_buf 771 if (midQ->resp_buf && out_buf
783 && (midQ->midState == MID_RESPONSE_RECEIVED)) { 772 && (midQ->midState == MID_RESPONSE_RECEIVED)) {
784 out_buf->smb_buf_length = receive_len; 773 out_buf->smb_buf_length = cpu_to_be32(receive_len);
785 memcpy((char *)out_buf + 4, 774 memcpy((char *)out_buf + 4,
786 (char *)midQ->resp_buf + 4, 775 (char *)midQ->resp_buf + 4,
787 receive_len); 776 receive_len);
@@ -800,16 +789,10 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
800 } 789 }
801 } 790 }
802 791
803 *pbytes_returned = out_buf->smb_buf_length; 792 *pbytes_returned = be32_to_cpu(out_buf->smb_buf_length);
804 793
805 /* BB special case reconnect tid and uid here? */ 794 /* BB special case reconnect tid and uid here? */
806 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ ); 795 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
807
808 /* convert ByteCount if necessary */
809 if (receive_len >= sizeof(struct smb_hdr) - 4
810 /* do not count RFC1001 header */ +
811 (2 * out_buf->WordCount) + 2 /* bcc */ )
812 put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf);
813 } else { 796 } else {
814 rc = -EIO; 797 rc = -EIO;
815 cERROR(1, "Bad MID state?"); 798 cERROR(1, "Bad MID state?");
@@ -877,9 +860,10 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
877 to the same server. We may make this configurable later or 860 to the same server. We may make this configurable later or
878 use ses->maxReq */ 861 use ses->maxReq */
879 862
880 if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) { 863 if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize +
864 MAX_CIFS_HDR_SIZE - 4) {
881 cERROR(1, "Illegal length, greater than maximum frame, %d", 865 cERROR(1, "Illegal length, greater than maximum frame, %d",
882 in_buf->smb_buf_length); 866 be32_to_cpu(in_buf->smb_buf_length));
883 return -EIO; 867 return -EIO;
884 } 868 }
885 869
@@ -910,7 +894,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
910#ifdef CONFIG_CIFS_STATS2 894#ifdef CONFIG_CIFS_STATS2
911 atomic_inc(&ses->server->inSend); 895 atomic_inc(&ses->server->inSend);
912#endif 896#endif
913 rc = smb_send(ses->server, in_buf, in_buf->smb_buf_length); 897 rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
914#ifdef CONFIG_CIFS_STATS2 898#ifdef CONFIG_CIFS_STATS2
915 atomic_dec(&ses->server->inSend); 899 atomic_dec(&ses->server->inSend);
916 midQ->when_sent = jiffies; 900 midQ->when_sent = jiffies;
@@ -977,7 +961,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
977 if (rc != 0) 961 if (rc != 0)
978 return rc; 962 return rc;
979 963
980 receive_len = midQ->resp_buf->smb_buf_length; 964 receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
981 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) { 965 if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
982 cERROR(1, "Frame too large received. Length: %d Xid: %d", 966 cERROR(1, "Frame too large received. Length: %d Xid: %d",
983 receive_len, xid); 967 receive_len, xid);
@@ -993,7 +977,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
993 goto out; 977 goto out;
994 } 978 }
995 979
996 out_buf->smb_buf_length = receive_len; 980 out_buf->smb_buf_length = cpu_to_be32(receive_len);
997 memcpy((char *)out_buf + 4, 981 memcpy((char *)out_buf + 4,
998 (char *)midQ->resp_buf + 4, 982 (char *)midQ->resp_buf + 4,
999 receive_len); 983 receive_len);
@@ -1012,17 +996,11 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
1012 } 996 }
1013 } 997 }
1014 998
1015 *pbytes_returned = out_buf->smb_buf_length; 999 *pbytes_returned = be32_to_cpu(out_buf->smb_buf_length);
1016 1000
1017 /* BB special case reconnect tid and uid here? */ 1001 /* BB special case reconnect tid and uid here? */
1018 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ ); 1002 rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
1019 1003
1020 /* convert ByteCount if necessary */
1021 if (receive_len >= sizeof(struct smb_hdr) - 4
1022 /* do not count RFC1001 header */ +
1023 (2 * out_buf->WordCount) + 2 /* bcc */ )
1024 put_bcc(get_bcc_le(out_buf), out_buf);
1025
1026out: 1004out:
1027 delete_mid(midQ); 1005 delete_mid(midQ);
1028 if (rstart && rc == -EACCES) 1006 if (rstart && rc == -EACCES)
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index eae2a1491608..912995e013ec 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -112,6 +112,7 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
112 struct cifsTconInfo *pTcon; 112 struct cifsTconInfo *pTcon;
113 struct super_block *sb; 113 struct super_block *sb;
114 char *full_path; 114 char *full_path;
115 struct cifs_ntsd *pacl;
115 116
116 if (direntry == NULL) 117 if (direntry == NULL)
117 return -EIO; 118 return -EIO;
@@ -166,6 +167,25 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
166 rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value, 167 rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value,
167 (__u16)value_size, cifs_sb->local_nls, 168 (__u16)value_size, cifs_sb->local_nls,
168 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 169 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
170 } else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL,
171 strlen(CIFS_XATTR_CIFS_ACL)) == 0) {
172 pacl = kmalloc(value_size, GFP_KERNEL);
173 if (!pacl) {
174 cFYI(1, "%s: Can't allocate memory for ACL",
175 __func__);
176 rc = -ENOMEM;
177 } else {
178#ifdef CONFIG_CIFS_ACL
179 memcpy(pacl, ea_value, value_size);
180 rc = set_cifs_acl(pacl, value_size,
181 direntry->d_inode, full_path);
182 if (rc == 0) /* force revalidate of the inode */
183 CIFS_I(direntry->d_inode)->time = 0;
184 kfree(pacl);
185#else
186 cFYI(1, "Set CIFS ACL not supported yet");
187#endif /* CONFIG_CIFS_ACL */
188 }
169 } else { 189 } else {
170 int temp; 190 int temp;
171 temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS, 191 temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
diff --git a/fs/compat.c b/fs/compat.c
index 72fe6cda9108..0ea00832de23 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1306,241 +1306,6 @@ compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, int
1306 return do_sys_open(dfd, filename, flags, mode); 1306 return do_sys_open(dfd, filename, flags, mode);
1307} 1307}
1308 1308
1309/*
1310 * compat_count() counts the number of arguments/envelopes. It is basically
1311 * a copy of count() from fs/exec.c, except that it works with 32 bit argv
1312 * and envp pointers.
1313 */
1314static int compat_count(compat_uptr_t __user *argv, int max)
1315{
1316 int i = 0;
1317
1318 if (argv != NULL) {
1319 for (;;) {
1320 compat_uptr_t p;
1321
1322 if (get_user(p, argv))
1323 return -EFAULT;
1324 if (!p)
1325 break;
1326 argv++;
1327 if (i++ >= max)
1328 return -E2BIG;
1329
1330 if (fatal_signal_pending(current))
1331 return -ERESTARTNOHAND;
1332 cond_resched();
1333 }
1334 }
1335 return i;
1336}
1337
1338/*
1339 * compat_copy_strings() is basically a copy of copy_strings() from fs/exec.c
1340 * except that it works with 32 bit argv and envp pointers.
1341 */
1342static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
1343 struct linux_binprm *bprm)
1344{
1345 struct page *kmapped_page = NULL;
1346 char *kaddr = NULL;
1347 unsigned long kpos = 0;
1348 int ret;
1349
1350 while (argc-- > 0) {
1351 compat_uptr_t str;
1352 int len;
1353 unsigned long pos;
1354
1355 if (get_user(str, argv+argc) ||
1356 !(len = strnlen_user(compat_ptr(str), MAX_ARG_STRLEN))) {
1357 ret = -EFAULT;
1358 goto out;
1359 }
1360
1361 if (len > MAX_ARG_STRLEN) {
1362 ret = -E2BIG;
1363 goto out;
1364 }
1365
1366 /* We're going to work our way backwords. */
1367 pos = bprm->p;
1368 str += len;
1369 bprm->p -= len;
1370
1371 while (len > 0) {
1372 int offset, bytes_to_copy;
1373
1374 if (fatal_signal_pending(current)) {
1375 ret = -ERESTARTNOHAND;
1376 goto out;
1377 }
1378 cond_resched();
1379
1380 offset = pos % PAGE_SIZE;
1381 if (offset == 0)
1382 offset = PAGE_SIZE;
1383
1384 bytes_to_copy = offset;
1385 if (bytes_to_copy > len)
1386 bytes_to_copy = len;
1387
1388 offset -= bytes_to_copy;
1389 pos -= bytes_to_copy;
1390 str -= bytes_to_copy;
1391 len -= bytes_to_copy;
1392
1393 if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
1394 struct page *page;
1395
1396 page = get_arg_page(bprm, pos, 1);
1397 if (!page) {
1398 ret = -E2BIG;
1399 goto out;
1400 }
1401
1402 if (kmapped_page) {
1403 flush_kernel_dcache_page(kmapped_page);
1404 kunmap(kmapped_page);
1405 put_page(kmapped_page);
1406 }
1407 kmapped_page = page;
1408 kaddr = kmap(kmapped_page);
1409 kpos = pos & PAGE_MASK;
1410 flush_cache_page(bprm->vma, kpos,
1411 page_to_pfn(kmapped_page));
1412 }
1413 if (copy_from_user(kaddr+offset, compat_ptr(str),
1414 bytes_to_copy)) {
1415 ret = -EFAULT;
1416 goto out;
1417 }
1418 }
1419 }
1420 ret = 0;
1421out:
1422 if (kmapped_page) {
1423 flush_kernel_dcache_page(kmapped_page);
1424 kunmap(kmapped_page);
1425 put_page(kmapped_page);
1426 }
1427 return ret;
1428}
1429
1430/*
1431 * compat_do_execve() is mostly a copy of do_execve(), with the exception
1432 * that it processes 32 bit argv and envp pointers.
1433 */
1434int compat_do_execve(char * filename,
1435 compat_uptr_t __user *argv,
1436 compat_uptr_t __user *envp,
1437 struct pt_regs * regs)
1438{
1439 struct linux_binprm *bprm;
1440 struct file *file;
1441 struct files_struct *displaced;
1442 bool clear_in_exec;
1443 int retval;
1444
1445 retval = unshare_files(&displaced);
1446 if (retval)
1447 goto out_ret;
1448
1449 retval = -ENOMEM;
1450 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
1451 if (!bprm)
1452 goto out_files;
1453
1454 retval = prepare_bprm_creds(bprm);
1455 if (retval)
1456 goto out_free;
1457
1458 retval = check_unsafe_exec(bprm);
1459 if (retval < 0)
1460 goto out_free;
1461 clear_in_exec = retval;
1462 current->in_execve = 1;
1463
1464 file = open_exec(filename);
1465 retval = PTR_ERR(file);
1466 if (IS_ERR(file))
1467 goto out_unmark;
1468
1469 sched_exec();
1470
1471 bprm->file = file;
1472 bprm->filename = filename;
1473 bprm->interp = filename;
1474
1475 retval = bprm_mm_init(bprm);
1476 if (retval)
1477 goto out_file;
1478
1479 bprm->argc = compat_count(argv, MAX_ARG_STRINGS);
1480 if ((retval = bprm->argc) < 0)
1481 goto out;
1482
1483 bprm->envc = compat_count(envp, MAX_ARG_STRINGS);
1484 if ((retval = bprm->envc) < 0)
1485 goto out;
1486
1487 retval = prepare_binprm(bprm);
1488 if (retval < 0)
1489 goto out;
1490
1491 retval = copy_strings_kernel(1, &bprm->filename, bprm);
1492 if (retval < 0)
1493 goto out;
1494
1495 bprm->exec = bprm->p;
1496 retval = compat_copy_strings(bprm->envc, envp, bprm);
1497 if (retval < 0)
1498 goto out;
1499
1500 retval = compat_copy_strings(bprm->argc, argv, bprm);
1501 if (retval < 0)
1502 goto out;
1503
1504 retval = search_binary_handler(bprm, regs);
1505 if (retval < 0)
1506 goto out;
1507
1508 /* execve succeeded */
1509 current->fs->in_exec = 0;
1510 current->in_execve = 0;
1511 acct_update_integrals(current);
1512 free_bprm(bprm);
1513 if (displaced)
1514 put_files_struct(displaced);
1515 return retval;
1516
1517out:
1518 if (bprm->mm) {
1519 acct_arg_size(bprm, 0);
1520 mmput(bprm->mm);
1521 }
1522
1523out_file:
1524 if (bprm->file) {
1525 allow_write_access(bprm->file);
1526 fput(bprm->file);
1527 }
1528
1529out_unmark:
1530 if (clear_in_exec)
1531 current->fs->in_exec = 0;
1532 current->in_execve = 0;
1533
1534out_free:
1535 free_bprm(bprm);
1536
1537out_files:
1538 if (displaced)
1539 reset_files_struct(displaced);
1540out_ret:
1541 return retval;
1542}
1543
1544#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) 1309#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t))
1545 1310
1546static int poll_select_copy_remaining(struct timespec *end_time, void __user *p, 1311static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
diff --git a/fs/dcache.c b/fs/dcache.c
index 22a0ef41bad1..37f72ee5bf7c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -35,6 +35,7 @@
35#include <linux/hardirq.h> 35#include <linux/hardirq.h>
36#include <linux/bit_spinlock.h> 36#include <linux/bit_spinlock.h>
37#include <linux/rculist_bl.h> 37#include <linux/rculist_bl.h>
38#include <linux/prefetch.h>
38#include "internal.h" 39#include "internal.h"
39 40
40/* 41/*
@@ -1219,7 +1220,7 @@ void shrink_dcache_parent(struct dentry * parent)
1219EXPORT_SYMBOL(shrink_dcache_parent); 1220EXPORT_SYMBOL(shrink_dcache_parent);
1220 1221
1221/* 1222/*
1222 * Scan `nr' dentries and return the number which remain. 1223 * Scan `sc->nr_slab_to_reclaim' dentries and return the number which remain.
1223 * 1224 *
1224 * We need to avoid reentering the filesystem if the caller is performing a 1225 * We need to avoid reentering the filesystem if the caller is performing a
1225 * GFP_NOFS allocation attempt. One example deadlock is: 1226 * GFP_NOFS allocation attempt. One example deadlock is:
@@ -1230,8 +1231,12 @@ EXPORT_SYMBOL(shrink_dcache_parent);
1230 * 1231 *
1231 * In this case we return -1 to tell the caller that we baled. 1232 * In this case we return -1 to tell the caller that we baled.
1232 */ 1233 */
1233static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 1234static int shrink_dcache_memory(struct shrinker *shrink,
1235 struct shrink_control *sc)
1234{ 1236{
1237 int nr = sc->nr_to_scan;
1238 gfp_t gfp_mask = sc->gfp_mask;
1239
1235 if (nr) { 1240 if (nr) {
1236 if (!(gfp_mask & __GFP_FS)) 1241 if (!(gfp_mask & __GFP_FS))
1237 return -1; 1242 return -1;
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 89d394d8fe24..90f76575c056 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -428,26 +428,17 @@ static ssize_t write_file_bool(struct file *file, const char __user *user_buf,
428 size_t count, loff_t *ppos) 428 size_t count, loff_t *ppos)
429{ 429{
430 char buf[32]; 430 char buf[32];
431 int buf_size; 431 size_t buf_size;
432 bool bv;
432 u32 *val = file->private_data; 433 u32 *val = file->private_data;
433 434
434 buf_size = min(count, (sizeof(buf)-1)); 435 buf_size = min(count, (sizeof(buf)-1));
435 if (copy_from_user(buf, user_buf, buf_size)) 436 if (copy_from_user(buf, user_buf, buf_size))
436 return -EFAULT; 437 return -EFAULT;
437 438
438 switch (buf[0]) { 439 if (strtobool(buf, &bv) == 0)
439 case 'y': 440 *val = bv;
440 case 'Y': 441
441 case '1':
442 *val = 1;
443 break;
444 case 'n':
445 case 'N':
446 case '0':
447 *val = 0;
448 break;
449 }
450
451 return count; 442 return count;
452} 443}
453 444
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 0d329ff8ed4c..9b026ea8baa9 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -100,6 +100,7 @@ struct dlm_cluster {
100 unsigned int cl_log_debug; 100 unsigned int cl_log_debug;
101 unsigned int cl_protocol; 101 unsigned int cl_protocol;
102 unsigned int cl_timewarn_cs; 102 unsigned int cl_timewarn_cs;
103 unsigned int cl_waitwarn_us;
103}; 104};
104 105
105enum { 106enum {
@@ -114,6 +115,7 @@ enum {
114 CLUSTER_ATTR_LOG_DEBUG, 115 CLUSTER_ATTR_LOG_DEBUG,
115 CLUSTER_ATTR_PROTOCOL, 116 CLUSTER_ATTR_PROTOCOL,
116 CLUSTER_ATTR_TIMEWARN_CS, 117 CLUSTER_ATTR_TIMEWARN_CS,
118 CLUSTER_ATTR_WAITWARN_US,
117}; 119};
118 120
119struct cluster_attribute { 121struct cluster_attribute {
@@ -166,6 +168,7 @@ CLUSTER_ATTR(scan_secs, 1);
166CLUSTER_ATTR(log_debug, 0); 168CLUSTER_ATTR(log_debug, 0);
167CLUSTER_ATTR(protocol, 0); 169CLUSTER_ATTR(protocol, 0);
168CLUSTER_ATTR(timewarn_cs, 1); 170CLUSTER_ATTR(timewarn_cs, 1);
171CLUSTER_ATTR(waitwarn_us, 0);
169 172
170static struct configfs_attribute *cluster_attrs[] = { 173static struct configfs_attribute *cluster_attrs[] = {
171 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, 174 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -179,6 +182,7 @@ static struct configfs_attribute *cluster_attrs[] = {
179 [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr, 182 [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
180 [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, 183 [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
181 [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, 184 [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
185 [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr,
182 NULL, 186 NULL,
183}; 187};
184 188
@@ -439,6 +443,7 @@ static struct config_group *make_cluster(struct config_group *g,
439 cl->cl_log_debug = dlm_config.ci_log_debug; 443 cl->cl_log_debug = dlm_config.ci_log_debug;
440 cl->cl_protocol = dlm_config.ci_protocol; 444 cl->cl_protocol = dlm_config.ci_protocol;
441 cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; 445 cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
446 cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
442 447
443 space_list = &sps->ss_group; 448 space_list = &sps->ss_group;
444 comm_list = &cms->cs_group; 449 comm_list = &cms->cs_group;
@@ -986,6 +991,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
986#define DEFAULT_LOG_DEBUG 0 991#define DEFAULT_LOG_DEBUG 0
987#define DEFAULT_PROTOCOL 0 992#define DEFAULT_PROTOCOL 0
988#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ 993#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
994#define DEFAULT_WAITWARN_US 0
989 995
990struct dlm_config_info dlm_config = { 996struct dlm_config_info dlm_config = {
991 .ci_tcp_port = DEFAULT_TCP_PORT, 997 .ci_tcp_port = DEFAULT_TCP_PORT,
@@ -998,6 +1004,7 @@ struct dlm_config_info dlm_config = {
998 .ci_scan_secs = DEFAULT_SCAN_SECS, 1004 .ci_scan_secs = DEFAULT_SCAN_SECS,
999 .ci_log_debug = DEFAULT_LOG_DEBUG, 1005 .ci_log_debug = DEFAULT_LOG_DEBUG,
1000 .ci_protocol = DEFAULT_PROTOCOL, 1006 .ci_protocol = DEFAULT_PROTOCOL,
1001 .ci_timewarn_cs = DEFAULT_TIMEWARN_CS 1007 .ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
1008 .ci_waitwarn_us = DEFAULT_WAITWARN_US
1002}; 1009};
1003 1010
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 4f1d6fce58c5..dd0ce24d5a80 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -28,6 +28,7 @@ struct dlm_config_info {
28 int ci_log_debug; 28 int ci_log_debug;
29 int ci_protocol; 29 int ci_protocol;
30 int ci_timewarn_cs; 30 int ci_timewarn_cs;
31 int ci_waitwarn_us;
31}; 32};
32 33
33extern struct dlm_config_info dlm_config; 34extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index b94204913011..0262451eb9c6 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -209,6 +209,7 @@ struct dlm_args {
209#define DLM_IFL_WATCH_TIMEWARN 0x00400000 209#define DLM_IFL_WATCH_TIMEWARN 0x00400000
210#define DLM_IFL_TIMEOUT_CANCEL 0x00800000 210#define DLM_IFL_TIMEOUT_CANCEL 0x00800000
211#define DLM_IFL_DEADLOCK_CANCEL 0x01000000 211#define DLM_IFL_DEADLOCK_CANCEL 0x01000000
212#define DLM_IFL_STUB_MS 0x02000000 /* magic number for m_flags */
212#define DLM_IFL_USER 0x00000001 213#define DLM_IFL_USER 0x00000001
213#define DLM_IFL_ORPHAN 0x00000002 214#define DLM_IFL_ORPHAN 0x00000002
214 215
@@ -245,6 +246,7 @@ struct dlm_lkb {
245 246
246 int8_t lkb_wait_type; /* type of reply waiting for */ 247 int8_t lkb_wait_type; /* type of reply waiting for */
247 int8_t lkb_wait_count; 248 int8_t lkb_wait_count;
249 int lkb_wait_nodeid; /* for debugging */
248 250
249 struct list_head lkb_idtbl_list; /* lockspace lkbtbl */ 251 struct list_head lkb_idtbl_list; /* lockspace lkbtbl */
250 struct list_head lkb_statequeue; /* rsb g/c/w list */ 252 struct list_head lkb_statequeue; /* rsb g/c/w list */
@@ -254,6 +256,7 @@ struct dlm_lkb {
254 struct list_head lkb_ownqueue; /* list of locks for a process */ 256 struct list_head lkb_ownqueue; /* list of locks for a process */
255 struct list_head lkb_time_list; 257 struct list_head lkb_time_list;
256 ktime_t lkb_timestamp; 258 ktime_t lkb_timestamp;
259 ktime_t lkb_wait_time;
257 unsigned long lkb_timeout_cs; 260 unsigned long lkb_timeout_cs;
258 261
259 struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE]; 262 struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE];
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 56d6bfcc1e48..f71d0b5abd95 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -799,10 +799,84 @@ static int msg_reply_type(int mstype)
799 return -1; 799 return -1;
800} 800}
801 801
802static int nodeid_warned(int nodeid, int num_nodes, int *warned)
803{
804 int i;
805
806 for (i = 0; i < num_nodes; i++) {
807 if (!warned[i]) {
808 warned[i] = nodeid;
809 return 0;
810 }
811 if (warned[i] == nodeid)
812 return 1;
813 }
814 return 0;
815}
816
817void dlm_scan_waiters(struct dlm_ls *ls)
818{
819 struct dlm_lkb *lkb;
820 ktime_t zero = ktime_set(0, 0);
821 s64 us;
822 s64 debug_maxus = 0;
823 u32 debug_scanned = 0;
824 u32 debug_expired = 0;
825 int num_nodes = 0;
826 int *warned = NULL;
827
828 if (!dlm_config.ci_waitwarn_us)
829 return;
830
831 mutex_lock(&ls->ls_waiters_mutex);
832
833 list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
834 if (ktime_equal(lkb->lkb_wait_time, zero))
835 continue;
836
837 debug_scanned++;
838
839 us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_wait_time));
840
841 if (us < dlm_config.ci_waitwarn_us)
842 continue;
843
844 lkb->lkb_wait_time = zero;
845
846 debug_expired++;
847 if (us > debug_maxus)
848 debug_maxus = us;
849
850 if (!num_nodes) {
851 num_nodes = ls->ls_num_nodes;
852 warned = kmalloc(GFP_KERNEL, num_nodes * sizeof(int));
853 if (warned)
854 memset(warned, 0, num_nodes * sizeof(int));
855 }
856 if (!warned)
857 continue;
858 if (nodeid_warned(lkb->lkb_wait_nodeid, num_nodes, warned))
859 continue;
860
861 log_error(ls, "waitwarn %x %lld %d us check connection to "
862 "node %d", lkb->lkb_id, (long long)us,
863 dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);
864 }
865 mutex_unlock(&ls->ls_waiters_mutex);
866
867 if (warned)
868 kfree(warned);
869
870 if (debug_expired)
871 log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us",
872 debug_scanned, debug_expired,
873 dlm_config.ci_waitwarn_us, (long long)debug_maxus);
874}
875
802/* add/remove lkb from global waiters list of lkb's waiting for 876/* add/remove lkb from global waiters list of lkb's waiting for
803 a reply from a remote node */ 877 a reply from a remote node */
804 878
805static int add_to_waiters(struct dlm_lkb *lkb, int mstype) 879static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
806{ 880{
807 struct dlm_ls *ls = lkb->lkb_resource->res_ls; 881 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
808 int error = 0; 882 int error = 0;
@@ -842,6 +916,8 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype)
842 916
843 lkb->lkb_wait_count++; 917 lkb->lkb_wait_count++;
844 lkb->lkb_wait_type = mstype; 918 lkb->lkb_wait_type = mstype;
919 lkb->lkb_wait_time = ktime_get();
920 lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */
845 hold_lkb(lkb); 921 hold_lkb(lkb);
846 list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); 922 list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
847 out: 923 out:
@@ -961,10 +1037,10 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms)
961 struct dlm_ls *ls = lkb->lkb_resource->res_ls; 1037 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
962 int error; 1038 int error;
963 1039
964 if (ms != &ls->ls_stub_ms) 1040 if (ms->m_flags != DLM_IFL_STUB_MS)
965 mutex_lock(&ls->ls_waiters_mutex); 1041 mutex_lock(&ls->ls_waiters_mutex);
966 error = _remove_from_waiters(lkb, ms->m_type, ms); 1042 error = _remove_from_waiters(lkb, ms->m_type, ms);
967 if (ms != &ls->ls_stub_ms) 1043 if (ms->m_flags != DLM_IFL_STUB_MS)
968 mutex_unlock(&ls->ls_waiters_mutex); 1044 mutex_unlock(&ls->ls_waiters_mutex);
969 return error; 1045 return error;
970} 1046}
@@ -1157,6 +1233,16 @@ void dlm_adjust_timeouts(struct dlm_ls *ls)
1157 list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) 1233 list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
1158 lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us); 1234 lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us);
1159 mutex_unlock(&ls->ls_timeout_mutex); 1235 mutex_unlock(&ls->ls_timeout_mutex);
1236
1237 if (!dlm_config.ci_waitwarn_us)
1238 return;
1239
1240 mutex_lock(&ls->ls_waiters_mutex);
1241 list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
1242 if (ktime_to_us(lkb->lkb_wait_time))
1243 lkb->lkb_wait_time = ktime_get();
1244 }
1245 mutex_unlock(&ls->ls_waiters_mutex);
1160} 1246}
1161 1247
1162/* lkb is master or local copy */ 1248/* lkb is master or local copy */
@@ -1376,14 +1462,8 @@ static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb)
1376 ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become 1462 ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become
1377 compatible with other granted locks */ 1463 compatible with other granted locks */
1378 1464
1379static void munge_demoted(struct dlm_lkb *lkb, struct dlm_message *ms) 1465static void munge_demoted(struct dlm_lkb *lkb)
1380{ 1466{
1381 if (ms->m_type != DLM_MSG_CONVERT_REPLY) {
1382 log_print("munge_demoted %x invalid reply type %d",
1383 lkb->lkb_id, ms->m_type);
1384 return;
1385 }
1386
1387 if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) { 1467 if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) {
1388 log_print("munge_demoted %x invalid modes gr %d rq %d", 1468 log_print("munge_demoted %x invalid modes gr %d rq %d",
1389 lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode); 1469 lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode);
@@ -2844,12 +2924,12 @@ static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
2844 struct dlm_mhandle *mh; 2924 struct dlm_mhandle *mh;
2845 int to_nodeid, error; 2925 int to_nodeid, error;
2846 2926
2847 error = add_to_waiters(lkb, mstype); 2927 to_nodeid = r->res_nodeid;
2928
2929 error = add_to_waiters(lkb, mstype, to_nodeid);
2848 if (error) 2930 if (error)
2849 return error; 2931 return error;
2850 2932
2851 to_nodeid = r->res_nodeid;
2852
2853 error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh); 2933 error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
2854 if (error) 2934 if (error)
2855 goto fail; 2935 goto fail;
@@ -2880,9 +2960,9 @@ static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
2880 /* down conversions go without a reply from the master */ 2960 /* down conversions go without a reply from the master */
2881 if (!error && down_conversion(lkb)) { 2961 if (!error && down_conversion(lkb)) {
2882 remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY); 2962 remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY);
2963 r->res_ls->ls_stub_ms.m_flags = DLM_IFL_STUB_MS;
2883 r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; 2964 r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
2884 r->res_ls->ls_stub_ms.m_result = 0; 2965 r->res_ls->ls_stub_ms.m_result = 0;
2885 r->res_ls->ls_stub_ms.m_flags = lkb->lkb_flags;
2886 __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms); 2966 __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms);
2887 } 2967 }
2888 2968
@@ -2951,12 +3031,12 @@ static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb)
2951 struct dlm_mhandle *mh; 3031 struct dlm_mhandle *mh;
2952 int to_nodeid, error; 3032 int to_nodeid, error;
2953 3033
2954 error = add_to_waiters(lkb, DLM_MSG_LOOKUP); 3034 to_nodeid = dlm_dir_nodeid(r);
3035
3036 error = add_to_waiters(lkb, DLM_MSG_LOOKUP, to_nodeid);
2955 if (error) 3037 if (error)
2956 return error; 3038 return error;
2957 3039
2958 to_nodeid = dlm_dir_nodeid(r);
2959
2960 error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh); 3040 error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh);
2961 if (error) 3041 if (error)
2962 goto fail; 3042 goto fail;
@@ -3070,6 +3150,9 @@ static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms)
3070 3150
3071static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms) 3151static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
3072{ 3152{
3153 if (ms->m_flags == DLM_IFL_STUB_MS)
3154 return;
3155
3073 lkb->lkb_sbflags = ms->m_sbflags; 3156 lkb->lkb_sbflags = ms->m_sbflags;
3074 lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) | 3157 lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
3075 (ms->m_flags & 0x0000FFFF); 3158 (ms->m_flags & 0x0000FFFF);
@@ -3612,7 +3695,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
3612 /* convert was queued on remote master */ 3695 /* convert was queued on remote master */
3613 receive_flags_reply(lkb, ms); 3696 receive_flags_reply(lkb, ms);
3614 if (is_demoted(lkb)) 3697 if (is_demoted(lkb))
3615 munge_demoted(lkb, ms); 3698 munge_demoted(lkb);
3616 del_lkb(r, lkb); 3699 del_lkb(r, lkb);
3617 add_lkb(r, lkb, DLM_LKSTS_CONVERT); 3700 add_lkb(r, lkb, DLM_LKSTS_CONVERT);
3618 add_timeout(lkb); 3701 add_timeout(lkb);
@@ -3622,7 +3705,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
3622 /* convert was granted on remote master */ 3705 /* convert was granted on remote master */
3623 receive_flags_reply(lkb, ms); 3706 receive_flags_reply(lkb, ms);
3624 if (is_demoted(lkb)) 3707 if (is_demoted(lkb))
3625 munge_demoted(lkb, ms); 3708 munge_demoted(lkb);
3626 grant_lock_pc(r, lkb, ms); 3709 grant_lock_pc(r, lkb, ms);
3627 queue_cast(r, lkb, 0); 3710 queue_cast(r, lkb, 0);
3628 break; 3711 break;
@@ -3996,15 +4079,17 @@ void dlm_receive_buffer(union dlm_packet *p, int nodeid)
3996 dlm_put_lockspace(ls); 4079 dlm_put_lockspace(ls);
3997} 4080}
3998 4081
3999static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb) 4082static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb,
4083 struct dlm_message *ms_stub)
4000{ 4084{
4001 if (middle_conversion(lkb)) { 4085 if (middle_conversion(lkb)) {
4002 hold_lkb(lkb); 4086 hold_lkb(lkb);
4003 ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; 4087 memset(ms_stub, 0, sizeof(struct dlm_message));
4004 ls->ls_stub_ms.m_result = -EINPROGRESS; 4088 ms_stub->m_flags = DLM_IFL_STUB_MS;
4005 ls->ls_stub_ms.m_flags = lkb->lkb_flags; 4089 ms_stub->m_type = DLM_MSG_CONVERT_REPLY;
4006 ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; 4090 ms_stub->m_result = -EINPROGRESS;
4007 _receive_convert_reply(lkb, &ls->ls_stub_ms); 4091 ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
4092 _receive_convert_reply(lkb, ms_stub);
4008 4093
4009 /* Same special case as in receive_rcom_lock_args() */ 4094 /* Same special case as in receive_rcom_lock_args() */
4010 lkb->lkb_grmode = DLM_LOCK_IV; 4095 lkb->lkb_grmode = DLM_LOCK_IV;
@@ -4045,13 +4130,27 @@ static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb)
4045void dlm_recover_waiters_pre(struct dlm_ls *ls) 4130void dlm_recover_waiters_pre(struct dlm_ls *ls)
4046{ 4131{
4047 struct dlm_lkb *lkb, *safe; 4132 struct dlm_lkb *lkb, *safe;
4133 struct dlm_message *ms_stub;
4048 int wait_type, stub_unlock_result, stub_cancel_result; 4134 int wait_type, stub_unlock_result, stub_cancel_result;
4049 4135
4136 ms_stub = kmalloc(GFP_KERNEL, sizeof(struct dlm_message));
4137 if (!ms_stub) {
4138 log_error(ls, "dlm_recover_waiters_pre no mem");
4139 return;
4140 }
4141
4050 mutex_lock(&ls->ls_waiters_mutex); 4142 mutex_lock(&ls->ls_waiters_mutex);
4051 4143
4052 list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) { 4144 list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) {
4053 log_debug(ls, "pre recover waiter lkid %x type %d flags %x", 4145
4054 lkb->lkb_id, lkb->lkb_wait_type, lkb->lkb_flags); 4146 /* exclude debug messages about unlocks because there can be so
4147 many and they aren't very interesting */
4148
4149 if (lkb->lkb_wait_type != DLM_MSG_UNLOCK) {
4150 log_debug(ls, "recover_waiter %x nodeid %d "
4151 "msg %d to %d", lkb->lkb_id, lkb->lkb_nodeid,
4152 lkb->lkb_wait_type, lkb->lkb_wait_nodeid);
4153 }
4055 4154
4056 /* all outstanding lookups, regardless of destination will be 4155 /* all outstanding lookups, regardless of destination will be
4057 resent after recovery is done */ 4156 resent after recovery is done */
@@ -4097,26 +4196,28 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
4097 break; 4196 break;
4098 4197
4099 case DLM_MSG_CONVERT: 4198 case DLM_MSG_CONVERT:
4100 recover_convert_waiter(ls, lkb); 4199 recover_convert_waiter(ls, lkb, ms_stub);
4101 break; 4200 break;
4102 4201
4103 case DLM_MSG_UNLOCK: 4202 case DLM_MSG_UNLOCK:
4104 hold_lkb(lkb); 4203 hold_lkb(lkb);
4105 ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY; 4204 memset(ms_stub, 0, sizeof(struct dlm_message));
4106 ls->ls_stub_ms.m_result = stub_unlock_result; 4205 ms_stub->m_flags = DLM_IFL_STUB_MS;
4107 ls->ls_stub_ms.m_flags = lkb->lkb_flags; 4206 ms_stub->m_type = DLM_MSG_UNLOCK_REPLY;
4108 ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; 4207 ms_stub->m_result = stub_unlock_result;
4109 _receive_unlock_reply(lkb, &ls->ls_stub_ms); 4208 ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
4209 _receive_unlock_reply(lkb, ms_stub);
4110 dlm_put_lkb(lkb); 4210 dlm_put_lkb(lkb);
4111 break; 4211 break;
4112 4212
4113 case DLM_MSG_CANCEL: 4213 case DLM_MSG_CANCEL:
4114 hold_lkb(lkb); 4214 hold_lkb(lkb);
4115 ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY; 4215 memset(ms_stub, 0, sizeof(struct dlm_message));
4116 ls->ls_stub_ms.m_result = stub_cancel_result; 4216 ms_stub->m_flags = DLM_IFL_STUB_MS;
4117 ls->ls_stub_ms.m_flags = lkb->lkb_flags; 4217 ms_stub->m_type = DLM_MSG_CANCEL_REPLY;
4118 ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; 4218 ms_stub->m_result = stub_cancel_result;
4119 _receive_cancel_reply(lkb, &ls->ls_stub_ms); 4219 ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
4220 _receive_cancel_reply(lkb, ms_stub);
4120 dlm_put_lkb(lkb); 4221 dlm_put_lkb(lkb);
4121 break; 4222 break;
4122 4223
@@ -4127,6 +4228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
4127 schedule(); 4228 schedule();
4128 } 4229 }
4129 mutex_unlock(&ls->ls_waiters_mutex); 4230 mutex_unlock(&ls->ls_waiters_mutex);
4231 kfree(ms_stub);
4130} 4232}
4131 4233
4132static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls) 4234static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls)
@@ -4191,8 +4293,8 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
4191 ou = is_overlap_unlock(lkb); 4293 ou = is_overlap_unlock(lkb);
4192 err = 0; 4294 err = 0;
4193 4295
4194 log_debug(ls, "recover_waiters_post %x type %d flags %x %s", 4296 log_debug(ls, "recover_waiter %x nodeid %d msg %d r_nodeid %d",
4195 lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name); 4297 lkb->lkb_id, lkb->lkb_nodeid, mstype, r->res_nodeid);
4196 4298
4197 /* At this point we assume that we won't get a reply to any 4299 /* At this point we assume that we won't get a reply to any
4198 previous op or overlap op on this lock. First, do a big 4300 previous op or overlap op on this lock. First, do a big
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 88e93c80cc22..265017a7c3e7 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -24,6 +24,7 @@ int dlm_put_lkb(struct dlm_lkb *lkb);
24void dlm_scan_rsbs(struct dlm_ls *ls); 24void dlm_scan_rsbs(struct dlm_ls *ls);
25int dlm_lock_recovery_try(struct dlm_ls *ls); 25int dlm_lock_recovery_try(struct dlm_ls *ls);
26void dlm_unlock_recovery(struct dlm_ls *ls); 26void dlm_unlock_recovery(struct dlm_ls *ls);
27void dlm_scan_waiters(struct dlm_ls *ls);
27void dlm_scan_timeout(struct dlm_ls *ls); 28void dlm_scan_timeout(struct dlm_ls *ls);
28void dlm_adjust_timeouts(struct dlm_ls *ls); 29void dlm_adjust_timeouts(struct dlm_ls *ls);
29 30
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index f994a7dfda85..14cbf4099753 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -243,7 +243,6 @@ static struct dlm_ls *find_ls_to_scan(void)
243static int dlm_scand(void *data) 243static int dlm_scand(void *data)
244{ 244{
245 struct dlm_ls *ls; 245 struct dlm_ls *ls;
246 int timeout_jiffies = dlm_config.ci_scan_secs * HZ;
247 246
248 while (!kthread_should_stop()) { 247 while (!kthread_should_stop()) {
249 ls = find_ls_to_scan(); 248 ls = find_ls_to_scan();
@@ -252,13 +251,14 @@ static int dlm_scand(void *data)
252 ls->ls_scan_time = jiffies; 251 ls->ls_scan_time = jiffies;
253 dlm_scan_rsbs(ls); 252 dlm_scan_rsbs(ls);
254 dlm_scan_timeout(ls); 253 dlm_scan_timeout(ls);
254 dlm_scan_waiters(ls);
255 dlm_unlock_recovery(ls); 255 dlm_unlock_recovery(ls);
256 } else { 256 } else {
257 ls->ls_scan_time += HZ; 257 ls->ls_scan_time += HZ;
258 } 258 }
259 } else { 259 continue;
260 schedule_timeout_interruptible(timeout_jiffies);
261 } 260 }
261 schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
262 } 262 }
263 return 0; 263 return 0;
264} 264}
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 30d8b85febbf..e2b878004364 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -71,6 +71,36 @@ static void send_op(struct plock_op *op)
71 wake_up(&send_wq); 71 wake_up(&send_wq);
72} 72}
73 73
74/* If a process was killed while waiting for the only plock on a file,
75 locks_remove_posix will not see any lock on the file so it won't
76 send an unlock-close to us to pass on to userspace to clean up the
77 abandoned waiter. So, we have to insert the unlock-close when the
78 lock call is interrupted. */
79
80static void do_unlock_close(struct dlm_ls *ls, u64 number,
81 struct file *file, struct file_lock *fl)
82{
83 struct plock_op *op;
84
85 op = kzalloc(sizeof(*op), GFP_NOFS);
86 if (!op)
87 return;
88
89 op->info.optype = DLM_PLOCK_OP_UNLOCK;
90 op->info.pid = fl->fl_pid;
91 op->info.fsid = ls->ls_global_id;
92 op->info.number = number;
93 op->info.start = 0;
94 op->info.end = OFFSET_MAX;
95 if (fl->fl_lmops && fl->fl_lmops->fl_grant)
96 op->info.owner = (__u64) fl->fl_pid;
97 else
98 op->info.owner = (__u64)(long) fl->fl_owner;
99
100 op->info.flags |= DLM_PLOCK_FL_CLOSE;
101 send_op(op);
102}
103
74int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, 104int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
75 int cmd, struct file_lock *fl) 105 int cmd, struct file_lock *fl)
76{ 106{
@@ -114,9 +144,19 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
114 144
115 send_op(op); 145 send_op(op);
116 146
117 if (xop->callback == NULL) 147 if (xop->callback == NULL) {
118 wait_event(recv_wq, (op->done != 0)); 148 rv = wait_event_killable(recv_wq, (op->done != 0));
119 else { 149 if (rv == -ERESTARTSYS) {
150 log_debug(ls, "dlm_posix_lock: wait killed %llx",
151 (unsigned long long)number);
152 spin_lock(&ops_lock);
153 list_del(&op->list);
154 spin_unlock(&ops_lock);
155 kfree(xop);
156 do_unlock_close(ls, number, file, fl);
157 goto out;
158 }
159 } else {
120 rv = FILE_LOCK_DEFERRED; 160 rv = FILE_LOCK_DEFERRED;
121 goto out; 161 goto out;
122 } 162 }
@@ -233,6 +273,13 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
233 else 273 else
234 op->info.owner = (__u64)(long) fl->fl_owner; 274 op->info.owner = (__u64)(long) fl->fl_owner;
235 275
276 if (fl->fl_flags & FL_CLOSE) {
277 op->info.flags |= DLM_PLOCK_FL_CLOSE;
278 send_op(op);
279 rv = 0;
280 goto out;
281 }
282
236 send_op(op); 283 send_op(op);
237 wait_event(recv_wq, (op->done != 0)); 284 wait_event(recv_wq, (op->done != 0));
238 285
@@ -334,7 +381,10 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
334 spin_lock(&ops_lock); 381 spin_lock(&ops_lock);
335 if (!list_empty(&send_list)) { 382 if (!list_empty(&send_list)) {
336 op = list_entry(send_list.next, struct plock_op, list); 383 op = list_entry(send_list.next, struct plock_op, list);
337 list_move(&op->list, &recv_list); 384 if (op->info.flags & DLM_PLOCK_FL_CLOSE)
385 list_del(&op->list);
386 else
387 list_move(&op->list, &recv_list);
338 memcpy(&info, &op->info, sizeof(info)); 388 memcpy(&info, &op->info, sizeof(info));
339 } 389 }
340 spin_unlock(&ops_lock); 390 spin_unlock(&ops_lock);
@@ -342,6 +392,13 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
342 if (!op) 392 if (!op)
343 return -EAGAIN; 393 return -EAGAIN;
344 394
395 /* there is no need to get a reply from userspace for unlocks
396 that were generated by the vfs cleaning up for a close
397 (the process did not make an unlock call). */
398
399 if (op->info.flags & DLM_PLOCK_FL_CLOSE)
400 kfree(op);
401
345 if (copy_to_user(u, &info, sizeof(info))) 402 if (copy_to_user(u, &info, sizeof(info)))
346 return -EFAULT; 403 return -EFAULT;
347 return sizeof(info); 404 return sizeof(info);
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index d5ab3fe7c198..e96bf3e9be88 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -611,7 +611,6 @@ static ssize_t device_write(struct file *file, const char __user *buf,
611 611
612 out_sig: 612 out_sig:
613 sigprocmask(SIG_SETMASK, &tmpsig, NULL); 613 sigprocmask(SIG_SETMASK, &tmpsig, NULL);
614 recalc_sigpending();
615 out_free: 614 out_free:
616 kfree(kbuf); 615 kfree(kbuf);
617 return error; 616 return error;
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 98b77c89494c..c00e055b6282 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -40,9 +40,12 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
40static void drop_slab(void) 40static void drop_slab(void)
41{ 41{
42 int nr_objects; 42 int nr_objects;
43 struct shrink_control shrink = {
44 .gfp_mask = GFP_KERNEL,
45 };
43 46
44 do { 47 do {
45 nr_objects = shrink_slab(1000, GFP_KERNEL, 1000); 48 nr_objects = shrink_slab(&shrink, 1000, 1000);
46 } while (nr_objects > 10); 49 } while (nr_objects > 10);
47} 50}
48 51
diff --git a/fs/exec.c b/fs/exec.c
index 5e62d26a4fec..936f5776655c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,6 +55,7 @@
55#include <linux/fs_struct.h> 55#include <linux/fs_struct.h>
56#include <linux/pipe_fs_i.h> 56#include <linux/pipe_fs_i.h>
57#include <linux/oom.h> 57#include <linux/oom.h>
58#include <linux/compat.h>
58 59
59#include <asm/uaccess.h> 60#include <asm/uaccess.h>
60#include <asm/mmu_context.h> 61#include <asm/mmu_context.h>
@@ -166,8 +167,13 @@ out:
166} 167}
167 168
168#ifdef CONFIG_MMU 169#ifdef CONFIG_MMU
169 170/*
170void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) 171 * The nascent bprm->mm is not visible until exec_mmap() but it can
172 * use a lot of memory, account these pages in current->mm temporary
173 * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we
174 * change the counter back via acct_arg_size(0).
175 */
176static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
171{ 177{
172 struct mm_struct *mm = current->mm; 178 struct mm_struct *mm = current->mm;
173 long diff = (long)(pages - bprm->vma_pages); 179 long diff = (long)(pages - bprm->vma_pages);
@@ -186,7 +192,7 @@ void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
186#endif 192#endif
187} 193}
188 194
189struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, 195static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
190 int write) 196 int write)
191{ 197{
192 struct page *page; 198 struct page *page;
@@ -194,7 +200,7 @@ struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
194 200
195#ifdef CONFIG_STACK_GROWSUP 201#ifdef CONFIG_STACK_GROWSUP
196 if (write) { 202 if (write) {
197 ret = expand_stack_downwards(bprm->vma, pos); 203 ret = expand_downwards(bprm->vma, pos);
198 if (ret < 0) 204 if (ret < 0)
199 return NULL; 205 return NULL;
200 } 206 }
@@ -305,11 +311,11 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
305 311
306#else 312#else
307 313
308void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) 314static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
309{ 315{
310} 316}
311 317
312struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, 318static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
313 int write) 319 int write)
314{ 320{
315 struct page *page; 321 struct page *page;
@@ -398,22 +404,56 @@ err:
398 return err; 404 return err;
399} 405}
400 406
407struct user_arg_ptr {
408#ifdef CONFIG_COMPAT
409 bool is_compat;
410#endif
411 union {
412 const char __user *const __user *native;
413#ifdef CONFIG_COMPAT
414 compat_uptr_t __user *compat;
415#endif
416 } ptr;
417};
418
419static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
420{
421 const char __user *native;
422
423#ifdef CONFIG_COMPAT
424 if (unlikely(argv.is_compat)) {
425 compat_uptr_t compat;
426
427 if (get_user(compat, argv.ptr.compat + nr))
428 return ERR_PTR(-EFAULT);
429
430 return compat_ptr(compat);
431 }
432#endif
433
434 if (get_user(native, argv.ptr.native + nr))
435 return ERR_PTR(-EFAULT);
436
437 return native;
438}
439
401/* 440/*
402 * count() counts the number of strings in array ARGV. 441 * count() counts the number of strings in array ARGV.
403 */ 442 */
404static int count(const char __user * const __user * argv, int max) 443static int count(struct user_arg_ptr argv, int max)
405{ 444{
406 int i = 0; 445 int i = 0;
407 446
408 if (argv != NULL) { 447 if (argv.ptr.native != NULL) {
409 for (;;) { 448 for (;;) {
410 const char __user * p; 449 const char __user *p = get_user_arg_ptr(argv, i);
411 450
412 if (get_user(p, argv))
413 return -EFAULT;
414 if (!p) 451 if (!p)
415 break; 452 break;
416 argv++; 453
454 if (IS_ERR(p))
455 return -EFAULT;
456
417 if (i++ >= max) 457 if (i++ >= max)
418 return -E2BIG; 458 return -E2BIG;
419 459
@@ -430,7 +470,7 @@ static int count(const char __user * const __user * argv, int max)
430 * processes's memory to the new process's stack. The call to get_user_pages() 470 * processes's memory to the new process's stack. The call to get_user_pages()
431 * ensures the destination page is created and not swapped out. 471 * ensures the destination page is created and not swapped out.
432 */ 472 */
433static int copy_strings(int argc, const char __user *const __user *argv, 473static int copy_strings(int argc, struct user_arg_ptr argv,
434 struct linux_binprm *bprm) 474 struct linux_binprm *bprm)
435{ 475{
436 struct page *kmapped_page = NULL; 476 struct page *kmapped_page = NULL;
@@ -443,16 +483,18 @@ static int copy_strings(int argc, const char __user *const __user *argv,
443 int len; 483 int len;
444 unsigned long pos; 484 unsigned long pos;
445 485
446 if (get_user(str, argv+argc) || 486 ret = -EFAULT;
447 !(len = strnlen_user(str, MAX_ARG_STRLEN))) { 487 str = get_user_arg_ptr(argv, argc);
448 ret = -EFAULT; 488 if (IS_ERR(str))
449 goto out; 489 goto out;
450 }
451 490
452 if (!valid_arg_len(bprm, len)) { 491 len = strnlen_user(str, MAX_ARG_STRLEN);
453 ret = -E2BIG; 492 if (!len)
493 goto out;
494
495 ret = -E2BIG;
496 if (!valid_arg_len(bprm, len))
454 goto out; 497 goto out;
455 }
456 498
457 /* We're going to work our way backwords. */ 499 /* We're going to work our way backwords. */
458 pos = bprm->p; 500 pos = bprm->p;
@@ -519,14 +561,19 @@ out:
519/* 561/*
520 * Like copy_strings, but get argv and its values from kernel memory. 562 * Like copy_strings, but get argv and its values from kernel memory.
521 */ 563 */
522int copy_strings_kernel(int argc, const char *const *argv, 564int copy_strings_kernel(int argc, const char *const *__argv,
523 struct linux_binprm *bprm) 565 struct linux_binprm *bprm)
524{ 566{
525 int r; 567 int r;
526 mm_segment_t oldfs = get_fs(); 568 mm_segment_t oldfs = get_fs();
569 struct user_arg_ptr argv = {
570 .ptr.native = (const char __user *const __user *)__argv,
571 };
572
527 set_fs(KERNEL_DS); 573 set_fs(KERNEL_DS);
528 r = copy_strings(argc, (const char __user *const __user *)argv, bprm); 574 r = copy_strings(argc, argv, bprm);
529 set_fs(oldfs); 575 set_fs(oldfs);
576
530 return r; 577 return r;
531} 578}
532EXPORT_SYMBOL(copy_strings_kernel); 579EXPORT_SYMBOL(copy_strings_kernel);
@@ -553,7 +600,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
553 unsigned long length = old_end - old_start; 600 unsigned long length = old_end - old_start;
554 unsigned long new_start = old_start - shift; 601 unsigned long new_start = old_start - shift;
555 unsigned long new_end = old_end - shift; 602 unsigned long new_end = old_end - shift;
556 struct mmu_gather *tlb; 603 struct mmu_gather tlb;
557 604
558 BUG_ON(new_start > new_end); 605 BUG_ON(new_start > new_end);
559 606
@@ -579,12 +626,12 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
579 return -ENOMEM; 626 return -ENOMEM;
580 627
581 lru_add_drain(); 628 lru_add_drain();
582 tlb = tlb_gather_mmu(mm, 0); 629 tlb_gather_mmu(&tlb, mm, 0);
583 if (new_end > old_start) { 630 if (new_end > old_start) {
584 /* 631 /*
585 * when the old and new regions overlap clear from new_end. 632 * when the old and new regions overlap clear from new_end.
586 */ 633 */
587 free_pgd_range(tlb, new_end, old_end, new_end, 634 free_pgd_range(&tlb, new_end, old_end, new_end,
588 vma->vm_next ? vma->vm_next->vm_start : 0); 635 vma->vm_next ? vma->vm_next->vm_start : 0);
589 } else { 636 } else {
590 /* 637 /*
@@ -593,10 +640,10 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
593 * have constraints on va-space that make this illegal (IA64) - 640 * have constraints on va-space that make this illegal (IA64) -
594 * for the others its just a little faster. 641 * for the others its just a little faster.
595 */ 642 */
596 free_pgd_range(tlb, old_start, old_end, new_end, 643 free_pgd_range(&tlb, old_start, old_end, new_end,
597 vma->vm_next ? vma->vm_next->vm_start : 0); 644 vma->vm_next ? vma->vm_next->vm_start : 0);
598 } 645 }
599 tlb_finish_mmu(tlb, new_end, old_end); 646 tlb_finish_mmu(&tlb, new_end, old_end);
600 647
601 /* 648 /*
602 * Shrink the vma to just the new range. Always succeeds. 649 * Shrink the vma to just the new range. Always succeeds.
@@ -1004,6 +1051,7 @@ char *get_task_comm(char *buf, struct task_struct *tsk)
1004 task_unlock(tsk); 1051 task_unlock(tsk);
1005 return buf; 1052 return buf;
1006} 1053}
1054EXPORT_SYMBOL_GPL(get_task_comm);
1007 1055
1008void set_task_comm(struct task_struct *tsk, char *buf) 1056void set_task_comm(struct task_struct *tsk, char *buf)
1009{ 1057{
@@ -1379,10 +1427,10 @@ EXPORT_SYMBOL(search_binary_handler);
1379/* 1427/*
1380 * sys_execve() executes a new program. 1428 * sys_execve() executes a new program.
1381 */ 1429 */
1382int do_execve(const char * filename, 1430static int do_execve_common(const char *filename,
1383 const char __user *const __user *argv, 1431 struct user_arg_ptr argv,
1384 const char __user *const __user *envp, 1432 struct user_arg_ptr envp,
1385 struct pt_regs * regs) 1433 struct pt_regs *regs)
1386{ 1434{
1387 struct linux_binprm *bprm; 1435 struct linux_binprm *bprm;
1388 struct file *file; 1436 struct file *file;
@@ -1489,6 +1537,34 @@ out_ret:
1489 return retval; 1537 return retval;
1490} 1538}
1491 1539
1540int do_execve(const char *filename,
1541 const char __user *const __user *__argv,
1542 const char __user *const __user *__envp,
1543 struct pt_regs *regs)
1544{
1545 struct user_arg_ptr argv = { .ptr.native = __argv };
1546 struct user_arg_ptr envp = { .ptr.native = __envp };
1547 return do_execve_common(filename, argv, envp, regs);
1548}
1549
1550#ifdef CONFIG_COMPAT
1551int compat_do_execve(char *filename,
1552 compat_uptr_t __user *__argv,
1553 compat_uptr_t __user *__envp,
1554 struct pt_regs *regs)
1555{
1556 struct user_arg_ptr argv = {
1557 .is_compat = true,
1558 .ptr.compat = __argv,
1559 };
1560 struct user_arg_ptr envp = {
1561 .is_compat = true,
1562 .ptr.compat = __envp,
1563 };
1564 return do_execve_common(filename, argv, envp, regs);
1565}
1566#endif
1567
1492void set_binfmt(struct linux_binfmt *new) 1568void set_binfmt(struct linux_binfmt *new)
1493{ 1569{
1494 struct mm_struct *mm = current->mm; 1570 struct mm_struct *mm = current->mm;
@@ -1659,6 +1735,7 @@ static int zap_process(struct task_struct *start, int exit_code)
1659 1735
1660 t = start; 1736 t = start;
1661 do { 1737 do {
1738 task_clear_group_stop_pending(t);
1662 if (t != current && t->mm) { 1739 if (t != current && t->mm) {
1663 sigaddset(&t->pending.signal, SIGKILL); 1740 sigaddset(&t->pending.signal, SIGKILL);
1664 signal_wake_up(t, 1); 1741 signal_wake_up(t, 1);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 0a78dae7e2cb..1dd62ed35b85 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -898,7 +898,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
898 brelse(bh); 898 brelse(bh);
899 899
900 if (!sb_set_blocksize(sb, blocksize)) { 900 if (!sb_set_blocksize(sb, blocksize)) {
901 ext2_msg(sb, KERN_ERR, "error: blocksize is too small"); 901 ext2_msg(sb, KERN_ERR,
902 "error: bad blocksize %d", blocksize);
902 goto failed_sbi; 903 goto failed_sbi;
903 } 904 }
904 905
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 32f3b8695859..34b6d9bfc48a 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1416,10 +1416,19 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1416 frame->at = entries; 1416 frame->at = entries;
1417 frame->bh = bh; 1417 frame->bh = bh;
1418 bh = bh2; 1418 bh = bh2;
1419 /*
1420 * Mark buffers dirty here so that if do_split() fails we write a
1421 * consistent set of buffers to disk.
1422 */
1423 ext3_journal_dirty_metadata(handle, frame->bh);
1424 ext3_journal_dirty_metadata(handle, bh);
1419 de = do_split(handle,dir, &bh, frame, &hinfo, &retval); 1425 de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
1420 dx_release (frames); 1426 if (!de) {
1421 if (!(de)) 1427 ext3_mark_inode_dirty(handle, dir);
1428 dx_release(frames);
1422 return retval; 1429 return retval;
1430 }
1431 dx_release(frames);
1423 1432
1424 return add_dirent_to_buf(handle, dentry, inode, de, bh); 1433 return add_dirent_to_buf(handle, dentry, inode, de, bh);
1425} 1434}
@@ -2189,6 +2198,7 @@ static int ext3_symlink (struct inode * dir,
2189 handle_t *handle; 2198 handle_t *handle;
2190 struct inode * inode; 2199 struct inode * inode;
2191 int l, err, retries = 0; 2200 int l, err, retries = 0;
2201 int credits;
2192 2202
2193 l = strlen(symname)+1; 2203 l = strlen(symname)+1;
2194 if (l > dir->i_sb->s_blocksize) 2204 if (l > dir->i_sb->s_blocksize)
@@ -2196,10 +2206,26 @@ static int ext3_symlink (struct inode * dir,
2196 2206
2197 dquot_initialize(dir); 2207 dquot_initialize(dir);
2198 2208
2209 if (l > EXT3_N_BLOCKS * 4) {
2210 /*
2211 * For non-fast symlinks, we just allocate inode and put it on
2212 * orphan list in the first transaction => we need bitmap,
2213 * group descriptor, sb, inode block, quota blocks.
2214 */
2215 credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
2216 } else {
2217 /*
2218 * Fast symlink. We have to add entry to directory
2219 * (EXT3_DATA_TRANS_BLOCKS + EXT3_INDEX_EXTRA_TRANS_BLOCKS),
2220 * allocate new inode (bitmap, group descriptor, inode block,
2221 * quota blocks, sb is already counted in previous macros).
2222 */
2223 credits = EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2224 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2225 EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
2226 }
2199retry: 2227retry:
2200 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 2228 handle = ext3_journal_start(dir, credits);
2201 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
2202 EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2203 if (IS_ERR(handle)) 2229 if (IS_ERR(handle))
2204 return PTR_ERR(handle); 2230 return PTR_ERR(handle);
2205 2231
@@ -2211,21 +2237,45 @@ retry:
2211 if (IS_ERR(inode)) 2237 if (IS_ERR(inode))
2212 goto out_stop; 2238 goto out_stop;
2213 2239
2214 if (l > sizeof (EXT3_I(inode)->i_data)) { 2240 if (l > EXT3_N_BLOCKS * 4) {
2215 inode->i_op = &ext3_symlink_inode_operations; 2241 inode->i_op = &ext3_symlink_inode_operations;
2216 ext3_set_aops(inode); 2242 ext3_set_aops(inode);
2217 /* 2243 /*
2218 * page_symlink() calls into ext3_prepare/commit_write. 2244 * We cannot call page_symlink() with transaction started
2219 * We have a transaction open. All is sweetness. It also sets 2245 * because it calls into ext3_write_begin() which acquires page
2220 * i_size in generic_commit_write(). 2246 * lock which ranks below transaction start (and it can also
2247 * wait for journal commit if we are running out of space). So
2248 * we have to stop transaction now and restart it when symlink
2249 * contents is written.
2250 *
2251 * To keep fs consistent in case of crash, we have to put inode
2252 * to orphan list in the mean time.
2221 */ 2253 */
2254 drop_nlink(inode);
2255 err = ext3_orphan_add(handle, inode);
2256 ext3_journal_stop(handle);
2257 if (err)
2258 goto err_drop_inode;
2222 err = __page_symlink(inode, symname, l, 1); 2259 err = __page_symlink(inode, symname, l, 1);
2260 if (err)
2261 goto err_drop_inode;
2262 /*
2263 * Now inode is being linked into dir (EXT3_DATA_TRANS_BLOCKS
2264 * + EXT3_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified
2265 */
2266 handle = ext3_journal_start(dir,
2267 EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2268 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 1);
2269 if (IS_ERR(handle)) {
2270 err = PTR_ERR(handle);
2271 goto err_drop_inode;
2272 }
2273 inc_nlink(inode);
2274 err = ext3_orphan_del(handle, inode);
2223 if (err) { 2275 if (err) {
2276 ext3_journal_stop(handle);
2224 drop_nlink(inode); 2277 drop_nlink(inode);
2225 unlock_new_inode(inode); 2278 goto err_drop_inode;
2226 ext3_mark_inode_dirty(handle, inode);
2227 iput (inode);
2228 goto out_stop;
2229 } 2279 }
2230 } else { 2280 } else {
2231 inode->i_op = &ext3_fast_symlink_inode_operations; 2281 inode->i_op = &ext3_fast_symlink_inode_operations;
@@ -2239,6 +2289,10 @@ out_stop:
2239 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) 2289 if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
2240 goto retry; 2290 goto retry;
2241 return err; 2291 return err;
2292err_drop_inode:
2293 unlock_new_inode(inode);
2294 iput(inode);
2295 return err;
2242} 2296}
2243 2297
2244static int ext3_link (struct dentry * old_dentry, 2298static int ext3_link (struct dentry * old_dentry,
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index ae8200f84e39..1cc7038e273d 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -151,6 +151,13 @@ static void fat_cache_add(struct inode *inode, struct fat_cache_id *new)
151 spin_unlock(&MSDOS_I(inode)->cache_lru_lock); 151 spin_unlock(&MSDOS_I(inode)->cache_lru_lock);
152 152
153 tmp = fat_cache_alloc(inode); 153 tmp = fat_cache_alloc(inode);
154 if (!tmp) {
155 spin_lock(&MSDOS_I(inode)->cache_lru_lock);
156 MSDOS_I(inode)->nr_caches--;
157 spin_unlock(&MSDOS_I(inode)->cache_lru_lock);
158 return;
159 }
160
154 spin_lock(&MSDOS_I(inode)->cache_lru_lock); 161 spin_lock(&MSDOS_I(inode)->cache_lru_lock);
155 cache = fat_cache_merge(inode, new); 162 cache = fat_cache_merge(inode, new);
156 if (cache != NULL) { 163 if (cache != NULL) {
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index ee42b9e0b16a..4ad64732cbce 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -98,7 +98,7 @@ next:
98 98
99 *bh = sb_bread(sb, phys); 99 *bh = sb_bread(sb, phys);
100 if (*bh == NULL) { 100 if (*bh == NULL) {
101 printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n", 101 fat_msg(sb, KERN_ERR, "Directory bread(block %llu) failed",
102 (llu)phys); 102 (llu)phys);
103 /* skip this block */ 103 /* skip this block */
104 *pos = (iblock + 1) << sb->s_blocksize_bits; 104 *pos = (iblock + 1) << sb->s_blocksize_bits;
@@ -136,9 +136,10 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos,
136 * but ignore that right now. 136 * but ignore that right now.
137 * Ahem... Stack smashing in ring 0 isn't fun. Fixed. 137 * Ahem... Stack smashing in ring 0 isn't fun. Fixed.
138 */ 138 */
139static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len, 139static int uni16_to_x8(struct super_block *sb, unsigned char *ascii,
140 int uni_xlate, struct nls_table *nls) 140 const wchar_t *uni, int len, struct nls_table *nls)
141{ 141{
142 int uni_xlate = MSDOS_SB(sb)->options.unicode_xlate;
142 const wchar_t *ip; 143 const wchar_t *ip;
143 wchar_t ec; 144 wchar_t ec;
144 unsigned char *op; 145 unsigned char *op;
@@ -166,23 +167,23 @@ static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len,
166 } 167 }
167 168
168 if (unlikely(*ip)) { 169 if (unlikely(*ip)) {
169 printk(KERN_WARNING "FAT: filename was truncated while " 170 fat_msg(sb, KERN_WARNING, "filename was truncated while "
170 "converting."); 171 "converting.");
171 } 172 }
172 173
173 *op = 0; 174 *op = 0;
174 return (op - ascii); 175 return (op - ascii);
175} 176}
176 177
177static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni, 178static inline int fat_uni_to_x8(struct super_block *sb, const wchar_t *uni,
178 unsigned char *buf, int size) 179 unsigned char *buf, int size)
179{ 180{
181 struct msdos_sb_info *sbi = MSDOS_SB(sb);
180 if (sbi->options.utf8) 182 if (sbi->options.utf8)
181 return utf16s_to_utf8s(uni, FAT_MAX_UNI_CHARS, 183 return utf16s_to_utf8s(uni, FAT_MAX_UNI_CHARS,
182 UTF16_HOST_ENDIAN, buf, size); 184 UTF16_HOST_ENDIAN, buf, size);
183 else 185 else
184 return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate, 186 return uni16_to_x8(sb, buf, uni, size, sbi->nls_io);
185 sbi->nls_io);
186} 187}
187 188
188static inline int 189static inline int
@@ -419,7 +420,7 @@ parse_record:
419 420
420 /* Compare shortname */ 421 /* Compare shortname */
421 bufuname[last_u] = 0x0000; 422 bufuname[last_u] = 0x0000;
422 len = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname)); 423 len = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname));
423 if (fat_name_match(sbi, name, name_len, bufname, len)) 424 if (fat_name_match(sbi, name, name_len, bufname, len))
424 goto found; 425 goto found;
425 426
@@ -428,7 +429,7 @@ parse_record:
428 int size = PATH_MAX - FAT_MAX_UNI_SIZE; 429 int size = PATH_MAX - FAT_MAX_UNI_SIZE;
429 430
430 /* Compare longname */ 431 /* Compare longname */
431 len = fat_uni_to_x8(sbi, unicode, longname, size); 432 len = fat_uni_to_x8(sb, unicode, longname, size);
432 if (fat_name_match(sbi, name, name_len, longname, len)) 433 if (fat_name_match(sbi, name, name_len, longname, len))
433 goto found; 434 goto found;
434 } 435 }
@@ -545,7 +546,7 @@ parse_record:
545 if (nr_slots) { 546 if (nr_slots) {
546 void *longname = unicode + FAT_MAX_UNI_CHARS; 547 void *longname = unicode + FAT_MAX_UNI_CHARS;
547 int size = PATH_MAX - FAT_MAX_UNI_SIZE; 548 int size = PATH_MAX - FAT_MAX_UNI_SIZE;
548 int len = fat_uni_to_x8(sbi, unicode, longname, size); 549 int len = fat_uni_to_x8(sb, unicode, longname, size);
549 550
550 fill_name = longname; 551 fill_name = longname;
551 fill_len = len; 552 fill_len = len;
@@ -621,7 +622,7 @@ parse_record:
621 622
622 if (isvfat) { 623 if (isvfat) {
623 bufuname[j] = 0x0000; 624 bufuname[j] = 0x0000;
624 i = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname)); 625 i = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname));
625 } 626 }
626 if (nr_slots) { 627 if (nr_slots) {
627 /* hack for fat_ioctl_filldir() */ 628 /* hack for fat_ioctl_filldir() */
@@ -979,6 +980,7 @@ static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots)
979 980
980int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo) 981int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
981{ 982{
983 struct super_block *sb = dir->i_sb;
982 struct msdos_dir_entry *de; 984 struct msdos_dir_entry *de;
983 struct buffer_head *bh; 985 struct buffer_head *bh;
984 int err = 0, nr_slots; 986 int err = 0, nr_slots;
@@ -1013,8 +1015,8 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
1013 */ 1015 */
1014 err = __fat_remove_entries(dir, sinfo->slot_off, nr_slots); 1016 err = __fat_remove_entries(dir, sinfo->slot_off, nr_slots);
1015 if (err) { 1017 if (err) {
1016 printk(KERN_WARNING 1018 fat_msg(sb, KERN_WARNING,
1017 "FAT: Couldn't remove the long name slots\n"); 1019 "Couldn't remove the long name slots");
1018 } 1020 }
1019 } 1021 }
1020 1022
@@ -1265,7 +1267,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
1265 if (sbi->fat_bits != 32) 1267 if (sbi->fat_bits != 32)
1266 goto error; 1268 goto error;
1267 } else if (MSDOS_I(dir)->i_start == 0) { 1269 } else if (MSDOS_I(dir)->i_start == 0) {
1268 printk(KERN_ERR "FAT: Corrupted directory (i_pos %lld)\n", 1270 fat_msg(sb, KERN_ERR, "Corrupted directory (i_pos %lld)",
1269 MSDOS_I(dir)->i_pos); 1271 MSDOS_I(dir)->i_pos);
1270 err = -EIO; 1272 err = -EIO;
1271 goto error; 1273 goto error;
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index f50408901f7e..8276cc282dec 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -319,19 +319,20 @@ extern struct inode *fat_build_inode(struct super_block *sb,
319 struct msdos_dir_entry *de, loff_t i_pos); 319 struct msdos_dir_entry *de, loff_t i_pos);
320extern int fat_sync_inode(struct inode *inode); 320extern int fat_sync_inode(struct inode *inode);
321extern int fat_fill_super(struct super_block *sb, void *data, int silent, 321extern int fat_fill_super(struct super_block *sb, void *data, int silent,
322 const struct inode_operations *fs_dir_inode_ops, 322 int isvfat, void (*setup)(struct super_block *));
323 int isvfat, void (*setup)(struct super_block *));
324 323
325extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, 324extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
326 struct inode *i2); 325 struct inode *i2);
327/* fat/misc.c */ 326/* fat/misc.c */
328extern void 327extern void
329__fat_fs_error(struct super_block *s, int report, const char *fmt, ...) 328__fat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
329 __attribute__ ((format (printf, 3, 4))) __cold;
330#define fat_fs_error(sb, fmt, args...) \
331 __fat_fs_error(sb, 1, fmt , ## args)
332#define fat_fs_error_ratelimit(sb, fmt, args...) \
333 __fat_fs_error(sb, __ratelimit(&MSDOS_SB(sb)->ratelimit), fmt , ## args)
334void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...)
330 __attribute__ ((format (printf, 3, 4))) __cold; 335 __attribute__ ((format (printf, 3, 4))) __cold;
331#define fat_fs_error(s, fmt, args...) \
332 __fat_fs_error(s, 1, fmt , ## args)
333#define fat_fs_error_ratelimit(s, fmt, args...) \
334 __fat_fs_error(s, __ratelimit(&MSDOS_SB(s)->ratelimit), fmt , ## args)
335extern int fat_clusters_flush(struct super_block *sb); 336extern int fat_clusters_flush(struct super_block *sb);
336extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); 337extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster);
337extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, 338extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index b47d2c9f4fa1..2e81ac0df7e2 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -95,7 +95,7 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent,
95err_brelse: 95err_brelse:
96 brelse(bhs[0]); 96 brelse(bhs[0]);
97err: 97err:
98 printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", (llu)blocknr); 98 fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr);
99 return -EIO; 99 return -EIO;
100} 100}
101 101
@@ -108,7 +108,7 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent,
108 fatent->fat_inode = MSDOS_SB(sb)->fat_inode; 108 fatent->fat_inode = MSDOS_SB(sb)->fat_inode;
109 fatent->bhs[0] = sb_bread(sb, blocknr); 109 fatent->bhs[0] = sb_bread(sb, blocknr);
110 if (!fatent->bhs[0]) { 110 if (!fatent->bhs[0]) {
111 printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", 111 fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)",
112 (llu)blocknr); 112 (llu)blocknr);
113 return -EIO; 113 return -EIO;
114 } 114 }
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 8d68690bdcf1..cb8d8391ac0b 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -581,7 +581,8 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
581 buf->f_bavail = sbi->free_clusters; 581 buf->f_bavail = sbi->free_clusters;
582 buf->f_fsid.val[0] = (u32)id; 582 buf->f_fsid.val[0] = (u32)id;
583 buf->f_fsid.val[1] = (u32)(id >> 32); 583 buf->f_fsid.val[1] = (u32)(id >> 32);
584 buf->f_namelen = sbi->options.isvfat ? FAT_LFN_LEN : 12; 584 buf->f_namelen =
585 (sbi->options.isvfat ? FAT_LFN_LEN : 12) * NLS_MAX_CHARSET_SIZE;
585 586
586 return 0; 587 return 0;
587} 588}
@@ -619,8 +620,8 @@ retry:
619 620
620 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); 621 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
621 if (!bh) { 622 if (!bh) {
622 printk(KERN_ERR "FAT: unable to read inode block " 623 fat_msg(sb, KERN_ERR, "unable to read inode block "
623 "for updating (i_pos %lld)\n", i_pos); 624 "for updating (i_pos %lld)", i_pos);
624 return -EIO; 625 return -EIO;
625 } 626 }
626 spin_lock(&sbi->inode_hash_lock); 627 spin_lock(&sbi->inode_hash_lock);
@@ -976,8 +977,8 @@ static const match_table_t vfat_tokens = {
976 {Opt_err, NULL} 977 {Opt_err, NULL}
977}; 978};
978 979
979static int parse_options(char *options, int is_vfat, int silent, int *debug, 980static int parse_options(struct super_block *sb, char *options, int is_vfat,
980 struct fat_mount_options *opts) 981 int silent, int *debug, struct fat_mount_options *opts)
981{ 982{
982 char *p; 983 char *p;
983 substring_t args[MAX_OPT_ARGS]; 984 substring_t args[MAX_OPT_ARGS];
@@ -1168,15 +1169,15 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1168 1169
1169 /* obsolete mount options */ 1170 /* obsolete mount options */
1170 case Opt_obsolate: 1171 case Opt_obsolate:
1171 printk(KERN_INFO "FAT: \"%s\" option is obsolete, " 1172 fat_msg(sb, KERN_INFO, "\"%s\" option is obsolete, "
1172 "not supported now\n", p); 1173 "not supported now", p);
1173 break; 1174 break;
1174 /* unknown option */ 1175 /* unknown option */
1175 default: 1176 default:
1176 if (!silent) { 1177 if (!silent) {
1177 printk(KERN_ERR 1178 fat_msg(sb, KERN_ERR,
1178 "FAT: Unrecognized mount option \"%s\" " 1179 "Unrecognized mount option \"%s\" "
1179 "or missing value\n", p); 1180 "or missing value", p);
1180 } 1181 }
1181 return -EINVAL; 1182 return -EINVAL;
1182 } 1183 }
@@ -1185,7 +1186,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1185out: 1186out:
1186 /* UTF-8 doesn't provide FAT semantics */ 1187 /* UTF-8 doesn't provide FAT semantics */
1187 if (!strcmp(opts->iocharset, "utf8")) { 1188 if (!strcmp(opts->iocharset, "utf8")) {
1188 printk(KERN_ERR "FAT: utf8 is not a recommended IO charset" 1189 fat_msg(sb, KERN_ERR, "utf8 is not a recommended IO charset"
1189 " for FAT filesystems, filesystem will be " 1190 " for FAT filesystems, filesystem will be "
1190 "case sensitive!\n"); 1191 "case sensitive!\n");
1191 } 1192 }
@@ -1238,8 +1239,7 @@ static int fat_read_root(struct inode *inode)
1238/* 1239/*
1239 * Read the super block of an MS-DOS FS. 1240 * Read the super block of an MS-DOS FS.
1240 */ 1241 */
1241int fat_fill_super(struct super_block *sb, void *data, int silent, 1242int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1242 const struct inode_operations *fs_dir_inode_ops, int isvfat,
1243 void (*setup)(struct super_block *)) 1243 void (*setup)(struct super_block *))
1244{ 1244{
1245 struct inode *root_inode = NULL, *fat_inode = NULL; 1245 struct inode *root_inode = NULL, *fat_inode = NULL;
@@ -1268,11 +1268,10 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1268 sb->s_magic = MSDOS_SUPER_MAGIC; 1268 sb->s_magic = MSDOS_SUPER_MAGIC;
1269 sb->s_op = &fat_sops; 1269 sb->s_op = &fat_sops;
1270 sb->s_export_op = &fat_export_ops; 1270 sb->s_export_op = &fat_export_ops;
1271 sbi->dir_ops = fs_dir_inode_ops;
1272 ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL, 1271 ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL,
1273 DEFAULT_RATELIMIT_BURST); 1272 DEFAULT_RATELIMIT_BURST);
1274 1273
1275 error = parse_options(data, isvfat, silent, &debug, &sbi->options); 1274 error = parse_options(sb, data, isvfat, silent, &debug, &sbi->options);
1276 if (error) 1275 if (error)
1277 goto out_fail; 1276 goto out_fail;
1278 1277
@@ -1282,20 +1281,20 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1282 sb_min_blocksize(sb, 512); 1281 sb_min_blocksize(sb, 512);
1283 bh = sb_bread(sb, 0); 1282 bh = sb_bread(sb, 0);
1284 if (bh == NULL) { 1283 if (bh == NULL) {
1285 printk(KERN_ERR "FAT: unable to read boot sector\n"); 1284 fat_msg(sb, KERN_ERR, "unable to read boot sector");
1286 goto out_fail; 1285 goto out_fail;
1287 } 1286 }
1288 1287
1289 b = (struct fat_boot_sector *) bh->b_data; 1288 b = (struct fat_boot_sector *) bh->b_data;
1290 if (!b->reserved) { 1289 if (!b->reserved) {
1291 if (!silent) 1290 if (!silent)
1292 printk(KERN_ERR "FAT: bogus number of reserved sectors\n"); 1291 fat_msg(sb, KERN_ERR, "bogus number of reserved sectors");
1293 brelse(bh); 1292 brelse(bh);
1294 goto out_invalid; 1293 goto out_invalid;
1295 } 1294 }
1296 if (!b->fats) { 1295 if (!b->fats) {
1297 if (!silent) 1296 if (!silent)
1298 printk(KERN_ERR "FAT: bogus number of FAT structure\n"); 1297 fat_msg(sb, KERN_ERR, "bogus number of FAT structure");
1299 brelse(bh); 1298 brelse(bh);
1300 goto out_invalid; 1299 goto out_invalid;
1301 } 1300 }
@@ -1308,7 +1307,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1308 media = b->media; 1307 media = b->media;
1309 if (!fat_valid_media(media)) { 1308 if (!fat_valid_media(media)) {
1310 if (!silent) 1309 if (!silent)
1311 printk(KERN_ERR "FAT: invalid media value (0x%02x)\n", 1310 fat_msg(sb, KERN_ERR, "invalid media value (0x%02x)",
1312 media); 1311 media);
1313 brelse(bh); 1312 brelse(bh);
1314 goto out_invalid; 1313 goto out_invalid;
@@ -1318,7 +1317,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1318 || (logical_sector_size < 512) 1317 || (logical_sector_size < 512)
1319 || (logical_sector_size > 4096)) { 1318 || (logical_sector_size > 4096)) {
1320 if (!silent) 1319 if (!silent)
1321 printk(KERN_ERR "FAT: bogus logical sector size %u\n", 1320 fat_msg(sb, KERN_ERR, "bogus logical sector size %u",
1322 logical_sector_size); 1321 logical_sector_size);
1323 brelse(bh); 1322 brelse(bh);
1324 goto out_invalid; 1323 goto out_invalid;
@@ -1326,15 +1325,15 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1326 sbi->sec_per_clus = b->sec_per_clus; 1325 sbi->sec_per_clus = b->sec_per_clus;
1327 if (!is_power_of_2(sbi->sec_per_clus)) { 1326 if (!is_power_of_2(sbi->sec_per_clus)) {
1328 if (!silent) 1327 if (!silent)
1329 printk(KERN_ERR "FAT: bogus sectors per cluster %u\n", 1328 fat_msg(sb, KERN_ERR, "bogus sectors per cluster %u",
1330 sbi->sec_per_clus); 1329 sbi->sec_per_clus);
1331 brelse(bh); 1330 brelse(bh);
1332 goto out_invalid; 1331 goto out_invalid;
1333 } 1332 }
1334 1333
1335 if (logical_sector_size < sb->s_blocksize) { 1334 if (logical_sector_size < sb->s_blocksize) {
1336 printk(KERN_ERR "FAT: logical sector size too small for device" 1335 fat_msg(sb, KERN_ERR, "logical sector size too small for device"
1337 " (logical sector size = %u)\n", logical_sector_size); 1336 " (logical sector size = %u)", logical_sector_size);
1338 brelse(bh); 1337 brelse(bh);
1339 goto out_fail; 1338 goto out_fail;
1340 } 1339 }
@@ -1342,14 +1341,14 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1342 brelse(bh); 1341 brelse(bh);
1343 1342
1344 if (!sb_set_blocksize(sb, logical_sector_size)) { 1343 if (!sb_set_blocksize(sb, logical_sector_size)) {
1345 printk(KERN_ERR "FAT: unable to set blocksize %u\n", 1344 fat_msg(sb, KERN_ERR, "unable to set blocksize %u",
1346 logical_sector_size); 1345 logical_sector_size);
1347 goto out_fail; 1346 goto out_fail;
1348 } 1347 }
1349 bh = sb_bread(sb, 0); 1348 bh = sb_bread(sb, 0);
1350 if (bh == NULL) { 1349 if (bh == NULL) {
1351 printk(KERN_ERR "FAT: unable to read boot sector" 1350 fat_msg(sb, KERN_ERR, "unable to read boot sector"
1352 " (logical sector size = %lu)\n", 1351 " (logical sector size = %lu)",
1353 sb->s_blocksize); 1352 sb->s_blocksize);
1354 goto out_fail; 1353 goto out_fail;
1355 } 1354 }
@@ -1385,16 +1384,16 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1385 1384
1386 fsinfo_bh = sb_bread(sb, sbi->fsinfo_sector); 1385 fsinfo_bh = sb_bread(sb, sbi->fsinfo_sector);
1387 if (fsinfo_bh == NULL) { 1386 if (fsinfo_bh == NULL) {
1388 printk(KERN_ERR "FAT: bread failed, FSINFO block" 1387 fat_msg(sb, KERN_ERR, "bread failed, FSINFO block"
1389 " (sector = %lu)\n", sbi->fsinfo_sector); 1388 " (sector = %lu)", sbi->fsinfo_sector);
1390 brelse(bh); 1389 brelse(bh);
1391 goto out_fail; 1390 goto out_fail;
1392 } 1391 }
1393 1392
1394 fsinfo = (struct fat_boot_fsinfo *)fsinfo_bh->b_data; 1393 fsinfo = (struct fat_boot_fsinfo *)fsinfo_bh->b_data;
1395 if (!IS_FSINFO(fsinfo)) { 1394 if (!IS_FSINFO(fsinfo)) {
1396 printk(KERN_WARNING "FAT: Invalid FSINFO signature: " 1395 fat_msg(sb, KERN_WARNING, "Invalid FSINFO signature: "
1397 "0x%08x, 0x%08x (sector = %lu)\n", 1396 "0x%08x, 0x%08x (sector = %lu)",
1398 le32_to_cpu(fsinfo->signature1), 1397 le32_to_cpu(fsinfo->signature1),
1399 le32_to_cpu(fsinfo->signature2), 1398 le32_to_cpu(fsinfo->signature2),
1400 sbi->fsinfo_sector); 1399 sbi->fsinfo_sector);
@@ -1415,8 +1414,8 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1415 sbi->dir_entries = get_unaligned_le16(&b->dir_entries); 1414 sbi->dir_entries = get_unaligned_le16(&b->dir_entries);
1416 if (sbi->dir_entries & (sbi->dir_per_block - 1)) { 1415 if (sbi->dir_entries & (sbi->dir_per_block - 1)) {
1417 if (!silent) 1416 if (!silent)
1418 printk(KERN_ERR "FAT: bogus directroy-entries per block" 1417 fat_msg(sb, KERN_ERR, "bogus directroy-entries per block"
1419 " (%u)\n", sbi->dir_entries); 1418 " (%u)", sbi->dir_entries);
1420 brelse(bh); 1419 brelse(bh);
1421 goto out_invalid; 1420 goto out_invalid;
1422 } 1421 }
@@ -1438,7 +1437,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1438 total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT); 1437 total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT);
1439 if (total_clusters > MAX_FAT(sb)) { 1438 if (total_clusters > MAX_FAT(sb)) {
1440 if (!silent) 1439 if (!silent)
1441 printk(KERN_ERR "FAT: count of clusters too big (%u)\n", 1440 fat_msg(sb, KERN_ERR, "count of clusters too big (%u)",
1442 total_clusters); 1441 total_clusters);
1443 brelse(bh); 1442 brelse(bh);
1444 goto out_invalid; 1443 goto out_invalid;
@@ -1471,7 +1470,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1471 sprintf(buf, "cp%d", sbi->options.codepage); 1470 sprintf(buf, "cp%d", sbi->options.codepage);
1472 sbi->nls_disk = load_nls(buf); 1471 sbi->nls_disk = load_nls(buf);
1473 if (!sbi->nls_disk) { 1472 if (!sbi->nls_disk) {
1474 printk(KERN_ERR "FAT: codepage %s not found\n", buf); 1473 fat_msg(sb, KERN_ERR, "codepage %s not found", buf);
1475 goto out_fail; 1474 goto out_fail;
1476 } 1475 }
1477 1476
@@ -1479,7 +1478,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1479 if (sbi->options.isvfat) { 1478 if (sbi->options.isvfat) {
1480 sbi->nls_io = load_nls(sbi->options.iocharset); 1479 sbi->nls_io = load_nls(sbi->options.iocharset);
1481 if (!sbi->nls_io) { 1480 if (!sbi->nls_io) {
1482 printk(KERN_ERR "FAT: IO charset %s not found\n", 1481 fat_msg(sb, KERN_ERR, "IO charset %s not found",
1483 sbi->options.iocharset); 1482 sbi->options.iocharset);
1484 goto out_fail; 1483 goto out_fail;
1485 } 1484 }
@@ -1503,7 +1502,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1503 insert_inode_hash(root_inode); 1502 insert_inode_hash(root_inode);
1504 sb->s_root = d_alloc_root(root_inode); 1503 sb->s_root = d_alloc_root(root_inode);
1505 if (!sb->s_root) { 1504 if (!sb->s_root) {
1506 printk(KERN_ERR "FAT: get root inode failed\n"); 1505 fat_msg(sb, KERN_ERR, "get root inode failed");
1507 goto out_fail; 1506 goto out_fail;
1508 } 1507 }
1509 1508
@@ -1512,8 +1511,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1512out_invalid: 1511out_invalid:
1513 error = -EINVAL; 1512 error = -EINVAL;
1514 if (!silent) 1513 if (!silent)
1515 printk(KERN_INFO "VFS: Can't find a valid FAT filesystem" 1514 fat_msg(sb, KERN_INFO, "Can't find a valid FAT filesystem");
1516 " on dev %s.\n", sb->s_id);
1517 1515
1518out_fail: 1516out_fail:
1519 if (fat_inode) 1517 if (fat_inode)
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 970e682ea754..6d93360ca0cc 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -20,30 +20,46 @@
20 * In case the file system is remounted read-only, it can be made writable 20 * In case the file system is remounted read-only, it can be made writable
21 * again by remounting it. 21 * again by remounting it.
22 */ 22 */
23void __fat_fs_error(struct super_block *s, int report, const char *fmt, ...) 23void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
24{ 24{
25 struct fat_mount_options *opts = &MSDOS_SB(s)->options; 25 struct fat_mount_options *opts = &MSDOS_SB(sb)->options;
26 va_list args; 26 va_list args;
27 struct va_format vaf;
27 28
28 if (report) { 29 if (report) {
29 printk(KERN_ERR "FAT: Filesystem error (dev %s)\n", s->s_id);
30
31 printk(KERN_ERR " ");
32 va_start(args, fmt); 30 va_start(args, fmt);
33 vprintk(fmt, args); 31 vaf.fmt = fmt;
32 vaf.va = &args;
33 printk(KERN_ERR "FAT-fs (%s): error, %pV\n", sb->s_id, &vaf);
34 va_end(args); 34 va_end(args);
35 printk("\n");
36 } 35 }
37 36
38 if (opts->errors == FAT_ERRORS_PANIC) 37 if (opts->errors == FAT_ERRORS_PANIC)
39 panic("FAT: fs panic from previous error\n"); 38 panic("FAT-fs (%s): fs panic from previous error\n", sb->s_id);
40 else if (opts->errors == FAT_ERRORS_RO && !(s->s_flags & MS_RDONLY)) { 39 else if (opts->errors == FAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) {
41 s->s_flags |= MS_RDONLY; 40 sb->s_flags |= MS_RDONLY;
42 printk(KERN_ERR "FAT: Filesystem has been set read-only\n"); 41 printk(KERN_ERR "FAT-fs (%s): Filesystem has been "
42 "set read-only\n", sb->s_id);
43 } 43 }
44} 44}
45EXPORT_SYMBOL_GPL(__fat_fs_error); 45EXPORT_SYMBOL_GPL(__fat_fs_error);
46 46
47/**
48 * fat_msg() - print preformated FAT specific messages. Every thing what is
49 * not fat_fs_error() should be fat_msg().
50 */
51void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...)
52{
53 struct va_format vaf;
54 va_list args;
55
56 va_start(args, fmt);
57 vaf.fmt = fmt;
58 vaf.va = &args;
59 printk("%sFAT-fs (%s): %pV\n", level, sb->s_id, &vaf);
60 va_end(args);
61}
62
47/* Flushes the number of free clusters on FAT32 */ 63/* Flushes the number of free clusters on FAT32 */
48/* XXX: Need to write one per FSINFO block. Currently only writes 1 */ 64/* XXX: Need to write one per FSINFO block. Currently only writes 1 */
49int fat_clusters_flush(struct super_block *sb) 65int fat_clusters_flush(struct super_block *sb)
@@ -57,15 +73,15 @@ int fat_clusters_flush(struct super_block *sb)
57 73
58 bh = sb_bread(sb, sbi->fsinfo_sector); 74 bh = sb_bread(sb, sbi->fsinfo_sector);
59 if (bh == NULL) { 75 if (bh == NULL) {
60 printk(KERN_ERR "FAT: bread failed in fat_clusters_flush\n"); 76 fat_msg(sb, KERN_ERR, "bread failed in fat_clusters_flush");
61 return -EIO; 77 return -EIO;
62 } 78 }
63 79
64 fsinfo = (struct fat_boot_fsinfo *)bh->b_data; 80 fsinfo = (struct fat_boot_fsinfo *)bh->b_data;
65 /* Sanity check */ 81 /* Sanity check */
66 if (!IS_FSINFO(fsinfo)) { 82 if (!IS_FSINFO(fsinfo)) {
67 printk(KERN_ERR "FAT: Invalid FSINFO signature: " 83 fat_msg(sb, KERN_ERR, "Invalid FSINFO signature: "
68 "0x%08x, 0x%08x (sector = %lu)\n", 84 "0x%08x, 0x%08x (sector = %lu)",
69 le32_to_cpu(fsinfo->signature1), 85 le32_to_cpu(fsinfo->signature1),
70 le32_to_cpu(fsinfo->signature2), 86 le32_to_cpu(fsinfo->signature2),
71 sbi->fsinfo_sector); 87 sbi->fsinfo_sector);
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 711499040eb6..3b222dafd15b 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -659,14 +659,14 @@ static const struct inode_operations msdos_dir_inode_operations = {
659 659
660static void setup(struct super_block *sb) 660static void setup(struct super_block *sb)
661{ 661{
662 MSDOS_SB(sb)->dir_ops = &msdos_dir_inode_operations;
662 sb->s_d_op = &msdos_dentry_operations; 663 sb->s_d_op = &msdos_dentry_operations;
663 sb->s_flags |= MS_NOATIME; 664 sb->s_flags |= MS_NOATIME;
664} 665}
665 666
666static int msdos_fill_super(struct super_block *sb, void *data, int silent) 667static int msdos_fill_super(struct super_block *sb, void *data, int silent)
667{ 668{
668 return fat_fill_super(sb, data, silent, &msdos_dir_inode_operations, 669 return fat_fill_super(sb, data, silent, 0, setup);
669 0, setup);
670} 670}
671 671
672static struct dentry *msdos_mount(struct file_system_type *fs_type, 672static struct dentry *msdos_mount(struct file_system_type *fs_type,
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index adae3fb7451a..20b4ea53fdc4 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -1065,6 +1065,7 @@ static const struct inode_operations vfat_dir_inode_operations = {
1065 1065
1066static void setup(struct super_block *sb) 1066static void setup(struct super_block *sb)
1067{ 1067{
1068 MSDOS_SB(sb)->dir_ops = &vfat_dir_inode_operations;
1068 if (MSDOS_SB(sb)->options.name_check != 's') 1069 if (MSDOS_SB(sb)->options.name_check != 's')
1069 sb->s_d_op = &vfat_ci_dentry_ops; 1070 sb->s_d_op = &vfat_ci_dentry_ops;
1070 else 1071 else
@@ -1073,8 +1074,7 @@ static void setup(struct super_block *sb)
1073 1074
1074static int vfat_fill_super(struct super_block *sb, void *data, int silent) 1075static int vfat_fill_super(struct super_block *sb, void *data, int silent)
1075{ 1076{
1076 return fat_fill_super(sb, data, silent, &vfat_dir_inode_operations, 1077 return fat_fill_super(sb, data, silent, 1, setup);
1077 1, setup);
1078} 1078}
1079 1079
1080static struct dentry *vfat_mount(struct file_system_type *fs_type, 1080static struct dentry *vfat_mount(struct file_system_type *fs_type,
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 2ba6719ac612..1a4311437a8b 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -272,7 +272,7 @@ vxfs_get_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip)
272 * *ip: VFS inode 272 * *ip: VFS inode
273 * 273 *
274 * Description: 274 * Description:
275 * vxfs_put_fake_inode frees all data asssociated with @ip. 275 * vxfs_put_fake_inode frees all data associated with @ip.
276 */ 276 */
277void 277void
278vxfs_put_fake_inode(struct inode *ip) 278vxfs_put_fake_inode(struct inode *ip)
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c
index 48a18f184d50..30afdfa7aec7 100644
--- a/fs/fscache/operation.c
+++ b/fs/fscache/operation.c
@@ -33,8 +33,6 @@ void fscache_enqueue_operation(struct fscache_operation *op)
33 _enter("{OBJ%x OP%x,%u}", 33 _enter("{OBJ%x OP%x,%u}",
34 op->object->debug_id, op->debug_id, atomic_read(&op->usage)); 34 op->object->debug_id, op->debug_id, atomic_read(&op->usage));
35 35
36 fscache_set_op_state(op, "EnQ");
37
38 ASSERT(list_empty(&op->pend_link)); 36 ASSERT(list_empty(&op->pend_link));
39 ASSERT(op->processor != NULL); 37 ASSERT(op->processor != NULL);
40 ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); 38 ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE);
@@ -66,8 +64,6 @@ EXPORT_SYMBOL(fscache_enqueue_operation);
66static void fscache_run_op(struct fscache_object *object, 64static void fscache_run_op(struct fscache_object *object,
67 struct fscache_operation *op) 65 struct fscache_operation *op)
68{ 66{
69 fscache_set_op_state(op, "Run");
70
71 object->n_in_progress++; 67 object->n_in_progress++;
72 if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) 68 if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags))
73 wake_up_bit(&op->flags, FSCACHE_OP_WAITING); 69 wake_up_bit(&op->flags, FSCACHE_OP_WAITING);
@@ -88,8 +84,6 @@ int fscache_submit_exclusive_op(struct fscache_object *object,
88 84
89 _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); 85 _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id);
90 86
91 fscache_set_op_state(op, "SubmitX");
92
93 spin_lock(&object->lock); 87 spin_lock(&object->lock);
94 ASSERTCMP(object->n_ops, >=, object->n_in_progress); 88 ASSERTCMP(object->n_ops, >=, object->n_in_progress);
95 ASSERTCMP(object->n_ops, >=, object->n_exclusive); 89 ASSERTCMP(object->n_ops, >=, object->n_exclusive);
@@ -194,8 +188,6 @@ int fscache_submit_op(struct fscache_object *object,
194 188
195 ASSERTCMP(atomic_read(&op->usage), >, 0); 189 ASSERTCMP(atomic_read(&op->usage), >, 0);
196 190
197 fscache_set_op_state(op, "Submit");
198
199 spin_lock(&object->lock); 191 spin_lock(&object->lock);
200 ASSERTCMP(object->n_ops, >=, object->n_in_progress); 192 ASSERTCMP(object->n_ops, >=, object->n_in_progress);
201 ASSERTCMP(object->n_ops, >=, object->n_exclusive); 193 ASSERTCMP(object->n_ops, >=, object->n_exclusive);
@@ -335,8 +327,6 @@ void fscache_put_operation(struct fscache_operation *op)
335 if (!atomic_dec_and_test(&op->usage)) 327 if (!atomic_dec_and_test(&op->usage))
336 return; 328 return;
337 329
338 fscache_set_op_state(op, "Put");
339
340 _debug("PUT OP"); 330 _debug("PUT OP");
341 if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags)) 331 if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags))
342 BUG(); 332 BUG();
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 41c441c2058d..a2a5d19ece6a 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -155,11 +155,9 @@ static void fscache_attr_changed_op(struct fscache_operation *op)
155 fscache_stat(&fscache_n_attr_changed_calls); 155 fscache_stat(&fscache_n_attr_changed_calls);
156 156
157 if (fscache_object_is_active(object)) { 157 if (fscache_object_is_active(object)) {
158 fscache_set_op_state(op, "CallFS");
159 fscache_stat(&fscache_n_cop_attr_changed); 158 fscache_stat(&fscache_n_cop_attr_changed);
160 ret = object->cache->ops->attr_changed(object); 159 ret = object->cache->ops->attr_changed(object);
161 fscache_stat_d(&fscache_n_cop_attr_changed); 160 fscache_stat_d(&fscache_n_cop_attr_changed);
162 fscache_set_op_state(op, "Done");
163 if (ret < 0) 161 if (ret < 0)
164 fscache_abort_object(object); 162 fscache_abort_object(object);
165 } 163 }
@@ -190,7 +188,6 @@ int __fscache_attr_changed(struct fscache_cookie *cookie)
190 188
191 fscache_operation_init(op, fscache_attr_changed_op, NULL); 189 fscache_operation_init(op, fscache_attr_changed_op, NULL);
192 op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE); 190 op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE);
193 fscache_set_op_name(op, "Attr");
194 191
195 spin_lock(&cookie->lock); 192 spin_lock(&cookie->lock);
196 193
@@ -257,7 +254,6 @@ static struct fscache_retrieval *fscache_alloc_retrieval(
257 op->context = context; 254 op->context = context;
258 op->start_time = jiffies; 255 op->start_time = jiffies;
259 INIT_LIST_HEAD(&op->to_do); 256 INIT_LIST_HEAD(&op->to_do);
260 fscache_set_op_name(&op->op, "Retr");
261 return op; 257 return op;
262} 258}
263 259
@@ -368,7 +364,6 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie,
368 _leave(" = -ENOMEM"); 364 _leave(" = -ENOMEM");
369 return -ENOMEM; 365 return -ENOMEM;
370 } 366 }
371 fscache_set_op_name(&op->op, "RetrRA1");
372 367
373 spin_lock(&cookie->lock); 368 spin_lock(&cookie->lock);
374 369
@@ -487,7 +482,6 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
487 op = fscache_alloc_retrieval(mapping, end_io_func, context); 482 op = fscache_alloc_retrieval(mapping, end_io_func, context);
488 if (!op) 483 if (!op)
489 return -ENOMEM; 484 return -ENOMEM;
490 fscache_set_op_name(&op->op, "RetrRAN");
491 485
492 spin_lock(&cookie->lock); 486 spin_lock(&cookie->lock);
493 487
@@ -589,7 +583,6 @@ int __fscache_alloc_page(struct fscache_cookie *cookie,
589 op = fscache_alloc_retrieval(page->mapping, NULL, NULL); 583 op = fscache_alloc_retrieval(page->mapping, NULL, NULL);
590 if (!op) 584 if (!op)
591 return -ENOMEM; 585 return -ENOMEM;
592 fscache_set_op_name(&op->op, "RetrAL1");
593 586
594 spin_lock(&cookie->lock); 587 spin_lock(&cookie->lock);
595 588
@@ -662,8 +655,6 @@ static void fscache_write_op(struct fscache_operation *_op)
662 655
663 _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage)); 656 _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage));
664 657
665 fscache_set_op_state(&op->op, "GetPage");
666
667 spin_lock(&object->lock); 658 spin_lock(&object->lock);
668 cookie = object->cookie; 659 cookie = object->cookie;
669 660
@@ -698,15 +689,12 @@ static void fscache_write_op(struct fscache_operation *_op)
698 spin_unlock(&cookie->stores_lock); 689 spin_unlock(&cookie->stores_lock);
699 spin_unlock(&object->lock); 690 spin_unlock(&object->lock);
700 691
701 fscache_set_op_state(&op->op, "Store");
702 fscache_stat(&fscache_n_store_pages); 692 fscache_stat(&fscache_n_store_pages);
703 fscache_stat(&fscache_n_cop_write_page); 693 fscache_stat(&fscache_n_cop_write_page);
704 ret = object->cache->ops->write_page(op, page); 694 ret = object->cache->ops->write_page(op, page);
705 fscache_stat_d(&fscache_n_cop_write_page); 695 fscache_stat_d(&fscache_n_cop_write_page);
706 fscache_set_op_state(&op->op, "EndWrite");
707 fscache_end_page_write(object, page); 696 fscache_end_page_write(object, page);
708 if (ret < 0) { 697 if (ret < 0) {
709 fscache_set_op_state(&op->op, "Abort");
710 fscache_abort_object(object); 698 fscache_abort_object(object);
711 } else { 699 } else {
712 fscache_enqueue_operation(&op->op); 700 fscache_enqueue_operation(&op->op);
@@ -778,7 +766,6 @@ int __fscache_write_page(struct fscache_cookie *cookie,
778 fscache_operation_init(&op->op, fscache_write_op, 766 fscache_operation_init(&op->op, fscache_write_op,
779 fscache_release_write_op); 767 fscache_release_write_op);
780 op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING); 768 op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING);
781 fscache_set_op_name(&op->op, "Write1");
782 769
783 ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); 770 ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
784 if (ret < 0) 771 if (ret < 0)
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index f3d23ef4e876..86128202384f 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,9 +1,9 @@
1ccflags-y := -I$(src) 1ccflags-y := -I$(src)
2obj-$(CONFIG_GFS2_FS) += gfs2.o 2obj-$(CONFIG_GFS2_FS) += gfs2.o
3gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \ 3gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \
4 glops.o inode.o log.o lops.o main.o meta_io.o \ 4 glops.o log.o lops.o main.o meta_io.o \
5 aops.o dentry.o export.o file.o \ 5 aops.o dentry.o export.o file.o \
6 ops_fstype.o ops_inode.o quota.o \ 6 ops_fstype.o inode.o quota.o \
7 recovery.o rgrp.o super.o sys.o trans.o util.o 7 recovery.o rgrp.o super.o sys.o trans.o util.o
8 8
9gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o 9gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 0f5c4f9d5d62..802ac5eeba28 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1076,8 +1076,8 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
1076 bd = bh->b_private; 1076 bd = bh->b_private;
1077 if (bd && bd->bd_ail) 1077 if (bd && bd->bd_ail)
1078 goto cannot_release; 1078 goto cannot_release;
1079 gfs2_assert_warn(sdp, !buffer_pinned(bh)); 1079 if (buffer_pinned(bh) || buffer_dirty(bh))
1080 gfs2_assert_warn(sdp, !buffer_dirty(bh)); 1080 goto not_possible;
1081 bh = bh->b_this_page; 1081 bh = bh->b_this_page;
1082 } while(bh != head); 1082 } while(bh != head);
1083 gfs2_log_unlock(sdp); 1083 gfs2_log_unlock(sdp);
@@ -1107,6 +1107,10 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
1107 } while (bh != head); 1107 } while (bh != head);
1108 1108
1109 return try_to_free_buffers(page); 1109 return try_to_free_buffers(page);
1110
1111not_possible: /* Should never happen */
1112 WARN_ON(buffer_dirty(bh));
1113 WARN_ON(buffer_pinned(bh));
1110cannot_release: 1114cannot_release:
1111 gfs2_log_unlock(sdp); 1115 gfs2_log_unlock(sdp);
1112 return 0; 1116 return 0;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 74add2ddcc3f..e65493a8ac00 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -780,6 +780,8 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
780 metadata = (height != ip->i_height - 1); 780 metadata = (height != ip->i_height - 1);
781 if (metadata) 781 if (metadata)
782 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; 782 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
783 else if (ip->i_depth)
784 revokes = sdp->sd_inptrs;
783 785
784 if (ip != GFS2_I(sdp->sd_rindex)) 786 if (ip != GFS2_I(sdp->sd_rindex))
785 error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); 787 error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index f789c5732b7c..091ee4779538 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -82,12 +82,9 @@
82struct qstr gfs2_qdot __read_mostly; 82struct qstr gfs2_qdot __read_mostly;
83struct qstr gfs2_qdotdot __read_mostly; 83struct qstr gfs2_qdotdot __read_mostly;
84 84
85typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len,
86 u64 leaf_no, void *data);
87typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, 85typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent,
88 const struct qstr *name, void *opaque); 86 const struct qstr *name, void *opaque);
89 87
90
91int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, 88int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
92 struct buffer_head **bhp) 89 struct buffer_head **bhp)
93{ 90{
@@ -1600,7 +1597,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1600 */ 1597 */
1601 1598
1602int gfs2_dir_add(struct inode *inode, const struct qstr *name, 1599int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1603 const struct gfs2_inode *nip, unsigned type) 1600 const struct gfs2_inode *nip)
1604{ 1601{
1605 struct gfs2_inode *ip = GFS2_I(inode); 1602 struct gfs2_inode *ip = GFS2_I(inode);
1606 struct buffer_head *bh; 1603 struct buffer_head *bh;
@@ -1616,7 +1613,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1616 return PTR_ERR(dent); 1613 return PTR_ERR(dent);
1617 dent = gfs2_init_dirent(inode, dent, name, bh); 1614 dent = gfs2_init_dirent(inode, dent, name, bh);
1618 gfs2_inum_out(nip, dent); 1615 gfs2_inum_out(nip, dent);
1619 dent->de_type = cpu_to_be16(type); 1616 dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode));
1620 if (ip->i_diskflags & GFS2_DIF_EXHASH) { 1617 if (ip->i_diskflags & GFS2_DIF_EXHASH) {
1621 leaf = (struct gfs2_leaf *)bh->b_data; 1618 leaf = (struct gfs2_leaf *)bh->b_data;
1622 be16_add_cpu(&leaf->lf_entries, 1); 1619 be16_add_cpu(&leaf->lf_entries, 1);
@@ -1628,6 +1625,8 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1628 gfs2_trans_add_bh(ip->i_gl, bh, 1); 1625 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1629 ip->i_entries++; 1626 ip->i_entries++;
1630 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1627 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1628 if (S_ISDIR(nip->i_inode.i_mode))
1629 inc_nlink(&ip->i_inode);
1631 gfs2_dinode_out(ip, bh->b_data); 1630 gfs2_dinode_out(ip, bh->b_data);
1632 brelse(bh); 1631 brelse(bh);
1633 error = 0; 1632 error = 0;
@@ -1672,8 +1671,9 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1672 * Returns: 0 on success, error code on failure 1671 * Returns: 0 on success, error code on failure
1673 */ 1672 */
1674 1673
1675int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name) 1674int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
1676{ 1675{
1676 const struct qstr *name = &dentry->d_name;
1677 struct gfs2_dirent *dent, *prev = NULL; 1677 struct gfs2_dirent *dent, *prev = NULL;
1678 struct buffer_head *bh; 1678 struct buffer_head *bh;
1679 int error; 1679 int error;
@@ -1714,6 +1714,8 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
1714 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1714 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1715 dip->i_entries--; 1715 dip->i_entries--;
1716 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; 1716 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
1717 if (S_ISDIR(dentry->d_inode->i_mode))
1718 drop_nlink(&dip->i_inode);
1717 gfs2_dinode_out(dip, bh->b_data); 1719 gfs2_dinode_out(dip, bh->b_data);
1718 brelse(bh); 1720 brelse(bh);
1719 mark_inode_dirty(&dip->i_inode); 1721 mark_inode_dirty(&dip->i_inode);
@@ -1768,94 +1770,20 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1768} 1770}
1769 1771
1770/** 1772/**
1771 * foreach_leaf - call a function for each leaf in a directory
1772 * @dip: the directory
1773 * @lc: the function to call for each each
1774 * @data: private data to pass to it
1775 *
1776 * Returns: errno
1777 */
1778
1779static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
1780{
1781 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1782 struct buffer_head *bh;
1783 struct gfs2_leaf *leaf;
1784 u32 hsize, len;
1785 u32 ht_offset, lp_offset, ht_offset_cur = -1;
1786 u32 index = 0;
1787 __be64 *lp;
1788 u64 leaf_no;
1789 int error = 0;
1790
1791 hsize = 1 << dip->i_depth;
1792 if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
1793 gfs2_consist_inode(dip);
1794 return -EIO;
1795 }
1796
1797 lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
1798 if (!lp)
1799 return -ENOMEM;
1800
1801 while (index < hsize) {
1802 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1803 ht_offset = index - lp_offset;
1804
1805 if (ht_offset_cur != ht_offset) {
1806 error = gfs2_dir_read_data(dip, (char *)lp,
1807 ht_offset * sizeof(__be64),
1808 sdp->sd_hash_bsize, 1);
1809 if (error != sdp->sd_hash_bsize) {
1810 if (error >= 0)
1811 error = -EIO;
1812 goto out;
1813 }
1814 ht_offset_cur = ht_offset;
1815 }
1816
1817 leaf_no = be64_to_cpu(lp[lp_offset]);
1818 if (leaf_no) {
1819 error = get_leaf(dip, leaf_no, &bh);
1820 if (error)
1821 goto out;
1822 leaf = (struct gfs2_leaf *)bh->b_data;
1823 len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
1824 brelse(bh);
1825
1826 error = lc(dip, index, len, leaf_no, data);
1827 if (error)
1828 goto out;
1829
1830 index = (index & ~(len - 1)) + len;
1831 } else
1832 index++;
1833 }
1834
1835 if (index != hsize) {
1836 gfs2_consist_inode(dip);
1837 error = -EIO;
1838 }
1839
1840out:
1841 kfree(lp);
1842
1843 return error;
1844}
1845
1846/**
1847 * leaf_dealloc - Deallocate a directory leaf 1773 * leaf_dealloc - Deallocate a directory leaf
1848 * @dip: the directory 1774 * @dip: the directory
1849 * @index: the hash table offset in the directory 1775 * @index: the hash table offset in the directory
1850 * @len: the number of pointers to this leaf 1776 * @len: the number of pointers to this leaf
1851 * @leaf_no: the leaf number 1777 * @leaf_no: the leaf number
1852 * @data: not used 1778 * @leaf_bh: buffer_head for the starting leaf
1779 * last_dealloc: 1 if this is the final dealloc for the leaf, else 0
1853 * 1780 *
1854 * Returns: errno 1781 * Returns: errno
1855 */ 1782 */
1856 1783
1857static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, 1784static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1858 u64 leaf_no, void *data) 1785 u64 leaf_no, struct buffer_head *leaf_bh,
1786 int last_dealloc)
1859{ 1787{
1860 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 1788 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1861 struct gfs2_leaf *tmp_leaf; 1789 struct gfs2_leaf *tmp_leaf;
@@ -1887,14 +1815,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1887 goto out_qs; 1815 goto out_qs;
1888 1816
1889 /* Count the number of leaves */ 1817 /* Count the number of leaves */
1818 bh = leaf_bh;
1890 1819
1891 for (blk = leaf_no; blk; blk = nblk) { 1820 for (blk = leaf_no; blk; blk = nblk) {
1892 error = get_leaf(dip, blk, &bh); 1821 if (blk != leaf_no) {
1893 if (error) 1822 error = get_leaf(dip, blk, &bh);
1894 goto out_rlist; 1823 if (error)
1824 goto out_rlist;
1825 }
1895 tmp_leaf = (struct gfs2_leaf *)bh->b_data; 1826 tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1896 nblk = be64_to_cpu(tmp_leaf->lf_next); 1827 nblk = be64_to_cpu(tmp_leaf->lf_next);
1897 brelse(bh); 1828 if (blk != leaf_no)
1829 brelse(bh);
1898 1830
1899 gfs2_rlist_add(sdp, &rlist, blk); 1831 gfs2_rlist_add(sdp, &rlist, blk);
1900 l_blocks++; 1832 l_blocks++;
@@ -1918,13 +1850,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1918 if (error) 1850 if (error)
1919 goto out_rg_gunlock; 1851 goto out_rg_gunlock;
1920 1852
1853 bh = leaf_bh;
1854
1921 for (blk = leaf_no; blk; blk = nblk) { 1855 for (blk = leaf_no; blk; blk = nblk) {
1922 error = get_leaf(dip, blk, &bh); 1856 if (blk != leaf_no) {
1923 if (error) 1857 error = get_leaf(dip, blk, &bh);
1924 goto out_end_trans; 1858 if (error)
1859 goto out_end_trans;
1860 }
1925 tmp_leaf = (struct gfs2_leaf *)bh->b_data; 1861 tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1926 nblk = be64_to_cpu(tmp_leaf->lf_next); 1862 nblk = be64_to_cpu(tmp_leaf->lf_next);
1927 brelse(bh); 1863 if (blk != leaf_no)
1864 brelse(bh);
1928 1865
1929 gfs2_free_meta(dip, blk, 1); 1866 gfs2_free_meta(dip, blk, 1);
1930 gfs2_add_inode_blocks(&dip->i_inode, -1); 1867 gfs2_add_inode_blocks(&dip->i_inode, -1);
@@ -1942,6 +1879,10 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1942 goto out_end_trans; 1879 goto out_end_trans;
1943 1880
1944 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 1881 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1882 /* On the last dealloc, make this a regular file in case we crash.
1883 (We don't want to free these blocks a second time.) */
1884 if (last_dealloc)
1885 dip->i_inode.i_mode = S_IFREG;
1945 gfs2_dinode_out(dip, dibh->b_data); 1886 gfs2_dinode_out(dip, dibh->b_data);
1946 brelse(dibh); 1887 brelse(dibh);
1947 1888
@@ -1975,29 +1916,67 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
1975{ 1916{
1976 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 1917 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1977 struct buffer_head *bh; 1918 struct buffer_head *bh;
1978 int error; 1919 struct gfs2_leaf *leaf;
1920 u32 hsize, len;
1921 u32 ht_offset, lp_offset, ht_offset_cur = -1;
1922 u32 index = 0, next_index;
1923 __be64 *lp;
1924 u64 leaf_no;
1925 int error = 0, last;
1979 1926
1980 /* Dealloc on-disk leaves to FREEMETA state */ 1927 hsize = 1 << dip->i_depth;
1981 error = foreach_leaf(dip, leaf_dealloc, NULL); 1928 if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
1982 if (error) 1929 gfs2_consist_inode(dip);
1983 return error; 1930 return -EIO;
1931 }
1984 1932
1985 /* Make this a regular file in case we crash. 1933 lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
1986 (We don't want to free these blocks a second time.) */ 1934 if (!lp)
1935 return -ENOMEM;
1987 1936
1988 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 1937 while (index < hsize) {
1989 if (error) 1938 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1990 return error; 1939 ht_offset = index - lp_offset;
1991 1940
1992 error = gfs2_meta_inode_buffer(dip, &bh); 1941 if (ht_offset_cur != ht_offset) {
1993 if (!error) { 1942 error = gfs2_dir_read_data(dip, (char *)lp,
1994 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1943 ht_offset * sizeof(__be64),
1995 ((struct gfs2_dinode *)bh->b_data)->di_mode = 1944 sdp->sd_hash_bsize, 1);
1996 cpu_to_be32(S_IFREG); 1945 if (error != sdp->sd_hash_bsize) {
1997 brelse(bh); 1946 if (error >= 0)
1947 error = -EIO;
1948 goto out;
1949 }
1950 ht_offset_cur = ht_offset;
1951 }
1952
1953 leaf_no = be64_to_cpu(lp[lp_offset]);
1954 if (leaf_no) {
1955 error = get_leaf(dip, leaf_no, &bh);
1956 if (error)
1957 goto out;
1958 leaf = (struct gfs2_leaf *)bh->b_data;
1959 len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
1960
1961 next_index = (index & ~(len - 1)) + len;
1962 last = ((next_index >= hsize) ? 1 : 0);
1963 error = leaf_dealloc(dip, index, len, leaf_no, bh,
1964 last);
1965 brelse(bh);
1966 if (error)
1967 goto out;
1968 index = next_index;
1969 } else
1970 index++;
1998 } 1971 }
1999 1972
2000 gfs2_trans_end(sdp); 1973 if (index != hsize) {
1974 gfs2_consist_inode(dip);
1975 error = -EIO;
1976 }
1977
1978out:
1979 kfree(lp);
2001 1980
2002 return error; 1981 return error;
2003} 1982}
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index a98f644bd3df..e686af11becd 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -22,8 +22,8 @@ extern struct inode *gfs2_dir_search(struct inode *dir,
22extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, 22extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename,
23 const struct gfs2_inode *ip); 23 const struct gfs2_inode *ip);
24extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, 24extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
25 const struct gfs2_inode *ip, unsigned int type); 25 const struct gfs2_inode *ip);
26extern int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); 26extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry);
27extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, 27extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
28 filldir_t filldir); 28 filldir_t filldir);
29extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, 29extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index b5a5e60df0d5..fe9945f2ff72 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -139,7 +139,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
139 struct gfs2_sbd *sdp = sb->s_fs_info; 139 struct gfs2_sbd *sdp = sb->s_fs_info;
140 struct inode *inode; 140 struct inode *inode;
141 141
142 inode = gfs2_ilookup(sb, inum->no_addr); 142 inode = gfs2_ilookup(sb, inum->no_addr, 0);
143 if (inode) { 143 if (inode) {
144 if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { 144 if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
145 iput(inode); 145 iput(inode);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index e48310885c48..a9f5cbe45cd9 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -545,18 +545,10 @@ static int gfs2_close(struct inode *inode, struct file *file)
545/** 545/**
546 * gfs2_fsync - sync the dirty data for a file (across the cluster) 546 * gfs2_fsync - sync the dirty data for a file (across the cluster)
547 * @file: the file that points to the dentry (we ignore this) 547 * @file: the file that points to the dentry (we ignore this)
548 * @dentry: the dentry that points to the inode to sync 548 * @datasync: set if we can ignore timestamp changes
549 * 549 *
550 * The VFS will flush "normal" data for us. We only need to worry 550 * The VFS will flush data for us. We only need to worry
551 * about metadata here. For journaled data, we just do a log flush 551 * about metadata here.
552 * as we can't avoid it. Otherwise we can just bale out if datasync
553 * is set. For stuffed inodes we must flush the log in order to
554 * ensure that all data is on disk.
555 *
556 * The call to write_inode_now() is there to write back metadata and
557 * the inode itself. It does also try and write the data, but thats
558 * (hopefully) a no-op due to the VFS having already called filemap_fdatawrite()
559 * for us.
560 * 552 *
561 * Returns: errno 553 * Returns: errno
562 */ 554 */
@@ -565,22 +557,20 @@ static int gfs2_fsync(struct file *file, int datasync)
565{ 557{
566 struct inode *inode = file->f_mapping->host; 558 struct inode *inode = file->f_mapping->host;
567 int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); 559 int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
568 int ret = 0; 560 struct gfs2_inode *ip = GFS2_I(inode);
569 561 int ret;
570 if (gfs2_is_jdata(GFS2_I(inode))) {
571 gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
572 return 0;
573 }
574 562
575 if (sync_state != 0) { 563 if (datasync)
576 if (!datasync) 564 sync_state &= ~I_DIRTY_SYNC;
577 ret = write_inode_now(inode, 0);
578 565
579 if (gfs2_is_stuffed(GFS2_I(inode))) 566 if (sync_state) {
580 gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl); 567 ret = sync_inode_metadata(inode, 1);
568 if (ret)
569 return ret;
570 gfs2_ail_flush(ip->i_gl);
581 } 571 }
582 572
583 return ret; 573 return 0;
584} 574}
585 575
586/** 576/**
@@ -826,6 +816,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
826 loff_t bytes, max_bytes; 816 loff_t bytes, max_bytes;
827 struct gfs2_alloc *al; 817 struct gfs2_alloc *al;
828 int error; 818 int error;
819 loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
829 loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; 820 loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
830 next = (next + 1) << sdp->sd_sb.sb_bsize_shift; 821 next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
831 822
@@ -833,13 +824,15 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
833 if (mode & ~FALLOC_FL_KEEP_SIZE) 824 if (mode & ~FALLOC_FL_KEEP_SIZE)
834 return -EOPNOTSUPP; 825 return -EOPNOTSUPP;
835 826
836 offset = (offset >> sdp->sd_sb.sb_bsize_shift) << 827 offset &= bsize_mask;
837 sdp->sd_sb.sb_bsize_shift;
838 828
839 len = next - offset; 829 len = next - offset;
840 bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; 830 bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
841 if (!bytes) 831 if (!bytes)
842 bytes = UINT_MAX; 832 bytes = UINT_MAX;
833 bytes &= bsize_mask;
834 if (bytes == 0)
835 bytes = sdp->sd_sb.sb_bsize;
843 836
844 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); 837 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
845 error = gfs2_glock_nq(&ip->i_gh); 838 error = gfs2_glock_nq(&ip->i_gh);
@@ -870,6 +863,9 @@ retry:
870 if (error) { 863 if (error) {
871 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { 864 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
872 bytes >>= 1; 865 bytes >>= 1;
866 bytes &= bsize_mask;
867 if (bytes == 0)
868 bytes = sdp->sd_sb.sb_bsize;
873 goto retry; 869 goto retry;
874 } 870 }
875 goto out_qunlock; 871 goto out_qunlock;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 7a4fb630a320..2792a790e50b 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -143,14 +143,9 @@ static int demote_ok(const struct gfs2_glock *gl)
143{ 143{
144 const struct gfs2_glock_operations *glops = gl->gl_ops; 144 const struct gfs2_glock_operations *glops = gl->gl_ops;
145 145
146 /* assert_spin_locked(&gl->gl_spin); */
147
148 if (gl->gl_state == LM_ST_UNLOCKED) 146 if (gl->gl_state == LM_ST_UNLOCKED)
149 return 0; 147 return 0;
150 if (test_bit(GLF_LFLUSH, &gl->gl_flags)) 148 if (!list_empty(&gl->gl_holders))
151 return 0;
152 if ((gl->gl_name.ln_type != LM_TYPE_INODE) &&
153 !list_empty(&gl->gl_holders))
154 return 0; 149 return 0;
155 if (glops->go_demote_ok) 150 if (glops->go_demote_ok)
156 return glops->go_demote_ok(gl); 151 return glops->go_demote_ok(gl);
@@ -158,6 +153,31 @@ static int demote_ok(const struct gfs2_glock *gl)
158} 153}
159 154
160 155
156void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
157{
158 spin_lock(&lru_lock);
159
160 if (!list_empty(&gl->gl_lru))
161 list_del_init(&gl->gl_lru);
162 else
163 atomic_inc(&lru_count);
164
165 list_add_tail(&gl->gl_lru, &lru_list);
166 set_bit(GLF_LRU, &gl->gl_flags);
167 spin_unlock(&lru_lock);
168}
169
170static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
171{
172 spin_lock(&lru_lock);
173 if (!list_empty(&gl->gl_lru)) {
174 list_del_init(&gl->gl_lru);
175 atomic_dec(&lru_count);
176 clear_bit(GLF_LRU, &gl->gl_flags);
177 }
178 spin_unlock(&lru_lock);
179}
180
161/** 181/**
162 * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list 182 * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
163 * @gl: the glock 183 * @gl: the glock
@@ -168,24 +188,8 @@ static int demote_ok(const struct gfs2_glock *gl)
168 188
169static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) 189static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
170{ 190{
171 if (demote_ok(gl)) { 191 if (demote_ok(gl))
172 spin_lock(&lru_lock); 192 gfs2_glock_add_to_lru(gl);
173
174 if (!list_empty(&gl->gl_lru))
175 list_del_init(&gl->gl_lru);
176 else
177 atomic_inc(&lru_count);
178
179 list_add_tail(&gl->gl_lru, &lru_list);
180 spin_unlock(&lru_lock);
181 }
182}
183
184void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
185{
186 spin_lock(&gl->gl_spin);
187 __gfs2_glock_schedule_for_reclaim(gl);
188 spin_unlock(&gl->gl_spin);
189} 193}
190 194
191/** 195/**
@@ -217,12 +221,7 @@ void gfs2_glock_put(struct gfs2_glock *gl)
217 spin_lock_bucket(gl->gl_hash); 221 spin_lock_bucket(gl->gl_hash);
218 hlist_bl_del_rcu(&gl->gl_list); 222 hlist_bl_del_rcu(&gl->gl_list);
219 spin_unlock_bucket(gl->gl_hash); 223 spin_unlock_bucket(gl->gl_hash);
220 spin_lock(&lru_lock); 224 gfs2_glock_remove_from_lru(gl);
221 if (!list_empty(&gl->gl_lru)) {
222 list_del_init(&gl->gl_lru);
223 atomic_dec(&lru_count);
224 }
225 spin_unlock(&lru_lock);
226 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); 225 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
227 GLOCK_BUG_ON(gl, mapping && mapping->nrpages); 226 GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
228 trace_gfs2_glock_put(gl); 227 trace_gfs2_glock_put(gl);
@@ -542,11 +541,6 @@ __acquires(&gl->gl_spin)
542 clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); 541 clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
543 542
544 gfs2_glock_hold(gl); 543 gfs2_glock_hold(gl);
545 if (target != LM_ST_UNLOCKED && (gl->gl_state == LM_ST_SHARED ||
546 gl->gl_state == LM_ST_DEFERRED) &&
547 !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
548 lck_flags |= LM_FLAG_TRY_1CB;
549
550 if (sdp->sd_lockstruct.ls_ops->lm_lock) { 544 if (sdp->sd_lockstruct.ls_ops->lm_lock) {
551 /* lock_dlm */ 545 /* lock_dlm */
552 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); 546 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
@@ -648,7 +642,7 @@ static void delete_work_func(struct work_struct *work)
648 /* Note: Unsafe to dereference ip as we don't hold right refs/locks */ 642 /* Note: Unsafe to dereference ip as we don't hold right refs/locks */
649 643
650 if (ip) 644 if (ip)
651 inode = gfs2_ilookup(sdp->sd_vfs, no_addr); 645 inode = gfs2_ilookup(sdp->sd_vfs, no_addr, 1);
652 else 646 else
653 inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED); 647 inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
654 if (inode && !IS_ERR(inode)) { 648 if (inode && !IS_ERR(inode)) {
@@ -1025,6 +1019,9 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
1025 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 1019 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
1026 return -EIO; 1020 return -EIO;
1027 1021
1022 if (test_bit(GLF_LRU, &gl->gl_flags))
1023 gfs2_glock_remove_from_lru(gl);
1024
1028 spin_lock(&gl->gl_spin); 1025 spin_lock(&gl->gl_spin);
1029 add_to_queue(gh); 1026 add_to_queue(gh);
1030 if ((LM_FLAG_NOEXP & gh->gh_flags) && 1027 if ((LM_FLAG_NOEXP & gh->gh_flags) &&
@@ -1082,7 +1079,8 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1082 !test_bit(GLF_DEMOTE, &gl->gl_flags)) 1079 !test_bit(GLF_DEMOTE, &gl->gl_flags))
1083 fast_path = 1; 1080 fast_path = 1;
1084 } 1081 }
1085 __gfs2_glock_schedule_for_reclaim(gl); 1082 if (!test_bit(GLF_LFLUSH, &gl->gl_flags))
1083 __gfs2_glock_schedule_for_reclaim(gl);
1086 trace_gfs2_glock_queue(gh, 0); 1084 trace_gfs2_glock_queue(gh, 0);
1087 spin_unlock(&gl->gl_spin); 1085 spin_unlock(&gl->gl_spin);
1088 if (likely(fast_path)) 1086 if (likely(fast_path))
@@ -1348,11 +1346,14 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1348} 1346}
1349 1347
1350 1348
1351static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 1349static int gfs2_shrink_glock_memory(struct shrinker *shrink,
1350 struct shrink_control *sc)
1352{ 1351{
1353 struct gfs2_glock *gl; 1352 struct gfs2_glock *gl;
1354 int may_demote; 1353 int may_demote;
1355 int nr_skipped = 0; 1354 int nr_skipped = 0;
1355 int nr = sc->nr_to_scan;
1356 gfp_t gfp_mask = sc->gfp_mask;
1356 LIST_HEAD(skipped); 1357 LIST_HEAD(skipped);
1357 1358
1358 if (nr == 0) 1359 if (nr == 0)
@@ -1365,6 +1366,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m
1365 while(nr && !list_empty(&lru_list)) { 1366 while(nr && !list_empty(&lru_list)) {
1366 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); 1367 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
1367 list_del_init(&gl->gl_lru); 1368 list_del_init(&gl->gl_lru);
1369 clear_bit(GLF_LRU, &gl->gl_flags);
1368 atomic_dec(&lru_count); 1370 atomic_dec(&lru_count);
1369 1371
1370 /* Test for being demotable */ 1372 /* Test for being demotable */
@@ -1387,6 +1389,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m
1387 } 1389 }
1388 nr_skipped++; 1390 nr_skipped++;
1389 list_add(&gl->gl_lru, &skipped); 1391 list_add(&gl->gl_lru, &skipped);
1392 set_bit(GLF_LRU, &gl->gl_flags);
1390 } 1393 }
1391 list_splice(&skipped, &lru_list); 1394 list_splice(&skipped, &lru_list);
1392 atomic_add(nr_skipped, &lru_count); 1395 atomic_add(nr_skipped, &lru_count);
@@ -1459,12 +1462,7 @@ static void thaw_glock(struct gfs2_glock *gl)
1459 1462
1460static void clear_glock(struct gfs2_glock *gl) 1463static void clear_glock(struct gfs2_glock *gl)
1461{ 1464{
1462 spin_lock(&lru_lock); 1465 gfs2_glock_remove_from_lru(gl);
1463 if (!list_empty(&gl->gl_lru)) {
1464 list_del_init(&gl->gl_lru);
1465 atomic_dec(&lru_count);
1466 }
1467 spin_unlock(&lru_lock);
1468 1466
1469 spin_lock(&gl->gl_spin); 1467 spin_lock(&gl->gl_spin);
1470 if (gl->gl_state != LM_ST_UNLOCKED) 1468 if (gl->gl_state != LM_ST_UNLOCKED)
@@ -1599,9 +1597,11 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
1599 return 0; 1597 return 0;
1600} 1598}
1601 1599
1602static const char *gflags2str(char *buf, const unsigned long *gflags) 1600static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
1603{ 1601{
1602 const unsigned long *gflags = &gl->gl_flags;
1604 char *p = buf; 1603 char *p = buf;
1604
1605 if (test_bit(GLF_LOCK, gflags)) 1605 if (test_bit(GLF_LOCK, gflags))
1606 *p++ = 'l'; 1606 *p++ = 'l';
1607 if (test_bit(GLF_DEMOTE, gflags)) 1607 if (test_bit(GLF_DEMOTE, gflags))
@@ -1624,6 +1624,10 @@ static const char *gflags2str(char *buf, const unsigned long *gflags)
1624 *p++ = 'F'; 1624 *p++ = 'F';
1625 if (test_bit(GLF_QUEUED, gflags)) 1625 if (test_bit(GLF_QUEUED, gflags))
1626 *p++ = 'q'; 1626 *p++ = 'q';
1627 if (test_bit(GLF_LRU, gflags))
1628 *p++ = 'L';
1629 if (gl->gl_object)
1630 *p++ = 'o';
1627 *p = 0; 1631 *p = 0;
1628 return buf; 1632 return buf;
1629} 1633}
@@ -1658,14 +1662,15 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
1658 dtime *= 1000000/HZ; /* demote time in uSec */ 1662 dtime *= 1000000/HZ; /* demote time in uSec */
1659 if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) 1663 if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
1660 dtime = 0; 1664 dtime = 0;
1661 gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d\n", 1665 gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d\n",
1662 state2str(gl->gl_state), 1666 state2str(gl->gl_state),
1663 gl->gl_name.ln_type, 1667 gl->gl_name.ln_type,
1664 (unsigned long long)gl->gl_name.ln_number, 1668 (unsigned long long)gl->gl_name.ln_number,
1665 gflags2str(gflags_buf, &gl->gl_flags), 1669 gflags2str(gflags_buf, gl),
1666 state2str(gl->gl_target), 1670 state2str(gl->gl_target),
1667 state2str(gl->gl_demote_state), dtime, 1671 state2str(gl->gl_demote_state), dtime,
1668 atomic_read(&gl->gl_ail_count), 1672 atomic_read(&gl->gl_ail_count),
1673 atomic_read(&gl->gl_revokes),
1669 atomic_read(&gl->gl_ref)); 1674 atomic_read(&gl->gl_ref));
1670 1675
1671 list_for_each_entry(gh, &gl->gl_holders, gh_list) { 1676 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index aea160690e94..6b2f757b9281 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -225,11 +225,10 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
225 225
226extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); 226extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
227extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret); 227extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
228extern void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
229extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); 228extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
230extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip); 229extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
231extern void gfs2_glock_thaw(struct gfs2_sbd *sdp); 230extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
232extern void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl); 231extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl);
233extern void gfs2_glock_free(struct gfs2_glock *gl); 232extern void gfs2_glock_free(struct gfs2_glock *gl);
234 233
235extern int __init gfs2_glock_init(void); 234extern int __init gfs2_glock_init(void);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 25eeb2bcee47..8ef70f464731 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -28,33 +28,18 @@
28#include "trans.h" 28#include "trans.h"
29 29
30/** 30/**
31 * ail_empty_gl - remove all buffers for a given lock from the AIL 31 * __gfs2_ail_flush - remove all buffers for a given lock from the AIL
32 * @gl: the glock 32 * @gl: the glock
33 * 33 *
34 * None of the buffers should be dirty, locked, or pinned. 34 * None of the buffers should be dirty, locked, or pinned.
35 */ 35 */
36 36
37static void gfs2_ail_empty_gl(struct gfs2_glock *gl) 37static void __gfs2_ail_flush(struct gfs2_glock *gl)
38{ 38{
39 struct gfs2_sbd *sdp = gl->gl_sbd; 39 struct gfs2_sbd *sdp = gl->gl_sbd;
40 struct list_head *head = &gl->gl_ail_list; 40 struct list_head *head = &gl->gl_ail_list;
41 struct gfs2_bufdata *bd; 41 struct gfs2_bufdata *bd;
42 struct buffer_head *bh; 42 struct buffer_head *bh;
43 struct gfs2_trans tr;
44
45 memset(&tr, 0, sizeof(tr));
46 tr.tr_revokes = atomic_read(&gl->gl_ail_count);
47
48 if (!tr.tr_revokes)
49 return;
50
51 /* A shortened, inline version of gfs2_trans_begin() */
52 tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
53 tr.tr_ip = (unsigned long)__builtin_return_address(0);
54 INIT_LIST_HEAD(&tr.tr_list_buf);
55 gfs2_log_reserve(sdp, tr.tr_reserved);
56 BUG_ON(current->journal_info);
57 current->journal_info = &tr;
58 43
59 spin_lock(&sdp->sd_ail_lock); 44 spin_lock(&sdp->sd_ail_lock);
60 while (!list_empty(head)) { 45 while (!list_empty(head)) {
@@ -76,7 +61,47 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
76 } 61 }
77 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); 62 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
78 spin_unlock(&sdp->sd_ail_lock); 63 spin_unlock(&sdp->sd_ail_lock);
64}
65
66
67static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
68{
69 struct gfs2_sbd *sdp = gl->gl_sbd;
70 struct gfs2_trans tr;
71
72 memset(&tr, 0, sizeof(tr));
73 tr.tr_revokes = atomic_read(&gl->gl_ail_count);
74
75 if (!tr.tr_revokes)
76 return;
77
78 /* A shortened, inline version of gfs2_trans_begin() */
79 tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
80 tr.tr_ip = (unsigned long)__builtin_return_address(0);
81 INIT_LIST_HEAD(&tr.tr_list_buf);
82 gfs2_log_reserve(sdp, tr.tr_reserved);
83 BUG_ON(current->journal_info);
84 current->journal_info = &tr;
85
86 __gfs2_ail_flush(gl);
87
88 gfs2_trans_end(sdp);
89 gfs2_log_flush(sdp, NULL);
90}
91
92void gfs2_ail_flush(struct gfs2_glock *gl)
93{
94 struct gfs2_sbd *sdp = gl->gl_sbd;
95 unsigned int revokes = atomic_read(&gl->gl_ail_count);
96 int ret;
97
98 if (!revokes)
99 return;
79 100
101 ret = gfs2_trans_begin(sdp, 0, revokes);
102 if (ret)
103 return;
104 __gfs2_ail_flush(gl);
80 gfs2_trans_end(sdp); 105 gfs2_trans_end(sdp);
81 gfs2_log_flush(sdp, NULL); 106 gfs2_log_flush(sdp, NULL);
82} 107}
@@ -227,6 +252,119 @@ static int inode_go_demote_ok(const struct gfs2_glock *gl)
227} 252}
228 253
229/** 254/**
255 * gfs2_set_nlink - Set the inode's link count based on on-disk info
256 * @inode: The inode in question
257 * @nlink: The link count
258 *
259 * If the link count has hit zero, it must never be raised, whatever the
260 * on-disk inode might say. When new struct inodes are created the link
261 * count is set to 1, so that we can safely use this test even when reading
262 * in on disk information for the first time.
263 */
264
265static void gfs2_set_nlink(struct inode *inode, u32 nlink)
266{
267 /*
268 * We will need to review setting the nlink count here in the
269 * light of the forthcoming ro bind mount work. This is a reminder
270 * to do that.
271 */
272 if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) {
273 if (nlink == 0)
274 clear_nlink(inode);
275 else
276 inode->i_nlink = nlink;
277 }
278}
279
280static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
281{
282 const struct gfs2_dinode *str = buf;
283 struct timespec atime;
284 u16 height, depth;
285
286 if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
287 goto corrupt;
288 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
289 ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
290 ip->i_inode.i_rdev = 0;
291 switch (ip->i_inode.i_mode & S_IFMT) {
292 case S_IFBLK:
293 case S_IFCHR:
294 ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
295 be32_to_cpu(str->di_minor));
296 break;
297 };
298
299 ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
300 ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
301 gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
302 i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
303 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
304 atime.tv_sec = be64_to_cpu(str->di_atime);
305 atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
306 if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
307 ip->i_inode.i_atime = atime;
308 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
309 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
310 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
311 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
312
313 ip->i_goal = be64_to_cpu(str->di_goal_meta);
314 ip->i_generation = be64_to_cpu(str->di_generation);
315
316 ip->i_diskflags = be32_to_cpu(str->di_flags);
317 gfs2_set_inode_flags(&ip->i_inode);
318 height = be16_to_cpu(str->di_height);
319 if (unlikely(height > GFS2_MAX_META_HEIGHT))
320 goto corrupt;
321 ip->i_height = (u8)height;
322
323 depth = be16_to_cpu(str->di_depth);
324 if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
325 goto corrupt;
326 ip->i_depth = (u8)depth;
327 ip->i_entries = be32_to_cpu(str->di_entries);
328
329 ip->i_eattr = be64_to_cpu(str->di_eattr);
330 if (S_ISREG(ip->i_inode.i_mode))
331 gfs2_set_aops(&ip->i_inode);
332
333 return 0;
334corrupt:
335 gfs2_consist_inode(ip);
336 return -EIO;
337}
338
339/**
340 * gfs2_inode_refresh - Refresh the incore copy of the dinode
341 * @ip: The GFS2 inode
342 *
343 * Returns: errno
344 */
345
346int gfs2_inode_refresh(struct gfs2_inode *ip)
347{
348 struct buffer_head *dibh;
349 int error;
350
351 error = gfs2_meta_inode_buffer(ip, &dibh);
352 if (error)
353 return error;
354
355 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
356 brelse(dibh);
357 return -EIO;
358 }
359
360 error = gfs2_dinode_in(ip, dibh->b_data);
361 brelse(dibh);
362 clear_bit(GIF_INVALID, &ip->i_flags);
363
364 return error;
365}
366
367/**
230 * inode_go_lock - operation done after an inode lock is locked by a process 368 * inode_go_lock - operation done after an inode lock is locked by a process
231 * @gl: the glock 369 * @gl: the glock
232 * @flags: 370 * @flags:
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
index b3aa2e3210fd..6fce409b5a50 100644
--- a/fs/gfs2/glops.h
+++ b/fs/gfs2/glops.h
@@ -23,4 +23,6 @@ extern const struct gfs2_glock_operations gfs2_quota_glops;
23extern const struct gfs2_glock_operations gfs2_journal_glops; 23extern const struct gfs2_glock_operations gfs2_journal_glops;
24extern const struct gfs2_glock_operations *gfs2_glops_list[]; 24extern const struct gfs2_glock_operations *gfs2_glops_list[];
25 25
26extern void gfs2_ail_flush(struct gfs2_glock *gl);
27
26#endif /* __GLOPS_DOT_H__ */ 28#endif /* __GLOPS_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 870a89d6d4dc..0a064e91ac70 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -20,7 +20,6 @@
20 20
21#define DIO_WAIT 0x00000010 21#define DIO_WAIT 0x00000010
22#define DIO_METADATA 0x00000020 22#define DIO_METADATA 0x00000020
23#define DIO_ALL 0x00000100
24 23
25struct gfs2_log_operations; 24struct gfs2_log_operations;
26struct gfs2_log_element; 25struct gfs2_log_element;
@@ -200,6 +199,8 @@ enum {
200 GLF_INITIAL = 10, 199 GLF_INITIAL = 10,
201 GLF_FROZEN = 11, 200 GLF_FROZEN = 11,
202 GLF_QUEUED = 12, 201 GLF_QUEUED = 12,
202 GLF_LRU = 13,
203 GLF_OBJECT = 14, /* Used only for tracing */
203}; 204};
204 205
205struct gfs2_glock { 206struct gfs2_glock {
@@ -234,6 +235,7 @@ struct gfs2_glock {
234 235
235 struct list_head gl_ail_list; 236 struct list_head gl_ail_list;
236 atomic_t gl_ail_count; 237 atomic_t gl_ail_count;
238 atomic_t gl_revokes;
237 struct delayed_work gl_work; 239 struct delayed_work gl_work;
238 struct work_struct gl_delete; 240 struct work_struct gl_delete;
239 struct rcu_head gl_rcu; 241 struct rcu_head gl_rcu;
@@ -374,8 +376,6 @@ struct gfs2_ail {
374 unsigned int ai_first; 376 unsigned int ai_first;
375 struct list_head ai_ail1_list; 377 struct list_head ai_ail1_list;
376 struct list_head ai_ail2_list; 378 struct list_head ai_ail2_list;
377
378 u64 ai_sync_gen;
379}; 379};
380 380
381struct gfs2_journal_extent { 381struct gfs2_journal_extent {
@@ -488,7 +488,6 @@ struct gfs2_sb_host {
488 488
489 char sb_lockproto[GFS2_LOCKNAME_LEN]; 489 char sb_lockproto[GFS2_LOCKNAME_LEN];
490 char sb_locktable[GFS2_LOCKNAME_LEN]; 490 char sb_locktable[GFS2_LOCKNAME_LEN];
491 u8 sb_uuid[16];
492}; 491};
493 492
494/* 493/*
@@ -654,7 +653,6 @@ struct gfs2_sbd {
654 spinlock_t sd_ail_lock; 653 spinlock_t sd_ail_lock;
655 struct list_head sd_ail1_list; 654 struct list_head sd_ail1_list;
656 struct list_head sd_ail2_list; 655 struct list_head sd_ail2_list;
657 u64 sd_ail_sync_gen;
658 656
659 /* Replay stuff */ 657 /* Replay stuff */
660 658
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 9134dcb89479..03e0c529063e 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1,23 +1,25 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2. 7 * of the GNU General Public License version 2.
8 */ 8 */
9 9
10#include <linux/sched.h>
11#include <linux/slab.h> 10#include <linux/slab.h>
12#include <linux/spinlock.h> 11#include <linux/spinlock.h>
13#include <linux/completion.h> 12#include <linux/completion.h>
14#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/namei.h>
15#include <linux/mm.h>
16#include <linux/xattr.h>
15#include <linux/posix_acl.h> 17#include <linux/posix_acl.h>
16#include <linux/sort.h>
17#include <linux/gfs2_ondisk.h> 18#include <linux/gfs2_ondisk.h>
18#include <linux/crc32.h> 19#include <linux/crc32.h>
20#include <linux/fiemap.h>
19#include <linux/security.h> 21#include <linux/security.h>
20#include <linux/time.h> 22#include <asm/uaccess.h>
21 23
22#include "gfs2.h" 24#include "gfs2.h"
23#include "incore.h" 25#include "incore.h"
@@ -26,19 +28,14 @@
26#include "dir.h" 28#include "dir.h"
27#include "xattr.h" 29#include "xattr.h"
28#include "glock.h" 30#include "glock.h"
29#include "glops.h"
30#include "inode.h" 31#include "inode.h"
31#include "log.h"
32#include "meta_io.h" 32#include "meta_io.h"
33#include "quota.h" 33#include "quota.h"
34#include "rgrp.h" 34#include "rgrp.h"
35#include "trans.h" 35#include "trans.h"
36#include "util.h" 36#include "util.h"
37 37#include "super.h"
38struct gfs2_inum_range_host { 38#include "glops.h"
39 u64 ir_start;
40 u64 ir_length;
41};
42 39
43struct gfs2_skip_data { 40struct gfs2_skip_data {
44 u64 no_addr; 41 u64 no_addr;
@@ -74,14 +71,14 @@ static int iget_set(struct inode *inode, void *opaque)
74 return 0; 71 return 0;
75} 72}
76 73
77struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) 74struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
78{ 75{
79 unsigned long hash = (unsigned long)no_addr; 76 unsigned long hash = (unsigned long)no_addr;
80 struct gfs2_skip_data data; 77 struct gfs2_skip_data data;
81 78
82 data.no_addr = no_addr; 79 data.no_addr = no_addr;
83 data.skipped = 0; 80 data.skipped = 0;
84 data.non_block = 0; 81 data.non_block = non_block;
85 return ilookup5(sb, hash, iget_test, &data); 82 return ilookup5(sb, hash, iget_test, &data);
86} 83}
87 84
@@ -248,203 +245,6 @@ fail_iput:
248 goto fail; 245 goto fail;
249} 246}
250 247
251static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
252{
253 const struct gfs2_dinode *str = buf;
254 struct timespec atime;
255 u16 height, depth;
256
257 if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
258 goto corrupt;
259 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
260 ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
261 ip->i_inode.i_rdev = 0;
262 switch (ip->i_inode.i_mode & S_IFMT) {
263 case S_IFBLK:
264 case S_IFCHR:
265 ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
266 be32_to_cpu(str->di_minor));
267 break;
268 };
269
270 ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
271 ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
272 /*
273 * We will need to review setting the nlink count here in the
274 * light of the forthcoming ro bind mount work. This is a reminder
275 * to do that.
276 */
277 ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink);
278 i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
279 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
280 atime.tv_sec = be64_to_cpu(str->di_atime);
281 atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
282 if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
283 ip->i_inode.i_atime = atime;
284 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
285 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
286 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
287 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
288
289 ip->i_goal = be64_to_cpu(str->di_goal_meta);
290 ip->i_generation = be64_to_cpu(str->di_generation);
291
292 ip->i_diskflags = be32_to_cpu(str->di_flags);
293 gfs2_set_inode_flags(&ip->i_inode);
294 height = be16_to_cpu(str->di_height);
295 if (unlikely(height > GFS2_MAX_META_HEIGHT))
296 goto corrupt;
297 ip->i_height = (u8)height;
298
299 depth = be16_to_cpu(str->di_depth);
300 if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
301 goto corrupt;
302 ip->i_depth = (u8)depth;
303 ip->i_entries = be32_to_cpu(str->di_entries);
304
305 ip->i_eattr = be64_to_cpu(str->di_eattr);
306 if (S_ISREG(ip->i_inode.i_mode))
307 gfs2_set_aops(&ip->i_inode);
308
309 return 0;
310corrupt:
311 if (gfs2_consist_inode(ip))
312 gfs2_dinode_print(ip);
313 return -EIO;
314}
315
316/**
317 * gfs2_inode_refresh - Refresh the incore copy of the dinode
318 * @ip: The GFS2 inode
319 *
320 * Returns: errno
321 */
322
323int gfs2_inode_refresh(struct gfs2_inode *ip)
324{
325 struct buffer_head *dibh;
326 int error;
327
328 error = gfs2_meta_inode_buffer(ip, &dibh);
329 if (error)
330 return error;
331
332 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
333 brelse(dibh);
334 return -EIO;
335 }
336
337 error = gfs2_dinode_in(ip, dibh->b_data);
338 brelse(dibh);
339 clear_bit(GIF_INVALID, &ip->i_flags);
340
341 return error;
342}
343
344int gfs2_dinode_dealloc(struct gfs2_inode *ip)
345{
346 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
347 struct gfs2_alloc *al;
348 struct gfs2_rgrpd *rgd;
349 int error;
350
351 if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
352 if (gfs2_consist_inode(ip))
353 gfs2_dinode_print(ip);
354 return -EIO;
355 }
356
357 al = gfs2_alloc_get(ip);
358 if (!al)
359 return -ENOMEM;
360
361 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
362 if (error)
363 goto out;
364
365 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
366 if (error)
367 goto out_qs;
368
369 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
370 if (!rgd) {
371 gfs2_consist_inode(ip);
372 error = -EIO;
373 goto out_rindex_relse;
374 }
375
376 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
377 &al->al_rgd_gh);
378 if (error)
379 goto out_rindex_relse;
380
381 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 1);
382 if (error)
383 goto out_rg_gunlock;
384
385 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
386 set_bit(GLF_LFLUSH, &ip->i_gl->gl_flags);
387
388 gfs2_free_di(rgd, ip);
389
390 gfs2_trans_end(sdp);
391
392out_rg_gunlock:
393 gfs2_glock_dq_uninit(&al->al_rgd_gh);
394out_rindex_relse:
395 gfs2_glock_dq_uninit(&al->al_ri_gh);
396out_qs:
397 gfs2_quota_unhold(ip);
398out:
399 gfs2_alloc_put(ip);
400 return error;
401}
402
403/**
404 * gfs2_change_nlink - Change nlink count on inode
405 * @ip: The GFS2 inode
406 * @diff: The change in the nlink count required
407 *
408 * Returns: errno
409 */
410int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
411{
412 struct buffer_head *dibh;
413 u32 nlink;
414 int error;
415
416 BUG_ON(diff != 1 && diff != -1);
417 nlink = ip->i_inode.i_nlink + diff;
418
419 /* If we are reducing the nlink count, but the new value ends up being
420 bigger than the old one, we must have underflowed. */
421 if (diff < 0 && nlink > ip->i_inode.i_nlink) {
422 if (gfs2_consist_inode(ip))
423 gfs2_dinode_print(ip);
424 return -EIO;
425 }
426
427 error = gfs2_meta_inode_buffer(ip, &dibh);
428 if (error)
429 return error;
430
431 if (diff > 0)
432 inc_nlink(&ip->i_inode);
433 else
434 drop_nlink(&ip->i_inode);
435
436 ip->i_inode.i_ctime = CURRENT_TIME;
437
438 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
439 gfs2_dinode_out(ip, dibh->b_data);
440 brelse(dibh);
441 mark_inode_dirty(&ip->i_inode);
442
443 if (ip->i_inode.i_nlink == 0)
444 gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */
445
446 return error;
447}
448 248
449struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) 249struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
450{ 250{
@@ -543,7 +343,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
543 343
544 /* Don't create entries in an unlinked directory */ 344 /* Don't create entries in an unlinked directory */
545 if (!dip->i_inode.i_nlink) 345 if (!dip->i_inode.i_nlink)
546 return -EPERM; 346 return -ENOENT;
547 347
548 error = gfs2_dir_check(&dip->i_inode, name, NULL); 348 error = gfs2_dir_check(&dip->i_inode, name, NULL);
549 switch (error) { 349 switch (error) {
@@ -613,21 +413,44 @@ out:
613 return error; 413 return error;
614} 414}
615 415
416static void gfs2_init_dir(struct buffer_head *dibh,
417 const struct gfs2_inode *parent)
418{
419 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
420 struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
421
422 gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent);
423 dent->de_inum = di->di_num; /* already GFS2 endian */
424 dent->de_type = cpu_to_be16(DT_DIR);
425
426 dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
427 gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
428 gfs2_inum_out(parent, dent);
429 dent->de_type = cpu_to_be16(DT_DIR);
430
431}
432
616/** 433/**
617 * init_dinode - Fill in a new dinode structure 434 * init_dinode - Fill in a new dinode structure
618 * @dip: the directory this inode is being created in 435 * @dip: The directory this inode is being created in
619 * @gl: The glock covering the new inode 436 * @gl: The glock covering the new inode
620 * @inum: the inode number 437 * @inum: The inode number
621 * @mode: the file permissions 438 * @mode: The file permissions
622 * @uid: 439 * @uid: The uid of the new inode
623 * @gid: 440 * @gid: The gid of the new inode
441 * @generation: The generation number of the new inode
442 * @dev: The device number (if a device node)
443 * @symname: The symlink destination (if a symlink)
444 * @size: The inode size (ignored for directories)
445 * @bhp: The buffer head (returned to caller)
624 * 446 *
625 */ 447 */
626 448
627static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 449static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
628 const struct gfs2_inum_host *inum, unsigned int mode, 450 const struct gfs2_inum_host *inum, unsigned int mode,
629 unsigned int uid, unsigned int gid, 451 unsigned int uid, unsigned int gid,
630 const u64 *generation, dev_t dev, struct buffer_head **bhp) 452 const u64 *generation, dev_t dev, const char *symname,
453 unsigned size, struct buffer_head **bhp)
631{ 454{
632 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 455 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
633 struct gfs2_dinode *di; 456 struct gfs2_dinode *di;
@@ -646,7 +469,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
646 di->di_uid = cpu_to_be32(uid); 469 di->di_uid = cpu_to_be32(uid);
647 di->di_gid = cpu_to_be32(gid); 470 di->di_gid = cpu_to_be32(gid);
648 di->di_nlink = 0; 471 di->di_nlink = 0;
649 di->di_size = 0; 472 di->di_size = cpu_to_be64(size);
650 di->di_blocks = cpu_to_be64(1); 473 di->di_blocks = cpu_to_be64(1);
651 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); 474 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec);
652 di->di_major = cpu_to_be32(MAJOR(dev)); 475 di->di_major = cpu_to_be32(MAJOR(dev));
@@ -654,16 +477,6 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
654 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); 477 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
655 di->di_generation = cpu_to_be64(*generation); 478 di->di_generation = cpu_to_be64(*generation);
656 di->di_flags = 0; 479 di->di_flags = 0;
657
658 if (S_ISREG(mode)) {
659 if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
660 gfs2_tune_get(sdp, gt_new_files_jdata))
661 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
662 } else if (S_ISDIR(mode)) {
663 di->di_flags |= cpu_to_be32(dip->i_diskflags &
664 GFS2_DIF_INHERIT_JDATA);
665 }
666
667 di->__pad1 = 0; 480 di->__pad1 = 0;
668 di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0); 481 di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0);
669 di->di_height = 0; 482 di->di_height = 0;
@@ -677,7 +490,26 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
677 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec); 490 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
678 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); 491 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
679 memset(&di->di_reserved, 0, sizeof(di->di_reserved)); 492 memset(&di->di_reserved, 0, sizeof(di->di_reserved));
680 493
494 switch(mode & S_IFMT) {
495 case S_IFREG:
496 if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
497 gfs2_tune_get(sdp, gt_new_files_jdata))
498 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
499 break;
500 case S_IFDIR:
501 di->di_flags |= cpu_to_be32(dip->i_diskflags &
502 GFS2_DIF_INHERIT_JDATA);
503 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
504 di->di_size = cpu_to_be64(sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode));
505 di->di_entries = cpu_to_be32(2);
506 gfs2_init_dir(dibh, dip);
507 break;
508 case S_IFLNK:
509 memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, size);
510 break;
511 }
512
681 set_buffer_uptodate(dibh); 513 set_buffer_uptodate(dibh);
682 514
683 *bhp = dibh; 515 *bhp = dibh;
@@ -685,7 +517,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
685 517
686static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 518static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
687 unsigned int mode, const struct gfs2_inum_host *inum, 519 unsigned int mode, const struct gfs2_inum_host *inum,
688 const u64 *generation, dev_t dev, struct buffer_head **bhp) 520 const u64 *generation, dev_t dev, const char *symname,
521 unsigned int size, struct buffer_head **bhp)
689{ 522{
690 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 523 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
691 unsigned int uid, gid; 524 unsigned int uid, gid;
@@ -707,7 +540,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
707 if (error) 540 if (error)
708 goto out_quota; 541 goto out_quota;
709 542
710 init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp); 543 init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, symname, size, bhp);
711 gfs2_quota_change(dip, +1, uid, gid); 544 gfs2_quota_change(dip, +1, uid, gid);
712 gfs2_trans_end(sdp); 545 gfs2_trans_end(sdp);
713 546
@@ -761,14 +594,16 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
761 goto fail_quota_locks; 594 goto fail_quota_locks;
762 } 595 }
763 596
764 error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode)); 597 error = gfs2_dir_add(&dip->i_inode, name, ip);
765 if (error) 598 if (error)
766 goto fail_end_trans; 599 goto fail_end_trans;
767 600
768 error = gfs2_meta_inode_buffer(ip, &dibh); 601 error = gfs2_meta_inode_buffer(ip, &dibh);
769 if (error) 602 if (error)
770 goto fail_end_trans; 603 goto fail_end_trans;
771 ip->i_inode.i_nlink = 1; 604 inc_nlink(&ip->i_inode);
605 if (S_ISDIR(ip->i_inode.i_mode))
606 inc_nlink(&ip->i_inode);
772 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 607 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
773 gfs2_dinode_out(ip, dibh->b_data); 608 gfs2_dinode_out(ip, dibh->b_data);
774 brelse(dibh); 609 brelse(dibh);
@@ -815,27 +650,25 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
815} 650}
816 651
817/** 652/**
818 * gfs2_createi - Create a new inode 653 * gfs2_create_inode - Create a new inode
819 * @ghs: An array of two holders 654 * @dir: The parent directory
820 * @name: The name of the new file 655 * @dentry: The new dentry
821 * @mode: the permissions on the new inode 656 * @mode: The permissions on the new inode
822 * 657 * @dev: For device nodes, this is the device number
823 * @ghs[0] is an initialized holder for the directory 658 * @symname: For symlinks, this is the link destination
824 * @ghs[1] is the holder for the inode lock 659 * @size: The initial size of the inode (ignored for directories)
825 * 660 *
826 * If the return value is not NULL, the glocks on both the directory and the new 661 * Returns: 0 on success, or error code
827 * file are held. A transaction has been started and an inplace reservation
828 * is held, as well.
829 *
830 * Returns: An inode
831 */ 662 */
832 663
833struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, 664static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
834 unsigned int mode, dev_t dev) 665 unsigned int mode, dev_t dev, const char *symname,
666 unsigned int size)
835{ 667{
668 const struct qstr *name = &dentry->d_name;
669 struct gfs2_holder ghs[2];
836 struct inode *inode = NULL; 670 struct inode *inode = NULL;
837 struct gfs2_inode *dip = ghs->gh_gl->gl_object; 671 struct gfs2_inode *dip = GFS2_I(dir);
838 struct inode *dir = &dip->i_inode;
839 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 672 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
840 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; 673 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
841 int error; 674 int error;
@@ -843,10 +676,9 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
843 struct buffer_head *bh = NULL; 676 struct buffer_head *bh = NULL;
844 677
845 if (!name->len || name->len > GFS2_FNAMESIZE) 678 if (!name->len || name->len > GFS2_FNAMESIZE)
846 return ERR_PTR(-ENAMETOOLONG); 679 return -ENAMETOOLONG;
847 680
848 gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs); 681 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
849 error = gfs2_glock_nq(ghs);
850 if (error) 682 if (error)
851 goto fail; 683 goto fail;
852 684
@@ -864,7 +696,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
864 if (error) 696 if (error)
865 goto fail_gunlock; 697 goto fail_gunlock;
866 698
867 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh); 699 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, symname, size, &bh);
868 if (error) 700 if (error)
869 goto fail_gunlock2; 701 goto fail_gunlock2;
870 702
@@ -891,18 +723,852 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
891 723
892 if (bh) 724 if (bh)
893 brelse(bh); 725 brelse(bh);
894 return inode; 726
727 gfs2_trans_end(sdp);
728 if (dip->i_alloc->al_rgd)
729 gfs2_inplace_release(dip);
730 gfs2_quota_unlock(dip);
731 gfs2_alloc_put(dip);
732 gfs2_glock_dq_uninit_m(2, ghs);
733 mark_inode_dirty(inode);
734 d_instantiate(dentry, inode);
735 return 0;
895 736
896fail_gunlock2: 737fail_gunlock2:
897 gfs2_glock_dq_uninit(ghs + 1); 738 gfs2_glock_dq_uninit(ghs + 1);
898 if (inode && !IS_ERR(inode)) 739 if (inode && !IS_ERR(inode))
899 iput(inode); 740 iput(inode);
900fail_gunlock: 741fail_gunlock:
901 gfs2_glock_dq(ghs); 742 gfs2_glock_dq_uninit(ghs);
902fail: 743fail:
903 if (bh) 744 if (bh)
904 brelse(bh); 745 brelse(bh);
905 return ERR_PTR(error); 746 return error;
747}
748
749/**
750 * gfs2_create - Create a file
751 * @dir: The directory in which to create the file
752 * @dentry: The dentry of the new file
753 * @mode: The mode of the new file
754 *
755 * Returns: errno
756 */
757
758static int gfs2_create(struct inode *dir, struct dentry *dentry,
759 int mode, struct nameidata *nd)
760{
761 struct inode *inode;
762 int ret;
763
764 for (;;) {
765 ret = gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0);
766 if (ret != -EEXIST || (nd && (nd->flags & LOOKUP_EXCL)))
767 return ret;
768
769 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
770 if (inode) {
771 if (!IS_ERR(inode))
772 break;
773 return PTR_ERR(inode);
774 }
775 }
776
777 d_instantiate(dentry, inode);
778 return 0;
779}
780
781/**
782 * gfs2_lookup - Look up a filename in a directory and return its inode
783 * @dir: The directory inode
784 * @dentry: The dentry of the new inode
785 * @nd: passed from Linux VFS, ignored by us
786 *
787 * Called by the VFS layer. Lock dir and call gfs2_lookupi()
788 *
789 * Returns: errno
790 */
791
792static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
793 struct nameidata *nd)
794{
795 struct inode *inode = NULL;
796
797 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
798 if (inode && IS_ERR(inode))
799 return ERR_CAST(inode);
800
801 if (inode) {
802 struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
803 struct gfs2_holder gh;
804 int error;
805 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
806 if (error) {
807 iput(inode);
808 return ERR_PTR(error);
809 }
810 gfs2_glock_dq_uninit(&gh);
811 return d_splice_alias(inode, dentry);
812 }
813 d_add(dentry, inode);
814
815 return NULL;
816}
817
818/**
819 * gfs2_link - Link to a file
820 * @old_dentry: The inode to link
821 * @dir: Add link to this directory
822 * @dentry: The name of the link
823 *
824 * Link the inode in "old_dentry" into the directory "dir" with the
825 * name in "dentry".
826 *
827 * Returns: errno
828 */
829
830static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
831 struct dentry *dentry)
832{
833 struct gfs2_inode *dip = GFS2_I(dir);
834 struct gfs2_sbd *sdp = GFS2_SB(dir);
835 struct inode *inode = old_dentry->d_inode;
836 struct gfs2_inode *ip = GFS2_I(inode);
837 struct gfs2_holder ghs[2];
838 struct buffer_head *dibh;
839 int alloc_required;
840 int error;
841
842 if (S_ISDIR(inode->i_mode))
843 return -EPERM;
844
845 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
846 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
847
848 error = gfs2_glock_nq(ghs); /* parent */
849 if (error)
850 goto out_parent;
851
852 error = gfs2_glock_nq(ghs + 1); /* child */
853 if (error)
854 goto out_child;
855
856 error = -ENOENT;
857 if (inode->i_nlink == 0)
858 goto out_gunlock;
859
860 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
861 if (error)
862 goto out_gunlock;
863
864 error = gfs2_dir_check(dir, &dentry->d_name, NULL);
865 switch (error) {
866 case -ENOENT:
867 break;
868 case 0:
869 error = -EEXIST;
870 default:
871 goto out_gunlock;
872 }
873
874 error = -EINVAL;
875 if (!dip->i_inode.i_nlink)
876 goto out_gunlock;
877 error = -EFBIG;
878 if (dip->i_entries == (u32)-1)
879 goto out_gunlock;
880 error = -EPERM;
881 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
882 goto out_gunlock;
883 error = -EINVAL;
884 if (!ip->i_inode.i_nlink)
885 goto out_gunlock;
886 error = -EMLINK;
887 if (ip->i_inode.i_nlink == (u32)-1)
888 goto out_gunlock;
889
890 alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
891 if (error < 0)
892 goto out_gunlock;
893 error = 0;
894
895 if (alloc_required) {
896 struct gfs2_alloc *al = gfs2_alloc_get(dip);
897 if (!al) {
898 error = -ENOMEM;
899 goto out_gunlock;
900 }
901
902 error = gfs2_quota_lock_check(dip);
903 if (error)
904 goto out_alloc;
905
906 al->al_requested = sdp->sd_max_dirres;
907
908 error = gfs2_inplace_reserve(dip);
909 if (error)
910 goto out_gunlock_q;
911
912 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
913 gfs2_rg_blocks(al) +
914 2 * RES_DINODE + RES_STATFS +
915 RES_QUOTA, 0);
916 if (error)
917 goto out_ipres;
918 } else {
919 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
920 if (error)
921 goto out_ipres;
922 }
923
924 error = gfs2_meta_inode_buffer(ip, &dibh);
925 if (error)
926 goto out_end_trans;
927
928 error = gfs2_dir_add(dir, &dentry->d_name, ip);
929 if (error)
930 goto out_brelse;
931
932 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
933 inc_nlink(&ip->i_inode);
934 ip->i_inode.i_ctime = CURRENT_TIME;
935 gfs2_dinode_out(ip, dibh->b_data);
936 mark_inode_dirty(&ip->i_inode);
937
938out_brelse:
939 brelse(dibh);
940out_end_trans:
941 gfs2_trans_end(sdp);
942out_ipres:
943 if (alloc_required)
944 gfs2_inplace_release(dip);
945out_gunlock_q:
946 if (alloc_required)
947 gfs2_quota_unlock(dip);
948out_alloc:
949 if (alloc_required)
950 gfs2_alloc_put(dip);
951out_gunlock:
952 gfs2_glock_dq(ghs + 1);
953out_child:
954 gfs2_glock_dq(ghs);
955out_parent:
956 gfs2_holder_uninit(ghs);
957 gfs2_holder_uninit(ghs + 1);
958 if (!error) {
959 ihold(inode);
960 d_instantiate(dentry, inode);
961 mark_inode_dirty(inode);
962 }
963 return error;
964}
965
966/*
967 * gfs2_unlink_ok - check to see that a inode is still in a directory
968 * @dip: the directory
969 * @name: the name of the file
970 * @ip: the inode
971 *
972 * Assumes that the lock on (at least) @dip is held.
973 *
974 * Returns: 0 if the parent/child relationship is correct, errno if it isn't
975 */
976
977static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
978 const struct gfs2_inode *ip)
979{
980 int error;
981
982 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
983 return -EPERM;
984
985 if ((dip->i_inode.i_mode & S_ISVTX) &&
986 dip->i_inode.i_uid != current_fsuid() &&
987 ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
988 return -EPERM;
989
990 if (IS_APPEND(&dip->i_inode))
991 return -EPERM;
992
993 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
994 if (error)
995 return error;
996
997 error = gfs2_dir_check(&dip->i_inode, name, ip);
998 if (error)
999 return error;
1000
1001 return 0;
1002}
1003
1004/**
1005 * gfs2_unlink_inode - Removes an inode from its parent dir and unlinks it
1006 * @dip: The parent directory
1007 * @name: The name of the entry in the parent directory
1008 * @bh: The inode buffer for the inode to be removed
1009 * @inode: The inode to be removed
1010 *
1011 * Called with all the locks and in a transaction. This will only be
1012 * called for a directory after it has been checked to ensure it is empty.
1013 *
1014 * Returns: 0 on success, or an error
1015 */
1016
1017static int gfs2_unlink_inode(struct gfs2_inode *dip,
1018 const struct dentry *dentry,
1019 struct buffer_head *bh)
1020{
1021 struct inode *inode = dentry->d_inode;
1022 struct gfs2_inode *ip = GFS2_I(inode);
1023 int error;
1024
1025 error = gfs2_dir_del(dip, dentry);
1026 if (error)
1027 return error;
1028
1029 ip->i_entries = 0;
1030 inode->i_ctime = CURRENT_TIME;
1031 if (S_ISDIR(inode->i_mode))
1032 clear_nlink(inode);
1033 else
1034 drop_nlink(inode);
1035 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1036 gfs2_dinode_out(ip, bh->b_data);
1037 mark_inode_dirty(inode);
1038 if (inode->i_nlink == 0)
1039 gfs2_unlink_di(inode);
1040 return 0;
1041}
1042
1043
1044/**
1045 * gfs2_unlink - Unlink an inode (this does rmdir as well)
1046 * @dir: The inode of the directory containing the inode to unlink
1047 * @dentry: The file itself
1048 *
1049 * This routine uses the type of the inode as a flag to figure out
1050 * whether this is an unlink or an rmdir.
1051 *
1052 * Returns: errno
1053 */
1054
1055static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
1056{
1057 struct gfs2_inode *dip = GFS2_I(dir);
1058 struct gfs2_sbd *sdp = GFS2_SB(dir);
1059 struct inode *inode = dentry->d_inode;
1060 struct gfs2_inode *ip = GFS2_I(inode);
1061 struct buffer_head *bh;
1062 struct gfs2_holder ghs[3];
1063 struct gfs2_rgrpd *rgd;
1064 struct gfs2_holder ri_gh;
1065 int error;
1066
1067 error = gfs2_rindex_hold(sdp, &ri_gh);
1068 if (error)
1069 return error;
1070
1071 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
1072 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
1073
1074 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
1075 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
1076
1077
1078 error = gfs2_glock_nq(ghs); /* parent */
1079 if (error)
1080 goto out_parent;
1081
1082 error = gfs2_glock_nq(ghs + 1); /* child */
1083 if (error)
1084 goto out_child;
1085
1086 error = -ENOENT;
1087 if (inode->i_nlink == 0)
1088 goto out_rgrp;
1089
1090 if (S_ISDIR(inode->i_mode)) {
1091 error = -ENOTEMPTY;
1092 if (ip->i_entries > 2 || inode->i_nlink > 2)
1093 goto out_rgrp;
1094 }
1095
1096 error = gfs2_glock_nq(ghs + 2); /* rgrp */
1097 if (error)
1098 goto out_rgrp;
1099
1100 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
1101 if (error)
1102 goto out_gunlock;
1103
1104 error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0);
1105 if (error)
1106 goto out_gunlock;
1107
1108 error = gfs2_meta_inode_buffer(ip, &bh);
1109 if (error)
1110 goto out_end_trans;
1111
1112 error = gfs2_unlink_inode(dip, dentry, bh);
1113 brelse(bh);
1114
1115out_end_trans:
1116 gfs2_trans_end(sdp);
1117out_gunlock:
1118 gfs2_glock_dq(ghs + 2);
1119out_rgrp:
1120 gfs2_holder_uninit(ghs + 2);
1121 gfs2_glock_dq(ghs + 1);
1122out_child:
1123 gfs2_holder_uninit(ghs + 1);
1124 gfs2_glock_dq(ghs);
1125out_parent:
1126 gfs2_holder_uninit(ghs);
1127 gfs2_glock_dq_uninit(&ri_gh);
1128 return error;
1129}
1130
1131/**
1132 * gfs2_symlink - Create a symlink
1133 * @dir: The directory to create the symlink in
1134 * @dentry: The dentry to put the symlink in
1135 * @symname: The thing which the link points to
1136 *
1137 * Returns: errno
1138 */
1139
1140static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
1141 const char *symname)
1142{
1143 struct gfs2_sbd *sdp = GFS2_SB(dir);
1144 unsigned int size;
1145
1146 size = strlen(symname);
1147 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
1148 return -ENAMETOOLONG;
1149
1150 return gfs2_create_inode(dir, dentry, S_IFLNK | S_IRWXUGO, 0, symname, size);
1151}
1152
1153/**
1154 * gfs2_mkdir - Make a directory
1155 * @dir: The parent directory of the new one
1156 * @dentry: The dentry of the new directory
1157 * @mode: The mode of the new directory
1158 *
1159 * Returns: errno
1160 */
1161
1162static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1163{
1164 return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0);
1165}
1166
1167/**
1168 * gfs2_mknod - Make a special file
1169 * @dir: The directory in which the special file will reside
1170 * @dentry: The dentry of the special file
1171 * @mode: The mode of the special file
1172 * @dev: The device specification of the special file
1173 *
1174 */
1175
1176static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
1177 dev_t dev)
1178{
1179 return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0);
1180}
1181
1182/*
1183 * gfs2_ok_to_move - check if it's ok to move a directory to another directory
1184 * @this: move this
1185 * @to: to here
1186 *
1187 * Follow @to back to the root and make sure we don't encounter @this
1188 * Assumes we already hold the rename lock.
1189 *
1190 * Returns: errno
1191 */
1192
1193static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
1194{
1195 struct inode *dir = &to->i_inode;
1196 struct super_block *sb = dir->i_sb;
1197 struct inode *tmp;
1198 int error = 0;
1199
1200 igrab(dir);
1201
1202 for (;;) {
1203 if (dir == &this->i_inode) {
1204 error = -EINVAL;
1205 break;
1206 }
1207 if (dir == sb->s_root->d_inode) {
1208 error = 0;
1209 break;
1210 }
1211
1212 tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1);
1213 if (IS_ERR(tmp)) {
1214 error = PTR_ERR(tmp);
1215 break;
1216 }
1217
1218 iput(dir);
1219 dir = tmp;
1220 }
1221
1222 iput(dir);
1223
1224 return error;
1225}
1226
1227/**
1228 * gfs2_rename - Rename a file
1229 * @odir: Parent directory of old file name
1230 * @odentry: The old dentry of the file
1231 * @ndir: Parent directory of new file name
1232 * @ndentry: The new dentry of the file
1233 *
1234 * Returns: errno
1235 */
1236
1237static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1238 struct inode *ndir, struct dentry *ndentry)
1239{
1240 struct gfs2_inode *odip = GFS2_I(odir);
1241 struct gfs2_inode *ndip = GFS2_I(ndir);
1242 struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
1243 struct gfs2_inode *nip = NULL;
1244 struct gfs2_sbd *sdp = GFS2_SB(odir);
1245 struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh;
1246 struct gfs2_rgrpd *nrgd;
1247 unsigned int num_gh;
1248 int dir_rename = 0;
1249 int alloc_required = 0;
1250 unsigned int x;
1251 int error;
1252
1253 if (ndentry->d_inode) {
1254 nip = GFS2_I(ndentry->d_inode);
1255 if (ip == nip)
1256 return 0;
1257 }
1258
1259 error = gfs2_rindex_hold(sdp, &ri_gh);
1260 if (error)
1261 return error;
1262
1263 if (odip != ndip) {
1264 error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
1265 0, &r_gh);
1266 if (error)
1267 goto out;
1268
1269 if (S_ISDIR(ip->i_inode.i_mode)) {
1270 dir_rename = 1;
1271 /* don't move a dirctory into it's subdir */
1272 error = gfs2_ok_to_move(ip, ndip);
1273 if (error)
1274 goto out_gunlock_r;
1275 }
1276 }
1277
1278 num_gh = 1;
1279 gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
1280 if (odip != ndip) {
1281 gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
1282 num_gh++;
1283 }
1284 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
1285 num_gh++;
1286
1287 if (nip) {
1288 gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
1289 num_gh++;
1290 /* grab the resource lock for unlink flag twiddling
1291 * this is the case of the target file already existing
1292 * so we unlink before doing the rename
1293 */
1294 nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
1295 if (nrgd)
1296 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
1297 }
1298
1299 for (x = 0; x < num_gh; x++) {
1300 error = gfs2_glock_nq(ghs + x);
1301 if (error)
1302 goto out_gunlock;
1303 }
1304
1305 error = -ENOENT;
1306 if (ip->i_inode.i_nlink == 0)
1307 goto out_gunlock;
1308
1309 /* Check out the old directory */
1310
1311 error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
1312 if (error)
1313 goto out_gunlock;
1314
1315 /* Check out the new directory */
1316
1317 if (nip) {
1318 error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
1319 if (error)
1320 goto out_gunlock;
1321
1322 if (nip->i_inode.i_nlink == 0) {
1323 error = -EAGAIN;
1324 goto out_gunlock;
1325 }
1326
1327 if (S_ISDIR(nip->i_inode.i_mode)) {
1328 if (nip->i_entries < 2) {
1329 gfs2_consist_inode(nip);
1330 error = -EIO;
1331 goto out_gunlock;
1332 }
1333 if (nip->i_entries > 2) {
1334 error = -ENOTEMPTY;
1335 goto out_gunlock;
1336 }
1337 }
1338 } else {
1339 error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
1340 if (error)
1341 goto out_gunlock;
1342
1343 error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
1344 switch (error) {
1345 case -ENOENT:
1346 error = 0;
1347 break;
1348 case 0:
1349 error = -EEXIST;
1350 default:
1351 goto out_gunlock;
1352 };
1353
1354 if (odip != ndip) {
1355 if (!ndip->i_inode.i_nlink) {
1356 error = -ENOENT;
1357 goto out_gunlock;
1358 }
1359 if (ndip->i_entries == (u32)-1) {
1360 error = -EFBIG;
1361 goto out_gunlock;
1362 }
1363 if (S_ISDIR(ip->i_inode.i_mode) &&
1364 ndip->i_inode.i_nlink == (u32)-1) {
1365 error = -EMLINK;
1366 goto out_gunlock;
1367 }
1368 }
1369 }
1370
1371 /* Check out the dir to be renamed */
1372
1373 if (dir_rename) {
1374 error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
1375 if (error)
1376 goto out_gunlock;
1377 }
1378
1379 if (nip == NULL)
1380 alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
1381 error = alloc_required;
1382 if (error < 0)
1383 goto out_gunlock;
1384 error = 0;
1385
1386 if (alloc_required) {
1387 struct gfs2_alloc *al = gfs2_alloc_get(ndip);
1388 if (!al) {
1389 error = -ENOMEM;
1390 goto out_gunlock;
1391 }
1392
1393 error = gfs2_quota_lock_check(ndip);
1394 if (error)
1395 goto out_alloc;
1396
1397 al->al_requested = sdp->sd_max_dirres;
1398
1399 error = gfs2_inplace_reserve_ri(ndip);
1400 if (error)
1401 goto out_gunlock_q;
1402
1403 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
1404 gfs2_rg_blocks(al) +
1405 4 * RES_DINODE + 4 * RES_LEAF +
1406 RES_STATFS + RES_QUOTA + 4, 0);
1407 if (error)
1408 goto out_ipreserv;
1409 } else {
1410 error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
1411 5 * RES_LEAF + 4, 0);
1412 if (error)
1413 goto out_gunlock;
1414 }
1415
1416 /* Remove the target file, if it exists */
1417
1418 if (nip) {
1419 struct buffer_head *bh;
1420 error = gfs2_meta_inode_buffer(nip, &bh);
1421 if (error)
1422 goto out_end_trans;
1423 error = gfs2_unlink_inode(ndip, ndentry, bh);
1424 brelse(bh);
1425 }
1426
1427 if (dir_rename) {
1428 error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
1429 if (error)
1430 goto out_end_trans;
1431 } else {
1432 struct buffer_head *dibh;
1433 error = gfs2_meta_inode_buffer(ip, &dibh);
1434 if (error)
1435 goto out_end_trans;
1436 ip->i_inode.i_ctime = CURRENT_TIME;
1437 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1438 gfs2_dinode_out(ip, dibh->b_data);
1439 brelse(dibh);
1440 }
1441
1442 error = gfs2_dir_del(odip, odentry);
1443 if (error)
1444 goto out_end_trans;
1445
1446 error = gfs2_dir_add(ndir, &ndentry->d_name, ip);
1447 if (error)
1448 goto out_end_trans;
1449
1450out_end_trans:
1451 gfs2_trans_end(sdp);
1452out_ipreserv:
1453 if (alloc_required)
1454 gfs2_inplace_release(ndip);
1455out_gunlock_q:
1456 if (alloc_required)
1457 gfs2_quota_unlock(ndip);
1458out_alloc:
1459 if (alloc_required)
1460 gfs2_alloc_put(ndip);
1461out_gunlock:
1462 while (x--) {
1463 gfs2_glock_dq(ghs + x);
1464 gfs2_holder_uninit(ghs + x);
1465 }
1466out_gunlock_r:
1467 if (r_gh.gh_gl)
1468 gfs2_glock_dq_uninit(&r_gh);
1469out:
1470 gfs2_glock_dq_uninit(&ri_gh);
1471 return error;
1472}
1473
1474/**
1475 * gfs2_follow_link - Follow a symbolic link
1476 * @dentry: The dentry of the link
1477 * @nd: Data that we pass to vfs_follow_link()
1478 *
1479 * This can handle symlinks of any size.
1480 *
1481 * Returns: 0 on success or error code
1482 */
1483
1484static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
1485{
1486 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
1487 struct gfs2_holder i_gh;
1488 struct buffer_head *dibh;
1489 unsigned int size;
1490 char *buf;
1491 int error;
1492
1493 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
1494 error = gfs2_glock_nq(&i_gh);
1495 if (error) {
1496 gfs2_holder_uninit(&i_gh);
1497 nd_set_link(nd, ERR_PTR(error));
1498 return NULL;
1499 }
1500
1501 size = (unsigned int)i_size_read(&ip->i_inode);
1502 if (size == 0) {
1503 gfs2_consist_inode(ip);
1504 buf = ERR_PTR(-EIO);
1505 goto out;
1506 }
1507
1508 error = gfs2_meta_inode_buffer(ip, &dibh);
1509 if (error) {
1510 buf = ERR_PTR(error);
1511 goto out;
1512 }
1513
1514 buf = kzalloc(size + 1, GFP_NOFS);
1515 if (!buf)
1516 buf = ERR_PTR(-ENOMEM);
1517 else
1518 memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size);
1519 brelse(dibh);
1520out:
1521 gfs2_glock_dq_uninit(&i_gh);
1522 nd_set_link(nd, buf);
1523 return NULL;
1524}
1525
1526static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1527{
1528 char *s = nd_get_link(nd);
1529 if (!IS_ERR(s))
1530 kfree(s);
1531}
1532
1533/**
1534 * gfs2_permission -
1535 * @inode: The inode
1536 * @mask: The mask to be tested
1537 * @flags: Indicates whether this is an RCU path walk or not
1538 *
1539 * This may be called from the VFS directly, or from within GFS2 with the
1540 * inode locked, so we look to see if the glock is already locked and only
1541 * lock the glock if its not already been done.
1542 *
1543 * Returns: errno
1544 */
1545
1546int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
1547{
1548 struct gfs2_inode *ip;
1549 struct gfs2_holder i_gh;
1550 int error;
1551 int unlock = 0;
1552
1553
1554 ip = GFS2_I(inode);
1555 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1556 if (flags & IPERM_FLAG_RCU)
1557 return -ECHILD;
1558 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
1559 if (error)
1560 return error;
1561 unlock = 1;
1562 }
1563
1564 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
1565 error = -EACCES;
1566 else
1567 error = generic_permission(inode, mask, flags, gfs2_check_acl);
1568 if (unlock)
1569 gfs2_glock_dq_uninit(&i_gh);
1570
1571 return error;
906} 1572}
907 1573
908static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) 1574static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
@@ -928,8 +1594,6 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
928 * @ip: 1594 * @ip:
929 * @attr: 1595 * @attr:
930 * 1596 *
931 * Called with a reference on the vnode.
932 *
933 * Returns: errno 1597 * Returns: errno
934 */ 1598 */
935 1599
@@ -949,60 +1613,280 @@ int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
949 return error; 1613 return error;
950} 1614}
951 1615
952void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) 1616static int setattr_chown(struct inode *inode, struct iattr *attr)
953{ 1617{
954 struct gfs2_dinode *str = buf; 1618 struct gfs2_inode *ip = GFS2_I(inode);
955 1619 struct gfs2_sbd *sdp = GFS2_SB(inode);
956 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 1620 u32 ouid, ogid, nuid, ngid;
957 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); 1621 int error;
958 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); 1622
959 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); 1623 ouid = inode->i_uid;
960 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); 1624 ogid = inode->i_gid;
961 str->di_mode = cpu_to_be32(ip->i_inode.i_mode); 1625 nuid = attr->ia_uid;
962 str->di_uid = cpu_to_be32(ip->i_inode.i_uid); 1626 ngid = attr->ia_gid;
963 str->di_gid = cpu_to_be32(ip->i_inode.i_gid); 1627
964 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); 1628 if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
965 str->di_size = cpu_to_be64(i_size_read(&ip->i_inode)); 1629 ouid = nuid = NO_QUOTA_CHANGE;
966 str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); 1630 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
967 str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); 1631 ogid = ngid = NO_QUOTA_CHANGE;
968 str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); 1632
969 str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec); 1633 if (!gfs2_alloc_get(ip))
970 1634 return -ENOMEM;
971 str->di_goal_meta = cpu_to_be64(ip->i_goal); 1635
972 str->di_goal_data = cpu_to_be64(ip->i_goal); 1636 error = gfs2_quota_lock(ip, nuid, ngid);
973 str->di_generation = cpu_to_be64(ip->i_generation); 1637 if (error)
974 1638 goto out_alloc;
975 str->di_flags = cpu_to_be32(ip->i_diskflags); 1639
976 str->di_height = cpu_to_be16(ip->i_height); 1640 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
977 str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) && 1641 error = gfs2_quota_check(ip, nuid, ngid);
978 !(ip->i_diskflags & GFS2_DIF_EXHASH) ? 1642 if (error)
979 GFS2_FORMAT_DE : 0); 1643 goto out_gunlock_q;
980 str->di_depth = cpu_to_be16(ip->i_depth); 1644 }
981 str->di_entries = cpu_to_be32(ip->i_entries); 1645
982 1646 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
983 str->di_eattr = cpu_to_be64(ip->i_eattr); 1647 if (error)
984 str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); 1648 goto out_gunlock_q;
985 str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec); 1649
986 str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec); 1650 error = gfs2_setattr_simple(ip, attr);
987} 1651 if (error)
988 1652 goto out_end_trans;
989void gfs2_dinode_print(const struct gfs2_inode *ip) 1653
990{ 1654 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
991 printk(KERN_INFO " no_formal_ino = %llu\n", 1655 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
992 (unsigned long long)ip->i_no_formal_ino); 1656 gfs2_quota_change(ip, -blocks, ouid, ogid);
993 printk(KERN_INFO " no_addr = %llu\n", 1657 gfs2_quota_change(ip, blocks, nuid, ngid);
994 (unsigned long long)ip->i_no_addr); 1658 }
995 printk(KERN_INFO " i_size = %llu\n", 1659
996 (unsigned long long)i_size_read(&ip->i_inode)); 1660out_end_trans:
997 printk(KERN_INFO " blocks = %llu\n", 1661 gfs2_trans_end(sdp);
998 (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode)); 1662out_gunlock_q:
999 printk(KERN_INFO " i_goal = %llu\n", 1663 gfs2_quota_unlock(ip);
1000 (unsigned long long)ip->i_goal); 1664out_alloc:
1001 printk(KERN_INFO " i_diskflags = 0x%.8X\n", ip->i_diskflags); 1665 gfs2_alloc_put(ip);
1002 printk(KERN_INFO " i_height = %u\n", ip->i_height); 1666 return error;
1003 printk(KERN_INFO " i_depth = %u\n", ip->i_depth); 1667}
1004 printk(KERN_INFO " i_entries = %u\n", ip->i_entries); 1668
1005 printk(KERN_INFO " i_eattr = %llu\n", 1669/**
1006 (unsigned long long)ip->i_eattr); 1670 * gfs2_setattr - Change attributes on an inode
1671 * @dentry: The dentry which is changing
1672 * @attr: The structure describing the change
1673 *
1674 * The VFS layer wants to change one or more of an inodes attributes. Write
1675 * that change out to disk.
1676 *
1677 * Returns: errno
1678 */
1679
1680static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
1681{
1682 struct inode *inode = dentry->d_inode;
1683 struct gfs2_inode *ip = GFS2_I(inode);
1684 struct gfs2_holder i_gh;
1685 int error;
1686
1687 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1688 if (error)
1689 return error;
1690
1691 error = -EPERM;
1692 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1693 goto out;
1694
1695 error = inode_change_ok(inode, attr);
1696 if (error)
1697 goto out;
1698
1699 if (attr->ia_valid & ATTR_SIZE)
1700 error = gfs2_setattr_size(inode, attr->ia_size);
1701 else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
1702 error = setattr_chown(inode, attr);
1703 else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
1704 error = gfs2_acl_chmod(ip, attr);
1705 else
1706 error = gfs2_setattr_simple(ip, attr);
1707
1708out:
1709 gfs2_glock_dq_uninit(&i_gh);
1710 if (!error)
1711 mark_inode_dirty(inode);
1712 return error;
1713}
1714
1715/**
1716 * gfs2_getattr - Read out an inode's attributes
1717 * @mnt: The vfsmount the inode is being accessed from
1718 * @dentry: The dentry to stat
1719 * @stat: The inode's stats
1720 *
1721 * This may be called from the VFS directly, or from within GFS2 with the
1722 * inode locked, so we look to see if the glock is already locked and only
1723 * lock the glock if its not already been done. Note that its the NFS
1724 * readdirplus operation which causes this to be called (from filldir)
1725 * with the glock already held.
1726 *
1727 * Returns: errno
1728 */
1729
1730static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
1731 struct kstat *stat)
1732{
1733 struct inode *inode = dentry->d_inode;
1734 struct gfs2_inode *ip = GFS2_I(inode);
1735 struct gfs2_holder gh;
1736 int error;
1737 int unlock = 0;
1738
1739 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1740 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1741 if (error)
1742 return error;
1743 unlock = 1;
1744 }
1745
1746 generic_fillattr(inode, stat);
1747 if (unlock)
1748 gfs2_glock_dq_uninit(&gh);
1749
1750 return 0;
1751}
1752
1753static int gfs2_setxattr(struct dentry *dentry, const char *name,
1754 const void *data, size_t size, int flags)
1755{
1756 struct inode *inode = dentry->d_inode;
1757 struct gfs2_inode *ip = GFS2_I(inode);
1758 struct gfs2_holder gh;
1759 int ret;
1760
1761 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1762 ret = gfs2_glock_nq(&gh);
1763 if (ret == 0) {
1764 ret = generic_setxattr(dentry, name, data, size, flags);
1765 gfs2_glock_dq(&gh);
1766 }
1767 gfs2_holder_uninit(&gh);
1768 return ret;
1769}
1770
1771static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
1772 void *data, size_t size)
1773{
1774 struct inode *inode = dentry->d_inode;
1775 struct gfs2_inode *ip = GFS2_I(inode);
1776 struct gfs2_holder gh;
1777 int ret;
1778
1779 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1780 ret = gfs2_glock_nq(&gh);
1781 if (ret == 0) {
1782 ret = generic_getxattr(dentry, name, data, size);
1783 gfs2_glock_dq(&gh);
1784 }
1785 gfs2_holder_uninit(&gh);
1786 return ret;
1787}
1788
1789static int gfs2_removexattr(struct dentry *dentry, const char *name)
1790{
1791 struct inode *inode = dentry->d_inode;
1792 struct gfs2_inode *ip = GFS2_I(inode);
1793 struct gfs2_holder gh;
1794 int ret;
1795
1796 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1797 ret = gfs2_glock_nq(&gh);
1798 if (ret == 0) {
1799 ret = generic_removexattr(dentry, name);
1800 gfs2_glock_dq(&gh);
1801 }
1802 gfs2_holder_uninit(&gh);
1803 return ret;
1804}
1805
1806static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1807 u64 start, u64 len)
1808{
1809 struct gfs2_inode *ip = GFS2_I(inode);
1810 struct gfs2_holder gh;
1811 int ret;
1812
1813 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
1814 if (ret)
1815 return ret;
1816
1817 mutex_lock(&inode->i_mutex);
1818
1819 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
1820 if (ret)
1821 goto out;
1822
1823 if (gfs2_is_stuffed(ip)) {
1824 u64 phys = ip->i_no_addr << inode->i_blkbits;
1825 u64 size = i_size_read(inode);
1826 u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
1827 FIEMAP_EXTENT_DATA_INLINE;
1828 phys += sizeof(struct gfs2_dinode);
1829 phys += start;
1830 if (start + len > size)
1831 len = size - start;
1832 if (start < size)
1833 ret = fiemap_fill_next_extent(fieinfo, start, phys,
1834 len, flags);
1835 if (ret == 1)
1836 ret = 0;
1837 } else {
1838 ret = __generic_block_fiemap(inode, fieinfo, start, len,
1839 gfs2_block_map);
1840 }
1841
1842 gfs2_glock_dq_uninit(&gh);
1843out:
1844 mutex_unlock(&inode->i_mutex);
1845 return ret;
1007} 1846}
1008 1847
1848const struct inode_operations gfs2_file_iops = {
1849 .permission = gfs2_permission,
1850 .setattr = gfs2_setattr,
1851 .getattr = gfs2_getattr,
1852 .setxattr = gfs2_setxattr,
1853 .getxattr = gfs2_getxattr,
1854 .listxattr = gfs2_listxattr,
1855 .removexattr = gfs2_removexattr,
1856 .fiemap = gfs2_fiemap,
1857};
1858
1859const struct inode_operations gfs2_dir_iops = {
1860 .create = gfs2_create,
1861 .lookup = gfs2_lookup,
1862 .link = gfs2_link,
1863 .unlink = gfs2_unlink,
1864 .symlink = gfs2_symlink,
1865 .mkdir = gfs2_mkdir,
1866 .rmdir = gfs2_unlink,
1867 .mknod = gfs2_mknod,
1868 .rename = gfs2_rename,
1869 .permission = gfs2_permission,
1870 .setattr = gfs2_setattr,
1871 .getattr = gfs2_getattr,
1872 .setxattr = gfs2_setxattr,
1873 .getxattr = gfs2_getxattr,
1874 .listxattr = gfs2_listxattr,
1875 .removexattr = gfs2_removexattr,
1876 .fiemap = gfs2_fiemap,
1877};
1878
1879const struct inode_operations gfs2_symlink_iops = {
1880 .readlink = generic_readlink,
1881 .follow_link = gfs2_follow_link,
1882 .put_link = gfs2_put_link,
1883 .permission = gfs2_permission,
1884 .setattr = gfs2_setattr,
1885 .getattr = gfs2_getattr,
1886 .setxattr = gfs2_setxattr,
1887 .getxattr = gfs2_getxattr,
1888 .listxattr = gfs2_listxattr,
1889 .removexattr = gfs2_removexattr,
1890 .fiemap = gfs2_fiemap,
1891};
1892
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 099ca305e518..31606076f701 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -102,22 +102,16 @@ extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
102extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, 102extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
103 u64 *no_formal_ino, 103 u64 *no_formal_ino,
104 unsigned int blktype); 104 unsigned int blktype);
105extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); 105extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int nonblock);
106 106
107extern int gfs2_inode_refresh(struct gfs2_inode *ip); 107extern int gfs2_inode_refresh(struct gfs2_inode *ip);
108 108
109extern int gfs2_dinode_dealloc(struct gfs2_inode *inode);
110extern int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
111extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, 109extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
112 int is_root); 110 int is_root);
113extern struct inode *gfs2_createi(struct gfs2_holder *ghs,
114 const struct qstr *name,
115 unsigned int mode, dev_t dev);
116extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags); 111extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags);
117extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); 112extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
118extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); 113extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
119extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); 114extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
120extern void gfs2_dinode_print(const struct gfs2_inode *ip);
121 115
122extern const struct inode_operations gfs2_file_iops; 116extern const struct inode_operations gfs2_file_iops;
123extern const struct inode_operations gfs2_dir_iops; 117extern const struct inode_operations gfs2_dir_iops;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 5b102c1887fd..903115f2bb34 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -18,6 +18,7 @@
18#include <linux/kthread.h> 18#include <linux/kthread.h>
19#include <linux/freezer.h> 19#include <linux/freezer.h>
20#include <linux/bio.h> 20#include <linux/bio.h>
21#include <linux/writeback.h>
21 22
22#include "gfs2.h" 23#include "gfs2.h"
23#include "incore.h" 24#include "incore.h"
@@ -83,55 +84,97 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
83/** 84/**
84 * gfs2_ail1_start_one - Start I/O on a part of the AIL 85 * gfs2_ail1_start_one - Start I/O on a part of the AIL
85 * @sdp: the filesystem 86 * @sdp: the filesystem
86 * @tr: the part of the AIL 87 * @wbc: The writeback control structure
88 * @ai: The ail structure
87 * 89 *
88 */ 90 */
89 91
90static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) 92static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
93 struct writeback_control *wbc,
94 struct gfs2_ail *ai)
91__releases(&sdp->sd_ail_lock) 95__releases(&sdp->sd_ail_lock)
92__acquires(&sdp->sd_ail_lock) 96__acquires(&sdp->sd_ail_lock)
93{ 97{
98 struct gfs2_glock *gl = NULL;
99 struct address_space *mapping;
94 struct gfs2_bufdata *bd, *s; 100 struct gfs2_bufdata *bd, *s;
95 struct buffer_head *bh; 101 struct buffer_head *bh;
96 int retry;
97 102
98 do { 103 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, bd_ail_st_list) {
99 retry = 0; 104 bh = bd->bd_bh;
100 105
101 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, 106 gfs2_assert(sdp, bd->bd_ail == ai);
102 bd_ail_st_list) {
103 bh = bd->bd_bh;
104 107
105 gfs2_assert(sdp, bd->bd_ail == ai); 108 if (!buffer_busy(bh)) {
109 if (!buffer_uptodate(bh))
110 gfs2_io_error_bh(sdp, bh);
111 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
112 continue;
113 }
106 114
107 if (!buffer_busy(bh)) { 115 if (!buffer_dirty(bh))
108 if (!buffer_uptodate(bh)) 116 continue;
109 gfs2_io_error_bh(sdp, bh); 117 if (gl == bd->bd_gl)
110 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); 118 continue;
111 continue; 119 gl = bd->bd_gl;
112 } 120 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
121 mapping = bh->b_page->mapping;
122 if (!mapping)
123 continue;
124 spin_unlock(&sdp->sd_ail_lock);
125 generic_writepages(mapping, wbc);
126 spin_lock(&sdp->sd_ail_lock);
127 if (wbc->nr_to_write <= 0)
128 break;
129 return 1;
130 }
113 131
114 if (!buffer_dirty(bh)) 132 return 0;
115 continue; 133}
116 134
117 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
118 135
119 get_bh(bh); 136/**
120 spin_unlock(&sdp->sd_ail_lock); 137 * gfs2_ail1_flush - start writeback of some ail1 entries
121 lock_buffer(bh); 138 * @sdp: The super block
122 if (test_clear_buffer_dirty(bh)) { 139 * @wbc: The writeback control structure
123 bh->b_end_io = end_buffer_write_sync; 140 *
124 submit_bh(WRITE_SYNC, bh); 141 * Writes back some ail1 entries, according to the limits in the
125 } else { 142 * writeback control structure
126 unlock_buffer(bh); 143 */
127 brelse(bh); 144
128 } 145void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
129 spin_lock(&sdp->sd_ail_lock); 146{
130 147 struct list_head *head = &sdp->sd_ail1_list;
131 retry = 1; 148 struct gfs2_ail *ai;
149
150 trace_gfs2_ail_flush(sdp, wbc, 1);
151 spin_lock(&sdp->sd_ail_lock);
152restart:
153 list_for_each_entry_reverse(ai, head, ai_list) {
154 if (wbc->nr_to_write <= 0)
132 break; 155 break;
133 } 156 if (gfs2_ail1_start_one(sdp, wbc, ai))
134 } while (retry); 157 goto restart;
158 }
159 spin_unlock(&sdp->sd_ail_lock);
160 trace_gfs2_ail_flush(sdp, wbc, 0);
161}
162
163/**
164 * gfs2_ail1_start - start writeback of all ail1 entries
165 * @sdp: The superblock
166 */
167
168static void gfs2_ail1_start(struct gfs2_sbd *sdp)
169{
170 struct writeback_control wbc = {
171 .sync_mode = WB_SYNC_NONE,
172 .nr_to_write = LONG_MAX,
173 .range_start = 0,
174 .range_end = LLONG_MAX,
175 };
176
177 return gfs2_ail1_flush(sdp, &wbc);
135} 178}
136 179
137/** 180/**
@@ -141,7 +184,7 @@ __acquires(&sdp->sd_ail_lock)
141 * 184 *
142 */ 185 */
143 186
144static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags) 187static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
145{ 188{
146 struct gfs2_bufdata *bd, *s; 189 struct gfs2_bufdata *bd, *s;
147 struct buffer_head *bh; 190 struct buffer_head *bh;
@@ -149,76 +192,63 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
149 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, 192 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
150 bd_ail_st_list) { 193 bd_ail_st_list) {
151 bh = bd->bd_bh; 194 bh = bd->bd_bh;
152
153 gfs2_assert(sdp, bd->bd_ail == ai); 195 gfs2_assert(sdp, bd->bd_ail == ai);
154 196 if (buffer_busy(bh))
155 if (buffer_busy(bh)) { 197 continue;
156 if (flags & DIO_ALL)
157 continue;
158 else
159 break;
160 }
161
162 if (!buffer_uptodate(bh)) 198 if (!buffer_uptodate(bh))
163 gfs2_io_error_bh(sdp, bh); 199 gfs2_io_error_bh(sdp, bh);
164
165 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); 200 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
166 } 201 }
167 202
168 return list_empty(&ai->ai_ail1_list);
169} 203}
170 204
171static void gfs2_ail1_start(struct gfs2_sbd *sdp) 205/**
172{ 206 * gfs2_ail1_empty - Try to empty the ail1 lists
173 struct list_head *head; 207 * @sdp: The superblock
174 u64 sync_gen; 208 *
175 struct gfs2_ail *ai; 209 * Tries to empty the ail1 lists, starting with the oldest first
176 int done = 0; 210 */
177
178 spin_lock(&sdp->sd_ail_lock);
179 head = &sdp->sd_ail1_list;
180 if (list_empty(head)) {
181 spin_unlock(&sdp->sd_ail_lock);
182 return;
183 }
184 sync_gen = sdp->sd_ail_sync_gen++;
185
186 while(!done) {
187 done = 1;
188 list_for_each_entry_reverse(ai, head, ai_list) {
189 if (ai->ai_sync_gen >= sync_gen)
190 continue;
191 ai->ai_sync_gen = sync_gen;
192 gfs2_ail1_start_one(sdp, ai); /* This may drop ail lock */
193 done = 0;
194 break;
195 }
196 }
197
198 spin_unlock(&sdp->sd_ail_lock);
199}
200 211
201static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) 212static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
202{ 213{
203 struct gfs2_ail *ai, *s; 214 struct gfs2_ail *ai, *s;
204 int ret; 215 int ret;
205 216
206 spin_lock(&sdp->sd_ail_lock); 217 spin_lock(&sdp->sd_ail_lock);
207
208 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) { 218 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
209 if (gfs2_ail1_empty_one(sdp, ai, flags)) 219 gfs2_ail1_empty_one(sdp, ai);
220 if (list_empty(&ai->ai_ail1_list))
210 list_move(&ai->ai_list, &sdp->sd_ail2_list); 221 list_move(&ai->ai_list, &sdp->sd_ail2_list);
211 else if (!(flags & DIO_ALL)) 222 else
212 break; 223 break;
213 } 224 }
214
215 ret = list_empty(&sdp->sd_ail1_list); 225 ret = list_empty(&sdp->sd_ail1_list);
216
217 spin_unlock(&sdp->sd_ail_lock); 226 spin_unlock(&sdp->sd_ail_lock);
218 227
219 return ret; 228 return ret;
220} 229}
221 230
231static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
232{
233 struct gfs2_ail *ai;
234 struct gfs2_bufdata *bd;
235 struct buffer_head *bh;
236
237 spin_lock(&sdp->sd_ail_lock);
238 list_for_each_entry_reverse(ai, &sdp->sd_ail1_list, ai_list) {
239 list_for_each_entry(bd, &ai->ai_ail1_list, bd_ail_st_list) {
240 bh = bd->bd_bh;
241 if (!buffer_locked(bh))
242 continue;
243 get_bh(bh);
244 spin_unlock(&sdp->sd_ail_lock);
245 wait_on_buffer(bh);
246 brelse(bh);
247 return;
248 }
249 }
250 spin_unlock(&sdp->sd_ail_lock);
251}
222 252
223/** 253/**
224 * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced 254 * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
@@ -574,7 +604,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
574 set_buffer_uptodate(bh); 604 set_buffer_uptodate(bh);
575 clear_buffer_dirty(bh); 605 clear_buffer_dirty(bh);
576 606
577 gfs2_ail1_empty(sdp, 0); 607 gfs2_ail1_empty(sdp);
578 tail = current_tail(sdp); 608 tail = current_tail(sdp);
579 609
580 lh = (struct gfs2_log_header *)bh->b_data; 610 lh = (struct gfs2_log_header *)bh->b_data;
@@ -869,9 +899,9 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
869 gfs2_log_flush(sdp, NULL); 899 gfs2_log_flush(sdp, NULL);
870 for (;;) { 900 for (;;) {
871 gfs2_ail1_start(sdp); 901 gfs2_ail1_start(sdp);
872 if (gfs2_ail1_empty(sdp, DIO_ALL)) 902 gfs2_ail1_wait(sdp);
903 if (gfs2_ail1_empty(sdp))
873 break; 904 break;
874 msleep(10);
875 } 905 }
876} 906}
877 907
@@ -905,20 +935,20 @@ int gfs2_logd(void *data)
905 935
906 preflush = atomic_read(&sdp->sd_log_pinned); 936 preflush = atomic_read(&sdp->sd_log_pinned);
907 if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { 937 if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
908 gfs2_ail1_empty(sdp, DIO_ALL); 938 gfs2_ail1_empty(sdp);
909 gfs2_log_flush(sdp, NULL); 939 gfs2_log_flush(sdp, NULL);
910 gfs2_ail1_empty(sdp, DIO_ALL);
911 } 940 }
912 941
913 if (gfs2_ail_flush_reqd(sdp)) { 942 if (gfs2_ail_flush_reqd(sdp)) {
914 gfs2_ail1_start(sdp); 943 gfs2_ail1_start(sdp);
915 io_schedule(); 944 gfs2_ail1_wait(sdp);
916 gfs2_ail1_empty(sdp, 0); 945 gfs2_ail1_empty(sdp);
917 gfs2_log_flush(sdp, NULL); 946 gfs2_log_flush(sdp, NULL);
918 gfs2_ail1_empty(sdp, DIO_ALL);
919 } 947 }
920 948
921 wake_up(&sdp->sd_log_waitq); 949 if (!gfs2_ail_flush_reqd(sdp))
950 wake_up(&sdp->sd_log_waitq);
951
922 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; 952 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
923 if (freezing(current)) 953 if (freezing(current))
924 refrigerator(); 954 refrigerator();
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 0d007f920234..ab0621698b73 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -12,6 +12,7 @@
12 12
13#include <linux/list.h> 13#include <linux/list.h>
14#include <linux/spinlock.h> 14#include <linux/spinlock.h>
15#include <linux/writeback.h>
15#include "incore.h" 16#include "incore.h"
16 17
17/** 18/**
@@ -59,6 +60,7 @@ extern struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
59extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); 60extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
60extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); 61extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
61extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); 62extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
63extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc);
62 64
63extern void gfs2_log_shutdown(struct gfs2_sbd *sdp); 65extern void gfs2_log_shutdown(struct gfs2_sbd *sdp);
64extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp); 66extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 51d27f00ebb4..05bbb124699f 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -40,7 +40,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
40{ 40{
41 struct gfs2_bufdata *bd; 41 struct gfs2_bufdata *bd;
42 42
43 gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)); 43 BUG_ON(!current->journal_info);
44 44
45 clear_buffer_dirty(bh); 45 clear_buffer_dirty(bh);
46 if (test_set_buffer_pinned(bh)) 46 if (test_set_buffer_pinned(bh))
@@ -65,6 +65,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
65 * @sdp: the filesystem the buffer belongs to 65 * @sdp: the filesystem the buffer belongs to
66 * @bh: The buffer to unpin 66 * @bh: The buffer to unpin
67 * @ai: 67 * @ai:
68 * @flags: The inode dirty flags
68 * 69 *
69 */ 70 */
70 71
@@ -73,10 +74,8 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
73{ 74{
74 struct gfs2_bufdata *bd = bh->b_private; 75 struct gfs2_bufdata *bd = bh->b_private;
75 76
76 gfs2_assert_withdraw(sdp, buffer_uptodate(bh)); 77 BUG_ON(!buffer_uptodate(bh));
77 78 BUG_ON(!buffer_pinned(bh));
78 if (!buffer_pinned(bh))
79 gfs2_assert_withdraw(sdp, 0);
80 79
81 lock_buffer(bh); 80 lock_buffer(bh);
82 mark_buffer_dirty(bh); 81 mark_buffer_dirty(bh);
@@ -95,8 +94,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
95 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); 94 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
96 spin_unlock(&sdp->sd_ail_lock); 95 spin_unlock(&sdp->sd_ail_lock);
97 96
98 if (test_and_clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags)) 97 clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
99 gfs2_glock_schedule_for_reclaim(bd->bd_gl);
100 trace_gfs2_pin(bd, 0); 98 trace_gfs2_pin(bd, 0);
101 unlock_buffer(bh); 99 unlock_buffer(bh);
102 atomic_dec(&sdp->sd_log_pinned); 100 atomic_dec(&sdp->sd_log_pinned);
@@ -322,12 +320,16 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
322 320
323static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 321static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
324{ 322{
323 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
324 struct gfs2_glock *gl = bd->bd_gl;
325 struct gfs2_trans *tr; 325 struct gfs2_trans *tr;
326 326
327 tr = current->journal_info; 327 tr = current->journal_info;
328 tr->tr_touched = 1; 328 tr->tr_touched = 1;
329 tr->tr_num_revoke++; 329 tr->tr_num_revoke++;
330 sdp->sd_log_num_revoke++; 330 sdp->sd_log_num_revoke++;
331 atomic_inc(&gl->gl_revokes);
332 set_bit(GLF_LFLUSH, &gl->gl_flags);
331 list_add(&le->le_list, &sdp->sd_log_le_revoke); 333 list_add(&le->le_list, &sdp->sd_log_le_revoke);
332} 334}
333 335
@@ -350,9 +352,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
350 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke); 352 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
351 offset = sizeof(struct gfs2_log_descriptor); 353 offset = sizeof(struct gfs2_log_descriptor);
352 354
353 while (!list_empty(head)) { 355 list_for_each_entry(bd, head, bd_le.le_list) {
354 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
355 list_del_init(&bd->bd_le.le_list);
356 sdp->sd_log_num_revoke--; 356 sdp->sd_log_num_revoke--;
357 357
358 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { 358 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
@@ -367,8 +367,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
367 } 367 }
368 368
369 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno); 369 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno);
370 kmem_cache_free(gfs2_bufdata_cachep, bd);
371
372 offset += sizeof(u64); 370 offset += sizeof(u64);
373 } 371 }
374 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); 372 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
@@ -376,6 +374,22 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
376 submit_bh(WRITE_SYNC, bh); 374 submit_bh(WRITE_SYNC, bh);
377} 375}
378 376
377static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
378{
379 struct list_head *head = &sdp->sd_log_le_revoke;
380 struct gfs2_bufdata *bd;
381 struct gfs2_glock *gl;
382
383 while (!list_empty(head)) {
384 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
385 list_del_init(&bd->bd_le.le_list);
386 gl = bd->bd_gl;
387 atomic_dec(&gl->gl_revokes);
388 clear_bit(GLF_LFLUSH, &gl->gl_flags);
389 kmem_cache_free(gfs2_bufdata_cachep, bd);
390 }
391}
392
379static void revoke_lo_before_scan(struct gfs2_jdesc *jd, 393static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
380 struct gfs2_log_header_host *head, int pass) 394 struct gfs2_log_header_host *head, int pass)
381{ 395{
@@ -749,6 +763,7 @@ const struct gfs2_log_operations gfs2_buf_lops = {
749const struct gfs2_log_operations gfs2_revoke_lops = { 763const struct gfs2_log_operations gfs2_revoke_lops = {
750 .lo_add = revoke_lo_add, 764 .lo_add = revoke_lo_add,
751 .lo_before_commit = revoke_lo_before_commit, 765 .lo_before_commit = revoke_lo_before_commit,
766 .lo_after_commit = revoke_lo_after_commit,
752 .lo_before_scan = revoke_lo_before_scan, 767 .lo_before_scan = revoke_lo_before_scan,
753 .lo_scan_elements = revoke_lo_scan_elements, 768 .lo_scan_elements = revoke_lo_scan_elements,
754 .lo_after_scan = revoke_lo_after_scan, 769 .lo_after_scan = revoke_lo_after_scan,
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 888a5f5a1a58..cfa327d33194 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -53,6 +53,7 @@ static void gfs2_init_glock_once(void *foo)
53 INIT_LIST_HEAD(&gl->gl_lru); 53 INIT_LIST_HEAD(&gl->gl_lru);
54 INIT_LIST_HEAD(&gl->gl_ail_list); 54 INIT_LIST_HEAD(&gl->gl_ail_list);
55 atomic_set(&gl->gl_ail_count, 0); 55 atomic_set(&gl->gl_ail_count, 0);
56 atomic_set(&gl->gl_revokes, 0);
56} 57}
57 58
58static void gfs2_init_gl_aspace_once(void *foo) 59static void gfs2_init_gl_aspace_once(void *foo)
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 675349b5a133..747238cd9f96 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -31,6 +31,7 @@
31#include "rgrp.h" 31#include "rgrp.h"
32#include "trans.h" 32#include "trans.h"
33#include "util.h" 33#include "util.h"
34#include "trace_gfs2.h"
34 35
35static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc) 36static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc)
36{ 37{
@@ -310,6 +311,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
310 struct gfs2_bufdata *bd = bh->b_private; 311 struct gfs2_bufdata *bd = bh->b_private;
311 312
312 if (test_clear_buffer_pinned(bh)) { 313 if (test_clear_buffer_pinned(bh)) {
314 trace_gfs2_pin(bd, 0);
313 atomic_dec(&sdp->sd_log_pinned); 315 atomic_dec(&sdp->sd_log_pinned);
314 list_del_init(&bd->bd_le.le_list); 316 list_del_init(&bd->bd_le.le_list);
315 if (meta) { 317 if (meta) {
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index 6a1d9ba16411..22c526593131 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -77,8 +77,6 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
77 77
78#define buffer_busy(bh) \ 78#define buffer_busy(bh) \
79((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned))) 79((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
80#define buffer_in_io(bh) \
81((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
82 80
83#endif /* __DIO_DOT_H__ */ 81#endif /* __DIO_DOT_H__ */
84 82
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index d3c69eb91c74..8ac9ae189b53 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -126,8 +126,10 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
126 * changed. 126 * changed.
127 */ 127 */
128 128
129static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) 129static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
130{ 130{
131 struct gfs2_sb_host *sb = &sdp->sd_sb;
132
131 if (sb->sb_magic != GFS2_MAGIC || 133 if (sb->sb_magic != GFS2_MAGIC ||
132 sb->sb_type != GFS2_METATYPE_SB) { 134 sb->sb_type != GFS2_METATYPE_SB) {
133 if (!silent) 135 if (!silent)
@@ -157,8 +159,10 @@ static void end_bio_io_page(struct bio *bio, int error)
157 unlock_page(page); 159 unlock_page(page);
158} 160}
159 161
160static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) 162static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf)
161{ 163{
164 struct gfs2_sb_host *sb = &sdp->sd_sb;
165 struct super_block *s = sdp->sd_vfs;
162 const struct gfs2_sb *str = buf; 166 const struct gfs2_sb *str = buf;
163 167
164 sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); 168 sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
@@ -175,7 +179,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
175 179
176 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); 180 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
177 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); 181 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
178 memcpy(sb->sb_uuid, str->sb_uuid, 16); 182 memcpy(s->s_uuid, str->sb_uuid, 16);
179} 183}
180 184
181/** 185/**
@@ -197,7 +201,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
197 * Returns: 0 on success or error 201 * Returns: 0 on success or error
198 */ 202 */
199 203
200static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) 204static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
201{ 205{
202 struct super_block *sb = sdp->sd_vfs; 206 struct super_block *sb = sdp->sd_vfs;
203 struct gfs2_sb *p; 207 struct gfs2_sb *p;
@@ -227,10 +231,10 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
227 return -EIO; 231 return -EIO;
228 } 232 }
229 p = kmap(page); 233 p = kmap(page);
230 gfs2_sb_in(&sdp->sd_sb, p); 234 gfs2_sb_in(sdp, p);
231 kunmap(page); 235 kunmap(page);
232 __free_page(page); 236 __free_page(page);
233 return 0; 237 return gfs2_check_sb(sdp, silent);
234} 238}
235 239
236/** 240/**
@@ -247,17 +251,13 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
247 unsigned int x; 251 unsigned int x;
248 int error; 252 int error;
249 253
250 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); 254 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent);
251 if (error) { 255 if (error) {
252 if (!silent) 256 if (!silent)
253 fs_err(sdp, "can't read superblock\n"); 257 fs_err(sdp, "can't read superblock\n");
254 return error; 258 return error;
255 } 259 }
256 260
257 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
258 if (error)
259 return error;
260
261 sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - 261 sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
262 GFS2_BASIC_BLOCK_SHIFT; 262 GFS2_BASIC_BLOCK_SHIFT;
263 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; 263 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
@@ -340,14 +340,10 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
340 /* Try to autodetect */ 340 /* Try to autodetect */
341 341
342 if (!proto[0] || !table[0]) { 342 if (!proto[0] || !table[0]) {
343 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); 343 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent);
344 if (error) 344 if (error)
345 return error; 345 return error;
346 346
347 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
348 if (error)
349 goto out;
350
351 if (!proto[0]) 347 if (!proto[0])
352 proto = sdp->sd_sb.sb_lockproto; 348 proto = sdp->sd_sb.sb_lockproto;
353 if (!table[0]) 349 if (!table[0])
@@ -364,7 +360,6 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
364 while ((table = strchr(table, '/'))) 360 while ((table = strchr(table, '/')))
365 *table = '_'; 361 *table = '_';
366 362
367out:
368 return error; 363 return error;
369} 364}
370 365
@@ -1119,8 +1114,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
1119 if (sdp->sd_args.ar_statfs_quantum) { 1114 if (sdp->sd_args.ar_statfs_quantum) {
1120 sdp->sd_tune.gt_statfs_slow = 0; 1115 sdp->sd_tune.gt_statfs_slow = 0;
1121 sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum; 1116 sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum;
1122 } 1117 } else {
1123 else {
1124 sdp->sd_tune.gt_statfs_slow = 1; 1118 sdp->sd_tune.gt_statfs_slow = 1;
1125 sdp->sd_tune.gt_statfs_quantum = 30; 1119 sdp->sd_tune.gt_statfs_quantum = 30;
1126 } 1120 }
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
deleted file mode 100644
index 09e436a50723..000000000000
--- a/fs/gfs2/ops_inode.c
+++ /dev/null
@@ -1,1344 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/slab.h>
11#include <linux/spinlock.h>
12#include <linux/completion.h>
13#include <linux/buffer_head.h>
14#include <linux/namei.h>
15#include <linux/mm.h>
16#include <linux/xattr.h>
17#include <linux/posix_acl.h>
18#include <linux/gfs2_ondisk.h>
19#include <linux/crc32.h>
20#include <linux/fiemap.h>
21#include <asm/uaccess.h>
22
23#include "gfs2.h"
24#include "incore.h"
25#include "acl.h"
26#include "bmap.h"
27#include "dir.h"
28#include "xattr.h"
29#include "glock.h"
30#include "inode.h"
31#include "meta_io.h"
32#include "quota.h"
33#include "rgrp.h"
34#include "trans.h"
35#include "util.h"
36#include "super.h"
37
38/**
39 * gfs2_create - Create a file
40 * @dir: The directory in which to create the file
41 * @dentry: The dentry of the new file
42 * @mode: The mode of the new file
43 *
44 * Returns: errno
45 */
46
47static int gfs2_create(struct inode *dir, struct dentry *dentry,
48 int mode, struct nameidata *nd)
49{
50 struct gfs2_inode *dip = GFS2_I(dir);
51 struct gfs2_sbd *sdp = GFS2_SB(dir);
52 struct gfs2_holder ghs[2];
53 struct inode *inode;
54
55 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
56
57 for (;;) {
58 inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0);
59 if (!IS_ERR(inode)) {
60 gfs2_trans_end(sdp);
61 if (dip->i_alloc->al_rgd)
62 gfs2_inplace_release(dip);
63 gfs2_quota_unlock(dip);
64 gfs2_alloc_put(dip);
65 gfs2_glock_dq_uninit_m(2, ghs);
66 mark_inode_dirty(inode);
67 break;
68 } else if (PTR_ERR(inode) != -EEXIST ||
69 (nd && nd->flags & LOOKUP_EXCL)) {
70 gfs2_holder_uninit(ghs);
71 return PTR_ERR(inode);
72 }
73
74 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
75 if (inode) {
76 if (!IS_ERR(inode)) {
77 gfs2_holder_uninit(ghs);
78 break;
79 } else {
80 gfs2_holder_uninit(ghs);
81 return PTR_ERR(inode);
82 }
83 }
84 }
85
86 d_instantiate(dentry, inode);
87
88 return 0;
89}
90
91/**
92 * gfs2_lookup - Look up a filename in a directory and return its inode
93 * @dir: The directory inode
94 * @dentry: The dentry of the new inode
95 * @nd: passed from Linux VFS, ignored by us
96 *
97 * Called by the VFS layer. Lock dir and call gfs2_lookupi()
98 *
99 * Returns: errno
100 */
101
102static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
103 struct nameidata *nd)
104{
105 struct inode *inode = NULL;
106
107 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
108 if (inode && IS_ERR(inode))
109 return ERR_CAST(inode);
110
111 if (inode) {
112 struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
113 struct gfs2_holder gh;
114 int error;
115 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
116 if (error) {
117 iput(inode);
118 return ERR_PTR(error);
119 }
120 gfs2_glock_dq_uninit(&gh);
121 return d_splice_alias(inode, dentry);
122 }
123 d_add(dentry, inode);
124
125 return NULL;
126}
127
128/**
129 * gfs2_link - Link to a file
130 * @old_dentry: The inode to link
131 * @dir: Add link to this directory
132 * @dentry: The name of the link
133 *
134 * Link the inode in "old_dentry" into the directory "dir" with the
135 * name in "dentry".
136 *
137 * Returns: errno
138 */
139
140static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
141 struct dentry *dentry)
142{
143 struct gfs2_inode *dip = GFS2_I(dir);
144 struct gfs2_sbd *sdp = GFS2_SB(dir);
145 struct inode *inode = old_dentry->d_inode;
146 struct gfs2_inode *ip = GFS2_I(inode);
147 struct gfs2_holder ghs[2];
148 int alloc_required;
149 int error;
150
151 if (S_ISDIR(inode->i_mode))
152 return -EPERM;
153
154 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
155 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
156
157 error = gfs2_glock_nq(ghs); /* parent */
158 if (error)
159 goto out_parent;
160
161 error = gfs2_glock_nq(ghs + 1); /* child */
162 if (error)
163 goto out_child;
164
165 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
166 if (error)
167 goto out_gunlock;
168
169 error = gfs2_dir_check(dir, &dentry->d_name, NULL);
170 switch (error) {
171 case -ENOENT:
172 break;
173 case 0:
174 error = -EEXIST;
175 default:
176 goto out_gunlock;
177 }
178
179 error = -EINVAL;
180 if (!dip->i_inode.i_nlink)
181 goto out_gunlock;
182 error = -EFBIG;
183 if (dip->i_entries == (u32)-1)
184 goto out_gunlock;
185 error = -EPERM;
186 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
187 goto out_gunlock;
188 error = -EINVAL;
189 if (!ip->i_inode.i_nlink)
190 goto out_gunlock;
191 error = -EMLINK;
192 if (ip->i_inode.i_nlink == (u32)-1)
193 goto out_gunlock;
194
195 alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
196 if (error < 0)
197 goto out_gunlock;
198 error = 0;
199
200 if (alloc_required) {
201 struct gfs2_alloc *al = gfs2_alloc_get(dip);
202 if (!al) {
203 error = -ENOMEM;
204 goto out_gunlock;
205 }
206
207 error = gfs2_quota_lock_check(dip);
208 if (error)
209 goto out_alloc;
210
211 al->al_requested = sdp->sd_max_dirres;
212
213 error = gfs2_inplace_reserve(dip);
214 if (error)
215 goto out_gunlock_q;
216
217 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
218 gfs2_rg_blocks(al) +
219 2 * RES_DINODE + RES_STATFS +
220 RES_QUOTA, 0);
221 if (error)
222 goto out_ipres;
223 } else {
224 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
225 if (error)
226 goto out_ipres;
227 }
228
229 error = gfs2_dir_add(dir, &dentry->d_name, ip, IF2DT(inode->i_mode));
230 if (error)
231 goto out_end_trans;
232
233 error = gfs2_change_nlink(ip, +1);
234
235out_end_trans:
236 gfs2_trans_end(sdp);
237out_ipres:
238 if (alloc_required)
239 gfs2_inplace_release(dip);
240out_gunlock_q:
241 if (alloc_required)
242 gfs2_quota_unlock(dip);
243out_alloc:
244 if (alloc_required)
245 gfs2_alloc_put(dip);
246out_gunlock:
247 gfs2_glock_dq(ghs + 1);
248out_child:
249 gfs2_glock_dq(ghs);
250out_parent:
251 gfs2_holder_uninit(ghs);
252 gfs2_holder_uninit(ghs + 1);
253 if (!error) {
254 ihold(inode);
255 d_instantiate(dentry, inode);
256 mark_inode_dirty(inode);
257 }
258 return error;
259}
260
261/*
262 * gfs2_unlink_ok - check to see that a inode is still in a directory
263 * @dip: the directory
264 * @name: the name of the file
265 * @ip: the inode
266 *
267 * Assumes that the lock on (at least) @dip is held.
268 *
269 * Returns: 0 if the parent/child relationship is correct, errno if it isn't
270 */
271
272static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
273 const struct gfs2_inode *ip)
274{
275 int error;
276
277 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
278 return -EPERM;
279
280 if ((dip->i_inode.i_mode & S_ISVTX) &&
281 dip->i_inode.i_uid != current_fsuid() &&
282 ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
283 return -EPERM;
284
285 if (IS_APPEND(&dip->i_inode))
286 return -EPERM;
287
288 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
289 if (error)
290 return error;
291
292 error = gfs2_dir_check(&dip->i_inode, name, ip);
293 if (error)
294 return error;
295
296 return 0;
297}
298
299/**
300 * gfs2_unlink - Unlink a file
301 * @dir: The inode of the directory containing the file to unlink
302 * @dentry: The file itself
303 *
304 * Unlink a file. Call gfs2_unlinki()
305 *
306 * Returns: errno
307 */
308
309static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
310{
311 struct gfs2_inode *dip = GFS2_I(dir);
312 struct gfs2_sbd *sdp = GFS2_SB(dir);
313 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
314 struct gfs2_holder ghs[3];
315 struct gfs2_rgrpd *rgd;
316 struct gfs2_holder ri_gh;
317 int error;
318
319 error = gfs2_rindex_hold(sdp, &ri_gh);
320 if (error)
321 return error;
322
323 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
324 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
325
326 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
327 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
328
329
330 error = gfs2_glock_nq(ghs); /* parent */
331 if (error)
332 goto out_parent;
333
334 error = gfs2_glock_nq(ghs + 1); /* child */
335 if (error)
336 goto out_child;
337
338 error = gfs2_glock_nq(ghs + 2); /* rgrp */
339 if (error)
340 goto out_rgrp;
341
342 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
343 if (error)
344 goto out_gunlock;
345
346 error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
347 if (error)
348 goto out_gunlock;
349
350 error = gfs2_dir_del(dip, &dentry->d_name);
351 if (error)
352 goto out_end_trans;
353
354 error = gfs2_change_nlink(ip, -1);
355
356out_end_trans:
357 gfs2_trans_end(sdp);
358out_gunlock:
359 gfs2_glock_dq(ghs + 2);
360out_rgrp:
361 gfs2_holder_uninit(ghs + 2);
362 gfs2_glock_dq(ghs + 1);
363out_child:
364 gfs2_holder_uninit(ghs + 1);
365 gfs2_glock_dq(ghs);
366out_parent:
367 gfs2_holder_uninit(ghs);
368 gfs2_glock_dq_uninit(&ri_gh);
369 return error;
370}
371
372/**
373 * gfs2_symlink - Create a symlink
374 * @dir: The directory to create the symlink in
375 * @dentry: The dentry to put the symlink in
376 * @symname: The thing which the link points to
377 *
378 * Returns: errno
379 */
380
381static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
382 const char *symname)
383{
384 struct gfs2_inode *dip = GFS2_I(dir), *ip;
385 struct gfs2_sbd *sdp = GFS2_SB(dir);
386 struct gfs2_holder ghs[2];
387 struct inode *inode;
388 struct buffer_head *dibh;
389 int size;
390 int error;
391
392 /* Must be stuffed with a null terminator for gfs2_follow_link() */
393 size = strlen(symname);
394 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
395 return -ENAMETOOLONG;
396
397 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
398
399 inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO, 0);
400 if (IS_ERR(inode)) {
401 gfs2_holder_uninit(ghs);
402 return PTR_ERR(inode);
403 }
404
405 ip = ghs[1].gh_gl->gl_object;
406
407 i_size_write(inode, size);
408
409 error = gfs2_meta_inode_buffer(ip, &dibh);
410
411 if (!gfs2_assert_withdraw(sdp, !error)) {
412 gfs2_dinode_out(ip, dibh->b_data);
413 memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname,
414 size);
415 brelse(dibh);
416 }
417
418 gfs2_trans_end(sdp);
419 if (dip->i_alloc->al_rgd)
420 gfs2_inplace_release(dip);
421 gfs2_quota_unlock(dip);
422 gfs2_alloc_put(dip);
423
424 gfs2_glock_dq_uninit_m(2, ghs);
425
426 d_instantiate(dentry, inode);
427 mark_inode_dirty(inode);
428
429 return 0;
430}
431
432/**
433 * gfs2_mkdir - Make a directory
434 * @dir: The parent directory of the new one
435 * @dentry: The dentry of the new directory
436 * @mode: The mode of the new directory
437 *
438 * Returns: errno
439 */
440
441static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
442{
443 struct gfs2_inode *dip = GFS2_I(dir), *ip;
444 struct gfs2_sbd *sdp = GFS2_SB(dir);
445 struct gfs2_holder ghs[2];
446 struct inode *inode;
447 struct buffer_head *dibh;
448 int error;
449
450 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
451
452 inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode, 0);
453 if (IS_ERR(inode)) {
454 gfs2_holder_uninit(ghs);
455 return PTR_ERR(inode);
456 }
457
458 ip = ghs[1].gh_gl->gl_object;
459
460 ip->i_inode.i_nlink = 2;
461 i_size_write(inode, sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode));
462 ip->i_diskflags |= GFS2_DIF_JDATA;
463 ip->i_entries = 2;
464
465 error = gfs2_meta_inode_buffer(ip, &dibh);
466
467 if (!gfs2_assert_withdraw(sdp, !error)) {
468 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
469 struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
470
471 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
472 gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent);
473 dent->de_inum = di->di_num; /* already GFS2 endian */
474 dent->de_type = cpu_to_be16(DT_DIR);
475 di->di_entries = cpu_to_be32(1);
476
477 dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
478 gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
479
480 gfs2_inum_out(dip, dent);
481 dent->de_type = cpu_to_be16(DT_DIR);
482
483 gfs2_dinode_out(ip, di);
484
485 brelse(dibh);
486 }
487
488 error = gfs2_change_nlink(dip, +1);
489 gfs2_assert_withdraw(sdp, !error); /* dip already pinned */
490
491 gfs2_trans_end(sdp);
492 if (dip->i_alloc->al_rgd)
493 gfs2_inplace_release(dip);
494 gfs2_quota_unlock(dip);
495 gfs2_alloc_put(dip);
496
497 gfs2_glock_dq_uninit_m(2, ghs);
498
499 d_instantiate(dentry, inode);
500 mark_inode_dirty(inode);
501
502 return 0;
503}
504
505/**
506 * gfs2_rmdiri - Remove a directory
507 * @dip: The parent directory of the directory to be removed
508 * @name: The name of the directory to be removed
509 * @ip: The GFS2 inode of the directory to be removed
510 *
511 * Assumes Glocks on dip and ip are held
512 *
513 * Returns: errno
514 */
515
516static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
517 struct gfs2_inode *ip)
518{
519 int error;
520
521 if (ip->i_entries != 2) {
522 if (gfs2_consist_inode(ip))
523 gfs2_dinode_print(ip);
524 return -EIO;
525 }
526
527 error = gfs2_dir_del(dip, name);
528 if (error)
529 return error;
530
531 error = gfs2_change_nlink(dip, -1);
532 if (error)
533 return error;
534
535 error = gfs2_dir_del(ip, &gfs2_qdot);
536 if (error)
537 return error;
538
539 error = gfs2_dir_del(ip, &gfs2_qdotdot);
540 if (error)
541 return error;
542
543 /* It looks odd, but it really should be done twice */
544 error = gfs2_change_nlink(ip, -1);
545 if (error)
546 return error;
547
548 error = gfs2_change_nlink(ip, -1);
549 if (error)
550 return error;
551
552 return error;
553}
554
555/**
556 * gfs2_rmdir - Remove a directory
557 * @dir: The parent directory of the directory to be removed
558 * @dentry: The dentry of the directory to remove
559 *
560 * Remove a directory. Call gfs2_rmdiri()
561 *
562 * Returns: errno
563 */
564
565static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
566{
567 struct gfs2_inode *dip = GFS2_I(dir);
568 struct gfs2_sbd *sdp = GFS2_SB(dir);
569 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
570 struct gfs2_holder ghs[3];
571 struct gfs2_rgrpd *rgd;
572 struct gfs2_holder ri_gh;
573 int error;
574
575 error = gfs2_rindex_hold(sdp, &ri_gh);
576 if (error)
577 return error;
578 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
579 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
580
581 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
582 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
583
584 error = gfs2_glock_nq(ghs); /* parent */
585 if (error)
586 goto out_parent;
587
588 error = gfs2_glock_nq(ghs + 1); /* child */
589 if (error)
590 goto out_child;
591
592 error = gfs2_glock_nq(ghs + 2); /* rgrp */
593 if (error)
594 goto out_rgrp;
595
596 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
597 if (error)
598 goto out_gunlock;
599
600 if (ip->i_entries < 2) {
601 if (gfs2_consist_inode(ip))
602 gfs2_dinode_print(ip);
603 error = -EIO;
604 goto out_gunlock;
605 }
606 if (ip->i_entries > 2) {
607 error = -ENOTEMPTY;
608 goto out_gunlock;
609 }
610
611 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + 3 * RES_LEAF + RES_RG_BIT, 0);
612 if (error)
613 goto out_gunlock;
614
615 error = gfs2_rmdiri(dip, &dentry->d_name, ip);
616
617 gfs2_trans_end(sdp);
618
619out_gunlock:
620 gfs2_glock_dq(ghs + 2);
621out_rgrp:
622 gfs2_holder_uninit(ghs + 2);
623 gfs2_glock_dq(ghs + 1);
624out_child:
625 gfs2_holder_uninit(ghs + 1);
626 gfs2_glock_dq(ghs);
627out_parent:
628 gfs2_holder_uninit(ghs);
629 gfs2_glock_dq_uninit(&ri_gh);
630 return error;
631}
632
633/**
634 * gfs2_mknod - Make a special file
635 * @dir: The directory in which the special file will reside
636 * @dentry: The dentry of the special file
637 * @mode: The mode of the special file
638 * @rdev: The device specification of the special file
639 *
640 */
641
642static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
643 dev_t dev)
644{
645 struct gfs2_inode *dip = GFS2_I(dir);
646 struct gfs2_sbd *sdp = GFS2_SB(dir);
647 struct gfs2_holder ghs[2];
648 struct inode *inode;
649
650 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
651
652 inode = gfs2_createi(ghs, &dentry->d_name, mode, dev);
653 if (IS_ERR(inode)) {
654 gfs2_holder_uninit(ghs);
655 return PTR_ERR(inode);
656 }
657
658 gfs2_trans_end(sdp);
659 if (dip->i_alloc->al_rgd)
660 gfs2_inplace_release(dip);
661 gfs2_quota_unlock(dip);
662 gfs2_alloc_put(dip);
663
664 gfs2_glock_dq_uninit_m(2, ghs);
665
666 d_instantiate(dentry, inode);
667 mark_inode_dirty(inode);
668
669 return 0;
670}
671
672/*
673 * gfs2_ok_to_move - check if it's ok to move a directory to another directory
674 * @this: move this
675 * @to: to here
676 *
677 * Follow @to back to the root and make sure we don't encounter @this
678 * Assumes we already hold the rename lock.
679 *
680 * Returns: errno
681 */
682
683static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
684{
685 struct inode *dir = &to->i_inode;
686 struct super_block *sb = dir->i_sb;
687 struct inode *tmp;
688 int error = 0;
689
690 igrab(dir);
691
692 for (;;) {
693 if (dir == &this->i_inode) {
694 error = -EINVAL;
695 break;
696 }
697 if (dir == sb->s_root->d_inode) {
698 error = 0;
699 break;
700 }
701
702 tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1);
703 if (IS_ERR(tmp)) {
704 error = PTR_ERR(tmp);
705 break;
706 }
707
708 iput(dir);
709 dir = tmp;
710 }
711
712 iput(dir);
713
714 return error;
715}
716
717/**
718 * gfs2_rename - Rename a file
719 * @odir: Parent directory of old file name
720 * @odentry: The old dentry of the file
721 * @ndir: Parent directory of new file name
722 * @ndentry: The new dentry of the file
723 *
724 * Returns: errno
725 */
726
727static int gfs2_rename(struct inode *odir, struct dentry *odentry,
728 struct inode *ndir, struct dentry *ndentry)
729{
730 struct gfs2_inode *odip = GFS2_I(odir);
731 struct gfs2_inode *ndip = GFS2_I(ndir);
732 struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
733 struct gfs2_inode *nip = NULL;
734 struct gfs2_sbd *sdp = GFS2_SB(odir);
735 struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh;
736 struct gfs2_rgrpd *nrgd;
737 unsigned int num_gh;
738 int dir_rename = 0;
739 int alloc_required = 0;
740 unsigned int x;
741 int error;
742
743 if (ndentry->d_inode) {
744 nip = GFS2_I(ndentry->d_inode);
745 if (ip == nip)
746 return 0;
747 }
748
749 error = gfs2_rindex_hold(sdp, &ri_gh);
750 if (error)
751 return error;
752
753 if (odip != ndip) {
754 error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
755 0, &r_gh);
756 if (error)
757 goto out;
758
759 if (S_ISDIR(ip->i_inode.i_mode)) {
760 dir_rename = 1;
761 /* don't move a dirctory into it's subdir */
762 error = gfs2_ok_to_move(ip, ndip);
763 if (error)
764 goto out_gunlock_r;
765 }
766 }
767
768 num_gh = 1;
769 gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
770 if (odip != ndip) {
771 gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
772 num_gh++;
773 }
774 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
775 num_gh++;
776
777 if (nip) {
778 gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
779 num_gh++;
780 /* grab the resource lock for unlink flag twiddling
781 * this is the case of the target file already existing
782 * so we unlink before doing the rename
783 */
784 nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
785 if (nrgd)
786 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
787 }
788
789 for (x = 0; x < num_gh; x++) {
790 error = gfs2_glock_nq(ghs + x);
791 if (error)
792 goto out_gunlock;
793 }
794
795 /* Check out the old directory */
796
797 error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
798 if (error)
799 goto out_gunlock;
800
801 /* Check out the new directory */
802
803 if (nip) {
804 error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
805 if (error)
806 goto out_gunlock;
807
808 if (S_ISDIR(nip->i_inode.i_mode)) {
809 if (nip->i_entries < 2) {
810 if (gfs2_consist_inode(nip))
811 gfs2_dinode_print(nip);
812 error = -EIO;
813 goto out_gunlock;
814 }
815 if (nip->i_entries > 2) {
816 error = -ENOTEMPTY;
817 goto out_gunlock;
818 }
819 }
820 } else {
821 error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
822 if (error)
823 goto out_gunlock;
824
825 error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
826 switch (error) {
827 case -ENOENT:
828 error = 0;
829 break;
830 case 0:
831 error = -EEXIST;
832 default:
833 goto out_gunlock;
834 };
835
836 if (odip != ndip) {
837 if (!ndip->i_inode.i_nlink) {
838 error = -EINVAL;
839 goto out_gunlock;
840 }
841 if (ndip->i_entries == (u32)-1) {
842 error = -EFBIG;
843 goto out_gunlock;
844 }
845 if (S_ISDIR(ip->i_inode.i_mode) &&
846 ndip->i_inode.i_nlink == (u32)-1) {
847 error = -EMLINK;
848 goto out_gunlock;
849 }
850 }
851 }
852
853 /* Check out the dir to be renamed */
854
855 if (dir_rename) {
856 error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
857 if (error)
858 goto out_gunlock;
859 }
860
861 if (nip == NULL)
862 alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
863 error = alloc_required;
864 if (error < 0)
865 goto out_gunlock;
866 error = 0;
867
868 if (alloc_required) {
869 struct gfs2_alloc *al = gfs2_alloc_get(ndip);
870 if (!al) {
871 error = -ENOMEM;
872 goto out_gunlock;
873 }
874
875 error = gfs2_quota_lock_check(ndip);
876 if (error)
877 goto out_alloc;
878
879 al->al_requested = sdp->sd_max_dirres;
880
881 error = gfs2_inplace_reserve_ri(ndip);
882 if (error)
883 goto out_gunlock_q;
884
885 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
886 gfs2_rg_blocks(al) +
887 4 * RES_DINODE + 4 * RES_LEAF +
888 RES_STATFS + RES_QUOTA + 4, 0);
889 if (error)
890 goto out_ipreserv;
891 } else {
892 error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
893 5 * RES_LEAF + 4, 0);
894 if (error)
895 goto out_gunlock;
896 }
897
898 /* Remove the target file, if it exists */
899
900 if (nip) {
901 if (S_ISDIR(nip->i_inode.i_mode))
902 error = gfs2_rmdiri(ndip, &ndentry->d_name, nip);
903 else {
904 error = gfs2_dir_del(ndip, &ndentry->d_name);
905 if (error)
906 goto out_end_trans;
907 error = gfs2_change_nlink(nip, -1);
908 }
909 if (error)
910 goto out_end_trans;
911 }
912
913 if (dir_rename) {
914 error = gfs2_change_nlink(ndip, +1);
915 if (error)
916 goto out_end_trans;
917 error = gfs2_change_nlink(odip, -1);
918 if (error)
919 goto out_end_trans;
920
921 error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
922 if (error)
923 goto out_end_trans;
924 } else {
925 struct buffer_head *dibh;
926 error = gfs2_meta_inode_buffer(ip, &dibh);
927 if (error)
928 goto out_end_trans;
929 ip->i_inode.i_ctime = CURRENT_TIME;
930 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
931 gfs2_dinode_out(ip, dibh->b_data);
932 brelse(dibh);
933 }
934
935 error = gfs2_dir_del(odip, &odentry->d_name);
936 if (error)
937 goto out_end_trans;
938
939 error = gfs2_dir_add(ndir, &ndentry->d_name, ip, IF2DT(ip->i_inode.i_mode));
940 if (error)
941 goto out_end_trans;
942
943out_end_trans:
944 gfs2_trans_end(sdp);
945out_ipreserv:
946 if (alloc_required)
947 gfs2_inplace_release(ndip);
948out_gunlock_q:
949 if (alloc_required)
950 gfs2_quota_unlock(ndip);
951out_alloc:
952 if (alloc_required)
953 gfs2_alloc_put(ndip);
954out_gunlock:
955 while (x--) {
956 gfs2_glock_dq(ghs + x);
957 gfs2_holder_uninit(ghs + x);
958 }
959out_gunlock_r:
960 if (r_gh.gh_gl)
961 gfs2_glock_dq_uninit(&r_gh);
962out:
963 gfs2_glock_dq_uninit(&ri_gh);
964 return error;
965}
966
967/**
968 * gfs2_follow_link - Follow a symbolic link
969 * @dentry: The dentry of the link
970 * @nd: Data that we pass to vfs_follow_link()
971 *
972 * This can handle symlinks of any size.
973 *
974 * Returns: 0 on success or error code
975 */
976
977static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
978{
979 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
980 struct gfs2_holder i_gh;
981 struct buffer_head *dibh;
982 unsigned int x, size;
983 char *buf;
984 int error;
985
986 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
987 error = gfs2_glock_nq(&i_gh);
988 if (error) {
989 gfs2_holder_uninit(&i_gh);
990 nd_set_link(nd, ERR_PTR(error));
991 return NULL;
992 }
993
994 size = (unsigned int)i_size_read(&ip->i_inode);
995 if (size == 0) {
996 gfs2_consist_inode(ip);
997 buf = ERR_PTR(-EIO);
998 goto out;
999 }
1000
1001 error = gfs2_meta_inode_buffer(ip, &dibh);
1002 if (error) {
1003 buf = ERR_PTR(error);
1004 goto out;
1005 }
1006
1007 x = size + 1;
1008 buf = kmalloc(x, GFP_NOFS);
1009 if (!buf)
1010 buf = ERR_PTR(-ENOMEM);
1011 else
1012 memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
1013 brelse(dibh);
1014out:
1015 gfs2_glock_dq_uninit(&i_gh);
1016 nd_set_link(nd, buf);
1017 return NULL;
1018}
1019
1020static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1021{
1022 char *s = nd_get_link(nd);
1023 if (!IS_ERR(s))
1024 kfree(s);
1025}
1026
1027/**
1028 * gfs2_permission -
1029 * @inode: The inode
1030 * @mask: The mask to be tested
1031 * @flags: Indicates whether this is an RCU path walk or not
1032 *
1033 * This may be called from the VFS directly, or from within GFS2 with the
1034 * inode locked, so we look to see if the glock is already locked and only
1035 * lock the glock if its not already been done.
1036 *
1037 * Returns: errno
1038 */
1039
1040int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
1041{
1042 struct gfs2_inode *ip;
1043 struct gfs2_holder i_gh;
1044 int error;
1045 int unlock = 0;
1046
1047
1048 ip = GFS2_I(inode);
1049 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1050 if (flags & IPERM_FLAG_RCU)
1051 return -ECHILD;
1052 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
1053 if (error)
1054 return error;
1055 unlock = 1;
1056 }
1057
1058 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
1059 error = -EACCES;
1060 else
1061 error = generic_permission(inode, mask, flags, gfs2_check_acl);
1062 if (unlock)
1063 gfs2_glock_dq_uninit(&i_gh);
1064
1065 return error;
1066}
1067
1068static int setattr_chown(struct inode *inode, struct iattr *attr)
1069{
1070 struct gfs2_inode *ip = GFS2_I(inode);
1071 struct gfs2_sbd *sdp = GFS2_SB(inode);
1072 u32 ouid, ogid, nuid, ngid;
1073 int error;
1074
1075 ouid = inode->i_uid;
1076 ogid = inode->i_gid;
1077 nuid = attr->ia_uid;
1078 ngid = attr->ia_gid;
1079
1080 if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
1081 ouid = nuid = NO_QUOTA_CHANGE;
1082 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
1083 ogid = ngid = NO_QUOTA_CHANGE;
1084
1085 if (!gfs2_alloc_get(ip))
1086 return -ENOMEM;
1087
1088 error = gfs2_quota_lock(ip, nuid, ngid);
1089 if (error)
1090 goto out_alloc;
1091
1092 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
1093 error = gfs2_quota_check(ip, nuid, ngid);
1094 if (error)
1095 goto out_gunlock_q;
1096 }
1097
1098 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
1099 if (error)
1100 goto out_gunlock_q;
1101
1102 error = gfs2_setattr_simple(ip, attr);
1103 if (error)
1104 goto out_end_trans;
1105
1106 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
1107 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
1108 gfs2_quota_change(ip, -blocks, ouid, ogid);
1109 gfs2_quota_change(ip, blocks, nuid, ngid);
1110 }
1111
1112out_end_trans:
1113 gfs2_trans_end(sdp);
1114out_gunlock_q:
1115 gfs2_quota_unlock(ip);
1116out_alloc:
1117 gfs2_alloc_put(ip);
1118 return error;
1119}
1120
1121/**
1122 * gfs2_setattr - Change attributes on an inode
1123 * @dentry: The dentry which is changing
1124 * @attr: The structure describing the change
1125 *
1126 * The VFS layer wants to change one or more of an inodes attributes. Write
1127 * that change out to disk.
1128 *
1129 * Returns: errno
1130 */
1131
1132static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
1133{
1134 struct inode *inode = dentry->d_inode;
1135 struct gfs2_inode *ip = GFS2_I(inode);
1136 struct gfs2_holder i_gh;
1137 int error;
1138
1139 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1140 if (error)
1141 return error;
1142
1143 error = -EPERM;
1144 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1145 goto out;
1146
1147 error = inode_change_ok(inode, attr);
1148 if (error)
1149 goto out;
1150
1151 if (attr->ia_valid & ATTR_SIZE)
1152 error = gfs2_setattr_size(inode, attr->ia_size);
1153 else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
1154 error = setattr_chown(inode, attr);
1155 else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
1156 error = gfs2_acl_chmod(ip, attr);
1157 else
1158 error = gfs2_setattr_simple(ip, attr);
1159
1160out:
1161 gfs2_glock_dq_uninit(&i_gh);
1162 if (!error)
1163 mark_inode_dirty(inode);
1164 return error;
1165}
1166
1167/**
1168 * gfs2_getattr - Read out an inode's attributes
1169 * @mnt: The vfsmount the inode is being accessed from
1170 * @dentry: The dentry to stat
1171 * @stat: The inode's stats
1172 *
1173 * This may be called from the VFS directly, or from within GFS2 with the
1174 * inode locked, so we look to see if the glock is already locked and only
1175 * lock the glock if its not already been done. Note that its the NFS
1176 * readdirplus operation which causes this to be called (from filldir)
1177 * with the glock already held.
1178 *
1179 * Returns: errno
1180 */
1181
1182static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
1183 struct kstat *stat)
1184{
1185 struct inode *inode = dentry->d_inode;
1186 struct gfs2_inode *ip = GFS2_I(inode);
1187 struct gfs2_holder gh;
1188 int error;
1189 int unlock = 0;
1190
1191 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1192 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1193 if (error)
1194 return error;
1195 unlock = 1;
1196 }
1197
1198 generic_fillattr(inode, stat);
1199 if (unlock)
1200 gfs2_glock_dq_uninit(&gh);
1201
1202 return 0;
1203}
1204
1205static int gfs2_setxattr(struct dentry *dentry, const char *name,
1206 const void *data, size_t size, int flags)
1207{
1208 struct inode *inode = dentry->d_inode;
1209 struct gfs2_inode *ip = GFS2_I(inode);
1210 struct gfs2_holder gh;
1211 int ret;
1212
1213 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1214 ret = gfs2_glock_nq(&gh);
1215 if (ret == 0) {
1216 ret = generic_setxattr(dentry, name, data, size, flags);
1217 gfs2_glock_dq(&gh);
1218 }
1219 gfs2_holder_uninit(&gh);
1220 return ret;
1221}
1222
1223static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
1224 void *data, size_t size)
1225{
1226 struct inode *inode = dentry->d_inode;
1227 struct gfs2_inode *ip = GFS2_I(inode);
1228 struct gfs2_holder gh;
1229 int ret;
1230
1231 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1232 ret = gfs2_glock_nq(&gh);
1233 if (ret == 0) {
1234 ret = generic_getxattr(dentry, name, data, size);
1235 gfs2_glock_dq(&gh);
1236 }
1237 gfs2_holder_uninit(&gh);
1238 return ret;
1239}
1240
1241static int gfs2_removexattr(struct dentry *dentry, const char *name)
1242{
1243 struct inode *inode = dentry->d_inode;
1244 struct gfs2_inode *ip = GFS2_I(inode);
1245 struct gfs2_holder gh;
1246 int ret;
1247
1248 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1249 ret = gfs2_glock_nq(&gh);
1250 if (ret == 0) {
1251 ret = generic_removexattr(dentry, name);
1252 gfs2_glock_dq(&gh);
1253 }
1254 gfs2_holder_uninit(&gh);
1255 return ret;
1256}
1257
1258static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1259 u64 start, u64 len)
1260{
1261 struct gfs2_inode *ip = GFS2_I(inode);
1262 struct gfs2_holder gh;
1263 int ret;
1264
1265 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
1266 if (ret)
1267 return ret;
1268
1269 mutex_lock(&inode->i_mutex);
1270
1271 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
1272 if (ret)
1273 goto out;
1274
1275 if (gfs2_is_stuffed(ip)) {
1276 u64 phys = ip->i_no_addr << inode->i_blkbits;
1277 u64 size = i_size_read(inode);
1278 u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
1279 FIEMAP_EXTENT_DATA_INLINE;
1280 phys += sizeof(struct gfs2_dinode);
1281 phys += start;
1282 if (start + len > size)
1283 len = size - start;
1284 if (start < size)
1285 ret = fiemap_fill_next_extent(fieinfo, start, phys,
1286 len, flags);
1287 if (ret == 1)
1288 ret = 0;
1289 } else {
1290 ret = __generic_block_fiemap(inode, fieinfo, start, len,
1291 gfs2_block_map);
1292 }
1293
1294 gfs2_glock_dq_uninit(&gh);
1295out:
1296 mutex_unlock(&inode->i_mutex);
1297 return ret;
1298}
1299
1300const struct inode_operations gfs2_file_iops = {
1301 .permission = gfs2_permission,
1302 .setattr = gfs2_setattr,
1303 .getattr = gfs2_getattr,
1304 .setxattr = gfs2_setxattr,
1305 .getxattr = gfs2_getxattr,
1306 .listxattr = gfs2_listxattr,
1307 .removexattr = gfs2_removexattr,
1308 .fiemap = gfs2_fiemap,
1309};
1310
1311const struct inode_operations gfs2_dir_iops = {
1312 .create = gfs2_create,
1313 .lookup = gfs2_lookup,
1314 .link = gfs2_link,
1315 .unlink = gfs2_unlink,
1316 .symlink = gfs2_symlink,
1317 .mkdir = gfs2_mkdir,
1318 .rmdir = gfs2_rmdir,
1319 .mknod = gfs2_mknod,
1320 .rename = gfs2_rename,
1321 .permission = gfs2_permission,
1322 .setattr = gfs2_setattr,
1323 .getattr = gfs2_getattr,
1324 .setxattr = gfs2_setxattr,
1325 .getxattr = gfs2_getxattr,
1326 .listxattr = gfs2_listxattr,
1327 .removexattr = gfs2_removexattr,
1328 .fiemap = gfs2_fiemap,
1329};
1330
1331const struct inode_operations gfs2_symlink_iops = {
1332 .readlink = generic_readlink,
1333 .follow_link = gfs2_follow_link,
1334 .put_link = gfs2_put_link,
1335 .permission = gfs2_permission,
1336 .setattr = gfs2_setattr,
1337 .getattr = gfs2_getattr,
1338 .setxattr = gfs2_setxattr,
1339 .getxattr = gfs2_getxattr,
1340 .listxattr = gfs2_listxattr,
1341 .removexattr = gfs2_removexattr,
1342 .fiemap = gfs2_fiemap,
1343};
1344
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index e23d9864c418..42e8d23bc047 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -38,6 +38,7 @@
38 38
39#include <linux/sched.h> 39#include <linux/sched.h>
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include <linux/mm.h>
41#include <linux/spinlock.h> 42#include <linux/spinlock.h>
42#include <linux/completion.h> 43#include <linux/completion.h>
43#include <linux/buffer_head.h> 44#include <linux/buffer_head.h>
@@ -77,19 +78,20 @@ static LIST_HEAD(qd_lru_list);
77static atomic_t qd_lru_count = ATOMIC_INIT(0); 78static atomic_t qd_lru_count = ATOMIC_INIT(0);
78static DEFINE_SPINLOCK(qd_lru_lock); 79static DEFINE_SPINLOCK(qd_lru_lock);
79 80
80int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 81int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc)
81{ 82{
82 struct gfs2_quota_data *qd; 83 struct gfs2_quota_data *qd;
83 struct gfs2_sbd *sdp; 84 struct gfs2_sbd *sdp;
85 int nr_to_scan = sc->nr_to_scan;
84 86
85 if (nr == 0) 87 if (nr_to_scan == 0)
86 goto out; 88 goto out;
87 89
88 if (!(gfp_mask & __GFP_FS)) 90 if (!(sc->gfp_mask & __GFP_FS))
89 return -1; 91 return -1;
90 92
91 spin_lock(&qd_lru_lock); 93 spin_lock(&qd_lru_lock);
92 while (nr && !list_empty(&qd_lru_list)) { 94 while (nr_to_scan && !list_empty(&qd_lru_list)) {
93 qd = list_entry(qd_lru_list.next, 95 qd = list_entry(qd_lru_list.next,
94 struct gfs2_quota_data, qd_reclaim); 96 struct gfs2_quota_data, qd_reclaim);
95 sdp = qd->qd_gl->gl_sbd; 97 sdp = qd->qd_gl->gl_sbd;
@@ -110,7 +112,7 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
110 spin_unlock(&qd_lru_lock); 112 spin_unlock(&qd_lru_lock);
111 kmem_cache_free(gfs2_quotad_cachep, qd); 113 kmem_cache_free(gfs2_quotad_cachep, qd);
112 spin_lock(&qd_lru_lock); 114 spin_lock(&qd_lru_lock);
113 nr--; 115 nr_to_scan--;
114 } 116 }
115 spin_unlock(&qd_lru_lock); 117 spin_unlock(&qd_lru_lock);
116 118
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index e7d236ca48bd..90bf1c302a98 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -12,6 +12,7 @@
12 12
13struct gfs2_inode; 13struct gfs2_inode;
14struct gfs2_sbd; 14struct gfs2_sbd;
15struct shrink_control;
15 16
16#define NO_QUOTA_CHANGE ((u32)-1) 17#define NO_QUOTA_CHANGE ((u32)-1)
17 18
@@ -51,7 +52,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
51 return ret; 52 return ret;
52} 53}
53 54
54extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask); 55extern int gfs2_shrink_qd_memory(struct shrinker *shrink,
56 struct shrink_control *sc);
55extern const struct quotactl_ops gfs2_quotactl_ops; 57extern const struct quotactl_ops gfs2_quotactl_ops;
56 58
57#endif /* __QUOTA_DOT_H__ */ 59#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 6fcae8469f6d..9b780df3fd54 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -78,10 +78,11 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
78 78
79static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1, 79static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
80 unsigned char *buf2, unsigned int offset, 80 unsigned char *buf2, unsigned int offset,
81 unsigned int buflen, u32 block, 81 struct gfs2_bitmap *bi, u32 block,
82 unsigned char new_state) 82 unsigned char new_state)
83{ 83{
84 unsigned char *byte1, *byte2, *end, cur_state; 84 unsigned char *byte1, *byte2, *end, cur_state;
85 unsigned int buflen = bi->bi_len;
85 const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; 86 const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
86 87
87 byte1 = buf1 + offset + (block / GFS2_NBBY); 88 byte1 = buf1 + offset + (block / GFS2_NBBY);
@@ -92,6 +93,16 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
92 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; 93 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
93 94
94 if (unlikely(!valid_change[new_state * 4 + cur_state])) { 95 if (unlikely(!valid_change[new_state * 4 + cur_state])) {
96 printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, "
97 "new_state=%d\n",
98 (unsigned long long)block, cur_state, new_state);
99 printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n",
100 (unsigned long long)rgd->rd_addr,
101 (unsigned long)bi->bi_start);
102 printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n",
103 (unsigned long)bi->bi_offset,
104 (unsigned long)bi->bi_len);
105 dump_stack();
95 gfs2_consist_rgrpd(rgd); 106 gfs2_consist_rgrpd(rgd);
96 return; 107 return;
97 } 108 }
@@ -381,6 +392,7 @@ static void clear_rgrpdi(struct gfs2_sbd *sdp)
381 392
382 if (gl) { 393 if (gl) {
383 gl->gl_object = NULL; 394 gl->gl_object = NULL;
395 gfs2_glock_add_to_lru(gl);
384 gfs2_glock_put(gl); 396 gfs2_glock_put(gl);
385 } 397 }
386 398
@@ -1365,7 +1377,7 @@ skip:
1365 1377
1366 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1378 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1367 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, 1379 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
1368 bi->bi_len, blk, new_state); 1380 bi, blk, new_state);
1369 goal = blk; 1381 goal = blk;
1370 while (*n < elen) { 1382 while (*n < elen) {
1371 goal++; 1383 goal++;
@@ -1375,7 +1387,7 @@ skip:
1375 GFS2_BLKST_FREE) 1387 GFS2_BLKST_FREE)
1376 break; 1388 break;
1377 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, 1389 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
1378 bi->bi_len, goal, new_state); 1390 bi, goal, new_state);
1379 (*n)++; 1391 (*n)++;
1380 } 1392 }
1381out: 1393out:
@@ -1432,7 +1444,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1432 } 1444 }
1433 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1445 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1434 gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset, 1446 gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset,
1435 bi->bi_len, buf_blk, new_state); 1447 bi, buf_blk, new_state);
1436 } 1448 }
1437 1449
1438 return rgd; 1450 return rgd;
@@ -1617,6 +1629,10 @@ void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1617 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1629 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1618 1630
1619 gfs2_trans_add_rg(rgd); 1631 gfs2_trans_add_rg(rgd);
1632
1633 /* Directories keep their data in the metadata address space */
1634 if (ip->i_depth)
1635 gfs2_meta_wipe(ip, bstart, blen);
1620} 1636}
1621 1637
1622/** 1638/**
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index b9f28e66dad1..ed540e7018be 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -23,6 +23,7 @@
23#include <linux/time.h> 23#include <linux/time.h>
24#include <linux/wait.h> 24#include <linux/wait.h>
25#include <linux/writeback.h> 25#include <linux/writeback.h>
26#include <linux/backing-dev.h>
26 27
27#include "gfs2.h" 28#include "gfs2.h"
28#include "incore.h" 29#include "incore.h"
@@ -700,11 +701,47 @@ void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
700 mutex_unlock(&sdp->sd_freeze_lock); 701 mutex_unlock(&sdp->sd_freeze_lock);
701} 702}
702 703
704void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
705{
706 struct gfs2_dinode *str = buf;
707
708 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
709 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
710 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
711 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
712 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
713 str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
714 str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
715 str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
716 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
717 str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
718 str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
719 str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
720 str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
721 str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
722
723 str->di_goal_meta = cpu_to_be64(ip->i_goal);
724 str->di_goal_data = cpu_to_be64(ip->i_goal);
725 str->di_generation = cpu_to_be64(ip->i_generation);
726
727 str->di_flags = cpu_to_be32(ip->i_diskflags);
728 str->di_height = cpu_to_be16(ip->i_height);
729 str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
730 !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
731 GFS2_FORMAT_DE : 0);
732 str->di_depth = cpu_to_be16(ip->i_depth);
733 str->di_entries = cpu_to_be32(ip->i_entries);
734
735 str->di_eattr = cpu_to_be64(ip->i_eattr);
736 str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
737 str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
738 str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
739}
703 740
704/** 741/**
705 * gfs2_write_inode - Make sure the inode is stable on the disk 742 * gfs2_write_inode - Make sure the inode is stable on the disk
706 * @inode: The inode 743 * @inode: The inode
707 * @sync: synchronous write flag 744 * @wbc: The writeback control structure
708 * 745 *
709 * Returns: errno 746 * Returns: errno
710 */ 747 */
@@ -713,15 +750,17 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
713{ 750{
714 struct gfs2_inode *ip = GFS2_I(inode); 751 struct gfs2_inode *ip = GFS2_I(inode);
715 struct gfs2_sbd *sdp = GFS2_SB(inode); 752 struct gfs2_sbd *sdp = GFS2_SB(inode);
753 struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
754 struct backing_dev_info *bdi = metamapping->backing_dev_info;
716 struct gfs2_holder gh; 755 struct gfs2_holder gh;
717 struct buffer_head *bh; 756 struct buffer_head *bh;
718 struct timespec atime; 757 struct timespec atime;
719 struct gfs2_dinode *di; 758 struct gfs2_dinode *di;
720 int ret = 0; 759 int ret = -EAGAIN;
721 760
722 /* Check this is a "normal" inode, etc */ 761 /* Skip timestamp update, if this is from a memalloc */
723 if (current->flags & PF_MEMALLOC) 762 if (current->flags & PF_MEMALLOC)
724 return 0; 763 goto do_flush;
725 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 764 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
726 if (ret) 765 if (ret)
727 goto do_flush; 766 goto do_flush;
@@ -745,6 +784,13 @@ do_unlock:
745do_flush: 784do_flush:
746 if (wbc->sync_mode == WB_SYNC_ALL) 785 if (wbc->sync_mode == WB_SYNC_ALL)
747 gfs2_log_flush(GFS2_SB(inode), ip->i_gl); 786 gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
787 filemap_fdatawrite(metamapping);
788 if (bdi->dirty_exceeded)
789 gfs2_ail1_flush(sdp, wbc);
790 if (!ret && (wbc->sync_mode == WB_SYNC_ALL))
791 ret = filemap_fdatawait(metamapping);
792 if (ret)
793 mark_inode_dirty_sync(inode);
748 return ret; 794 return ret;
749} 795}
750 796
@@ -874,8 +920,9 @@ restart:
874 920
875static int gfs2_sync_fs(struct super_block *sb, int wait) 921static int gfs2_sync_fs(struct super_block *sb, int wait)
876{ 922{
877 if (wait && sb->s_fs_info) 923 struct gfs2_sbd *sdp = sb->s_fs_info;
878 gfs2_log_flush(sb->s_fs_info, NULL); 924 if (wait && sdp)
925 gfs2_log_flush(sdp, NULL);
879 return 0; 926 return 0;
880} 927}
881 928
@@ -1308,6 +1355,78 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1308 return 0; 1355 return 0;
1309} 1356}
1310 1357
1358static void gfs2_final_release_pages(struct gfs2_inode *ip)
1359{
1360 struct inode *inode = &ip->i_inode;
1361 struct gfs2_glock *gl = ip->i_gl;
1362
1363 truncate_inode_pages(gfs2_glock2aspace(ip->i_gl), 0);
1364 truncate_inode_pages(&inode->i_data, 0);
1365
1366 if (atomic_read(&gl->gl_revokes) == 0) {
1367 clear_bit(GLF_LFLUSH, &gl->gl_flags);
1368 clear_bit(GLF_DIRTY, &gl->gl_flags);
1369 }
1370}
1371
1372static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1373{
1374 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1375 struct gfs2_alloc *al;
1376 struct gfs2_rgrpd *rgd;
1377 int error;
1378
1379 if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
1380 gfs2_consist_inode(ip);
1381 return -EIO;
1382 }
1383
1384 al = gfs2_alloc_get(ip);
1385 if (!al)
1386 return -ENOMEM;
1387
1388 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1389 if (error)
1390 goto out;
1391
1392 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
1393 if (error)
1394 goto out_qs;
1395
1396 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
1397 if (!rgd) {
1398 gfs2_consist_inode(ip);
1399 error = -EIO;
1400 goto out_rindex_relse;
1401 }
1402
1403 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
1404 &al->al_rgd_gh);
1405 if (error)
1406 goto out_rindex_relse;
1407
1408 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
1409 sdp->sd_jdesc->jd_blocks);
1410 if (error)
1411 goto out_rg_gunlock;
1412
1413 gfs2_free_di(rgd, ip);
1414
1415 gfs2_final_release_pages(ip);
1416
1417 gfs2_trans_end(sdp);
1418
1419out_rg_gunlock:
1420 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1421out_rindex_relse:
1422 gfs2_glock_dq_uninit(&al->al_ri_gh);
1423out_qs:
1424 gfs2_quota_unhold(ip);
1425out:
1426 gfs2_alloc_put(ip);
1427 return error;
1428}
1429
1311/* 1430/*
1312 * We have to (at the moment) hold the inodes main lock to cover 1431 * We have to (at the moment) hold the inodes main lock to cover
1313 * the gap between unlocking the shared lock on the iopen lock and 1432 * the gap between unlocking the shared lock on the iopen lock and
@@ -1371,15 +1490,13 @@ static void gfs2_evict_inode(struct inode *inode)
1371 } 1490 }
1372 1491
1373 error = gfs2_dinode_dealloc(ip); 1492 error = gfs2_dinode_dealloc(ip);
1374 if (error) 1493 goto out_unlock;
1375 goto out_unlock;
1376 1494
1377out_truncate: 1495out_truncate:
1378 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); 1496 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
1379 if (error) 1497 if (error)
1380 goto out_unlock; 1498 goto out_unlock;
1381 /* Needs to be done before glock release & also in a transaction */ 1499 gfs2_final_release_pages(ip);
1382 truncate_inode_pages(&inode->i_data, 0);
1383 gfs2_trans_end(sdp); 1500 gfs2_trans_end(sdp);
1384 1501
1385out_unlock: 1502out_unlock:
@@ -1394,6 +1511,7 @@ out:
1394 end_writeback(inode); 1511 end_writeback(inode);
1395 1512
1396 ip->i_gl->gl_object = NULL; 1513 ip->i_gl->gl_object = NULL;
1514 gfs2_glock_add_to_lru(ip->i_gl);
1397 gfs2_glock_put(ip->i_gl); 1515 gfs2_glock_put(ip->i_gl);
1398 ip->i_gl = NULL; 1516 ip->i_gl = NULL;
1399 if (ip->i_iopen_gh.gh_gl) { 1517 if (ip->i_iopen_gh.gh_gl) {
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 748ccb557c18..e20eab37bc80 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -81,7 +81,8 @@ static int gfs2_uuid_valid(const u8 *uuid)
81 81
82static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf) 82static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
83{ 83{
84 const u8 *uuid = sdp->sd_sb.sb_uuid; 84 struct super_block *s = sdp->sd_vfs;
85 const u8 *uuid = s->s_uuid;
85 buf[0] = '\0'; 86 buf[0] = '\0';
86 if (!gfs2_uuid_valid(uuid)) 87 if (!gfs2_uuid_valid(uuid))
87 return 0; 88 return 0;
@@ -616,7 +617,8 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
616 struct kobj_uevent_env *env) 617 struct kobj_uevent_env *env)
617{ 618{
618 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); 619 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
619 const u8 *uuid = sdp->sd_sb.sb_uuid; 620 struct super_block *s = sdp->sd_vfs;
621 const u8 *uuid = s->s_uuid;
620 622
621 add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); 623 add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
622 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); 624 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index cedb0bb96d96..5d07609ec57d 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -10,6 +10,7 @@
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11#include <linux/dlmconstants.h> 11#include <linux/dlmconstants.h>
12#include <linux/gfs2_ondisk.h> 12#include <linux/gfs2_ondisk.h>
13#include <linux/writeback.h>
13#include "incore.h" 14#include "incore.h"
14#include "glock.h" 15#include "glock.h"
15 16
@@ -40,7 +41,9 @@
40 {(1UL << GLF_REPLY_PENDING), "r" }, \ 41 {(1UL << GLF_REPLY_PENDING), "r" }, \
41 {(1UL << GLF_INITIAL), "I" }, \ 42 {(1UL << GLF_INITIAL), "I" }, \
42 {(1UL << GLF_FROZEN), "F" }, \ 43 {(1UL << GLF_FROZEN), "F" }, \
43 {(1UL << GLF_QUEUED), "q" }) 44 {(1UL << GLF_QUEUED), "q" }, \
45 {(1UL << GLF_LRU), "L" }, \
46 {(1UL << GLF_OBJECT), "o" })
44 47
45#ifndef NUMPTY 48#ifndef NUMPTY
46#define NUMPTY 49#define NUMPTY
@@ -94,7 +97,7 @@ TRACE_EVENT(gfs2_glock_state_change,
94 __entry->new_state = glock_trace_state(new_state); 97 __entry->new_state = glock_trace_state(new_state);
95 __entry->tgt_state = glock_trace_state(gl->gl_target); 98 __entry->tgt_state = glock_trace_state(gl->gl_target);
96 __entry->dmt_state = glock_trace_state(gl->gl_demote_state); 99 __entry->dmt_state = glock_trace_state(gl->gl_demote_state);
97 __entry->flags = gl->gl_flags; 100 __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
98 ), 101 ),
99 102
100 TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s", 103 TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s",
@@ -127,7 +130,7 @@ TRACE_EVENT(gfs2_glock_put,
127 __entry->gltype = gl->gl_name.ln_type; 130 __entry->gltype = gl->gl_name.ln_type;
128 __entry->glnum = gl->gl_name.ln_number; 131 __entry->glnum = gl->gl_name.ln_number;
129 __entry->cur_state = glock_trace_state(gl->gl_state); 132 __entry->cur_state = glock_trace_state(gl->gl_state);
130 __entry->flags = gl->gl_flags; 133 __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
131 ), 134 ),
132 135
133 TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s", 136 TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s",
@@ -161,7 +164,7 @@ TRACE_EVENT(gfs2_demote_rq,
161 __entry->glnum = gl->gl_name.ln_number; 164 __entry->glnum = gl->gl_name.ln_number;
162 __entry->cur_state = glock_trace_state(gl->gl_state); 165 __entry->cur_state = glock_trace_state(gl->gl_state);
163 __entry->dmt_state = glock_trace_state(gl->gl_demote_state); 166 __entry->dmt_state = glock_trace_state(gl->gl_demote_state);
164 __entry->flags = gl->gl_flags; 167 __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
165 ), 168 ),
166 169
167 TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s", 170 TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s",
@@ -318,6 +321,33 @@ TRACE_EVENT(gfs2_log_blocks,
318 MINOR(__entry->dev), __entry->blocks) 321 MINOR(__entry->dev), __entry->blocks)
319); 322);
320 323
324/* Writing back the AIL */
325TRACE_EVENT(gfs2_ail_flush,
326
327 TP_PROTO(const struct gfs2_sbd *sdp, const struct writeback_control *wbc, int start),
328
329 TP_ARGS(sdp, wbc, start),
330
331 TP_STRUCT__entry(
332 __field( dev_t, dev )
333 __field( int, start )
334 __field( int, sync_mode )
335 __field( long, nr_to_write )
336 ),
337
338 TP_fast_assign(
339 __entry->dev = sdp->sd_vfs->s_dev;
340 __entry->start = start;
341 __entry->sync_mode = wbc->sync_mode;
342 __entry->nr_to_write = wbc->nr_to_write;
343 ),
344
345 TP_printk("%u,%u ail flush %s %s %ld", MAJOR(__entry->dev),
346 MINOR(__entry->dev), __entry->start ? "start" : "end",
347 __entry->sync_mode == WB_SYNC_ALL ? "all" : "none",
348 __entry->nr_to_write)
349);
350
321/* Section 3 - bmap 351/* Section 3 - bmap
322 * 352 *
323 * Objectives: 353 * Objectives:
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index b9eeb1cd03ff..e7a035781b7d 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -412,10 +412,10 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
412 pgoff = offset >> PAGE_SHIFT; 412 pgoff = offset >> PAGE_SHIFT;
413 413
414 i_size_write(inode, offset); 414 i_size_write(inode, offset);
415 spin_lock(&mapping->i_mmap_lock); 415 mutex_lock(&mapping->i_mmap_mutex);
416 if (!prio_tree_empty(&mapping->i_mmap)) 416 if (!prio_tree_empty(&mapping->i_mmap))
417 hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); 417 hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
418 spin_unlock(&mapping->i_mmap_lock); 418 mutex_unlock(&mapping->i_mmap_mutex);
419 truncate_hugepages(inode, offset); 419 truncate_hugepages(inode, offset);
420 return 0; 420 return 0;
421} 421}
diff --git a/fs/inode.c b/fs/inode.c
index 33c963d08ab4..990d284877a1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -24,6 +24,7 @@
24#include <linux/mount.h> 24#include <linux/mount.h>
25#include <linux/async.h> 25#include <linux/async.h>
26#include <linux/posix_acl.h> 26#include <linux/posix_acl.h>
27#include <linux/prefetch.h>
27#include <linux/ima.h> 28#include <linux/ima.h>
28#include <linux/cred.h> 29#include <linux/cred.h>
29#include "internal.h" 30#include "internal.h"
@@ -325,12 +326,11 @@ void address_space_init_once(struct address_space *mapping)
325 memset(mapping, 0, sizeof(*mapping)); 326 memset(mapping, 0, sizeof(*mapping));
326 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC); 327 INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
327 spin_lock_init(&mapping->tree_lock); 328 spin_lock_init(&mapping->tree_lock);
328 spin_lock_init(&mapping->i_mmap_lock); 329 mutex_init(&mapping->i_mmap_mutex);
329 INIT_LIST_HEAD(&mapping->private_list); 330 INIT_LIST_HEAD(&mapping->private_list);
330 spin_lock_init(&mapping->private_lock); 331 spin_lock_init(&mapping->private_lock);
331 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); 332 INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
332 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); 333 INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
333 mutex_init(&mapping->unmap_mutex);
334} 334}
335EXPORT_SYMBOL(address_space_init_once); 335EXPORT_SYMBOL(address_space_init_once);
336 336
@@ -751,8 +751,12 @@ static void prune_icache(int nr_to_scan)
751 * This function is passed the number of inodes to scan, and it returns the 751 * This function is passed the number of inodes to scan, and it returns the
752 * total number of remaining possibly-reclaimable inodes. 752 * total number of remaining possibly-reclaimable inodes.
753 */ 753 */
754static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 754static int shrink_icache_memory(struct shrinker *shrink,
755 struct shrink_control *sc)
755{ 756{
757 int nr = sc->nr_to_scan;
758 gfp_t gfp_mask = sc->gfp_mask;
759
756 if (nr) { 760 if (nr) {
757 /* 761 /*
758 * Nasty deadlock avoidance. We may hold various FS locks, 762 * Nasty deadlock avoidance. We may hold various FS locks,
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 69b180459463..72ffa974b0b8 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -302,12 +302,6 @@ void journal_commit_transaction(journal_t *journal)
302 * all outstanding updates to complete. 302 * all outstanding updates to complete.
303 */ 303 */
304 304
305#ifdef COMMIT_STATS
306 spin_lock(&journal->j_list_lock);
307 summarise_journal_usage(journal);
308 spin_unlock(&journal->j_list_lock);
309#endif
310
311 /* Do we need to erase the effects of a prior journal_flush? */ 305 /* Do we need to erase the effects of a prior journal_flush? */
312 if (journal->j_flags & JFS_FLUSHED) { 306 if (journal->j_flags & JFS_FLUSHED) {
313 jbd_debug(3, "super block updated\n"); 307 jbd_debug(3, "super block updated\n");
@@ -722,8 +716,13 @@ wait_for_iobuf:
722 required. */ 716 required. */
723 JBUFFER_TRACE(jh, "file as BJ_Forget"); 717 JBUFFER_TRACE(jh, "file as BJ_Forget");
724 journal_file_buffer(jh, commit_transaction, BJ_Forget); 718 journal_file_buffer(jh, commit_transaction, BJ_Forget);
725 /* Wake up any transactions which were waiting for this 719 /*
726 IO to complete */ 720 * Wake up any transactions which were waiting for this
721 * IO to complete. The barrier must be here so that changes
722 * by journal_file_buffer() take effect before wake_up_bit()
723 * does the waitqueue check.
724 */
725 smp_mb();
727 wake_up_bit(&bh->b_state, BH_Unshadow); 726 wake_up_bit(&bh->b_state, BH_Unshadow);
728 JBUFFER_TRACE(jh, "brelse shadowed buffer"); 727 JBUFFER_TRACE(jh, "brelse shadowed buffer");
729 __brelse(bh); 728 __brelse(bh);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b3713afaaa9e..e2d4285fbe90 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -437,9 +437,12 @@ int __log_space_left(journal_t *journal)
437int __log_start_commit(journal_t *journal, tid_t target) 437int __log_start_commit(journal_t *journal, tid_t target)
438{ 438{
439 /* 439 /*
440 * Are we already doing a recent enough commit? 440 * The only transaction we can possibly wait upon is the
441 * currently running transaction (if it exists). Otherwise,
442 * the target tid must be an old one.
441 */ 443 */
442 if (!tid_geq(journal->j_commit_request, target)) { 444 if (journal->j_running_transaction &&
445 journal->j_running_transaction->t_tid == target) {
443 /* 446 /*
444 * We want a new commit: OK, mark the request and wakeup the 447 * We want a new commit: OK, mark the request and wakeup the
445 * commit thread. We do _not_ do the commit ourselves. 448 * commit thread. We do _not_ do the commit ourselves.
@@ -451,7 +454,14 @@ int __log_start_commit(journal_t *journal, tid_t target)
451 journal->j_commit_sequence); 454 journal->j_commit_sequence);
452 wake_up(&journal->j_wait_commit); 455 wake_up(&journal->j_wait_commit);
453 return 1; 456 return 1;
454 } 457 } else if (!tid_geq(journal->j_commit_request, target))
458 /* This should never happen, but if it does, preserve
459 the evidence before kjournald goes into a loop and
460 increments j_commit_sequence beyond all recognition. */
461 WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n",
462 journal->j_commit_request, journal->j_commit_sequence,
463 target, journal->j_running_transaction ?
464 journal->j_running_transaction->t_tid : 0);
455 return 0; 465 return 0;
456} 466}
457 467
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 60d2319651b2..f7ee81a065da 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -266,7 +266,8 @@ static handle_t *new_handle(int nblocks)
266 * This function is visible to journal users (like ext3fs), so is not 266 * This function is visible to journal users (like ext3fs), so is not
267 * called with the journal already locked. 267 * called with the journal already locked.
268 * 268 *
269 * Return a pointer to a newly allocated handle, or NULL on failure 269 * Return a pointer to a newly allocated handle, or an ERR_PTR() value
270 * on failure.
270 */ 271 */
271handle_t *journal_start(journal_t *journal, int nblocks) 272handle_t *journal_start(journal_t *journal, int nblocks)
272{ 273{
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6e28000a4b21..29148a81c783 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -338,12 +338,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
338 * all outstanding updates to complete. 338 * all outstanding updates to complete.
339 */ 339 */
340 340
341#ifdef COMMIT_STATS
342 spin_lock(&journal->j_list_lock);
343 summarise_journal_usage(journal);
344 spin_unlock(&journal->j_list_lock);
345#endif
346
347 /* Do we need to erase the effects of a prior jbd2_journal_flush? */ 341 /* Do we need to erase the effects of a prior jbd2_journal_flush? */
348 if (journal->j_flags & JBD2_FLUSHED) { 342 if (journal->j_flags & JBD2_FLUSHED) {
349 jbd_debug(3, "super block updated\n"); 343 jbd_debug(3, "super block updated\n");
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 1adc8d455f0e..df0de27c2733 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -10,6 +10,7 @@
10#include <linux/blkdev.h> 10#include <linux/blkdev.h>
11#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
12#include <linux/gfp.h> 12#include <linux/gfp.h>
13#include <linux/prefetch.h>
13 14
14#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) 15#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
15 16
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 9e22085231b3..d8d09380c7de 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -481,7 +481,7 @@ static int inode_write_alias(struct super_block *sb,
481 val = inode_val0(inode); 481 val = inode_val0(inode);
482 break; 482 break;
483 case INODE_USED_OFS: 483 case INODE_USED_OFS:
484 val = cpu_to_be64(li->li_used_bytes);; 484 val = cpu_to_be64(li->li_used_bytes);
485 break; 485 break;
486 case INODE_SIZE_OFS: 486 case INODE_SIZE_OFS:
487 val = cpu_to_be64(i_size_read(inode)); 487 val = cpu_to_be64(i_size_read(inode));
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 2f174be06555..8c32ef3ba88e 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -90,7 +90,8 @@ static DEFINE_SPINLOCK(mb_cache_spinlock);
90 * What the mbcache registers as to get shrunk dynamically. 90 * What the mbcache registers as to get shrunk dynamically.
91 */ 91 */
92 92
93static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask); 93static int mb_cache_shrink_fn(struct shrinker *shrink,
94 struct shrink_control *sc);
94 95
95static struct shrinker mb_cache_shrinker = { 96static struct shrinker mb_cache_shrinker = {
96 .shrink = mb_cache_shrink_fn, 97 .shrink = mb_cache_shrink_fn,
@@ -156,18 +157,19 @@ forget:
156 * gets low. 157 * gets low.
157 * 158 *
158 * @shrink: (ignored) 159 * @shrink: (ignored)
159 * @nr_to_scan: Number of objects to scan 160 * @sc: shrink_control passed from reclaim
160 * @gfp_mask: (ignored)
161 * 161 *
162 * Returns the number of objects which are present in the cache. 162 * Returns the number of objects which are present in the cache.
163 */ 163 */
164static int 164static int
165mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) 165mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc)
166{ 166{
167 LIST_HEAD(free_list); 167 LIST_HEAD(free_list);
168 struct mb_cache *cache; 168 struct mb_cache *cache;
169 struct mb_cache_entry *entry, *tmp; 169 struct mb_cache_entry *entry, *tmp;
170 int count = 0; 170 int count = 0;
171 int nr_to_scan = sc->nr_to_scan;
172 gfp_t gfp_mask = sc->gfp_mask;
171 173
172 mb_debug("trying to free %d entries", nr_to_scan); 174 mb_debug("trying to free %d entries", nr_to_scan);
173 spin_lock(&mb_cache_spinlock); 175 spin_lock(&mb_cache_spinlock);
diff --git a/fs/namei.c b/fs/namei.c
index e3c4f112ebf7..6ff858c049c0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1378,12 +1378,12 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
1378{ 1378{
1379 int res; 1379 int res;
1380 1380
1381 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
1382 if (unlikely(current->link_count >= MAX_NESTED_LINKS)) { 1381 if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
1383 path_put_conditional(path, nd); 1382 path_put_conditional(path, nd);
1384 path_put(&nd->path); 1383 path_put(&nd->path);
1385 return -ELOOP; 1384 return -ELOOP;
1386 } 1385 }
1386 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
1387 1387
1388 nd->depth++; 1388 nd->depth++;
1389 current->link_count++; 1389 current->link_count++;
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 0250e4ce4893..202f370526a7 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -461,7 +461,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
461#endif 461#endif
462 struct ncp_entry_info finfo; 462 struct ncp_entry_info finfo;
463 463
464 data.wdog_pid = NULL; 464 memset(&data, 0, sizeof(data));
465 server = kzalloc(sizeof(struct ncp_server), GFP_KERNEL); 465 server = kzalloc(sizeof(struct ncp_server), GFP_KERNEL);
466 if (!server) 466 if (!server)
467 return -ENOMEM; 467 return -ENOMEM;
@@ -496,7 +496,6 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
496 struct ncp_mount_data_v4* md = (struct ncp_mount_data_v4*)raw_data; 496 struct ncp_mount_data_v4* md = (struct ncp_mount_data_v4*)raw_data;
497 497
498 data.flags = md->flags; 498 data.flags = md->flags;
499 data.int_flags = 0;
500 data.mounted_uid = md->mounted_uid; 499 data.mounted_uid = md->mounted_uid;
501 data.wdog_pid = find_get_pid(md->wdog_pid); 500 data.wdog_pid = find_get_pid(md->wdog_pid);
502 data.ncp_fd = md->ncp_fd; 501 data.ncp_fd = md->ncp_fd;
@@ -507,7 +506,6 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
507 data.file_mode = md->file_mode; 506 data.file_mode = md->file_mode;
508 data.dir_mode = md->dir_mode; 507 data.dir_mode = md->dir_mode;
509 data.info_fd = -1; 508 data.info_fd = -1;
510 data.mounted_vol[0] = 0;
511 } 509 }
512 break; 510 break;
513 default: 511 default:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7237672216c8..424e47773a84 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2042,11 +2042,14 @@ static void nfs_access_free_list(struct list_head *head)
2042 } 2042 }
2043} 2043}
2044 2044
2045int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) 2045int nfs_access_cache_shrinker(struct shrinker *shrink,
2046 struct shrink_control *sc)
2046{ 2047{
2047 LIST_HEAD(head); 2048 LIST_HEAD(head);
2048 struct nfs_inode *nfsi, *next; 2049 struct nfs_inode *nfsi, *next;
2049 struct nfs_access_entry *cache; 2050 struct nfs_access_entry *cache;
2051 int nr_to_scan = sc->nr_to_scan;
2052 gfp_t gfp_mask = sc->gfp_mask;
2050 2053
2051 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) 2054 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
2052 return (nr_to_scan == 0) ? 0 : -1; 2055 return (nr_to_scan == 0) ? 0 : -1;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ce118ce885dd..2df6ca7b5898 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -234,7 +234,7 @@ extern int nfs_init_client(struct nfs_client *clp,
234 234
235/* dir.c */ 235/* dir.c */
236extern int nfs_access_cache_shrinker(struct shrinker *shrink, 236extern int nfs_access_cache_shrinker(struct shrinker *shrink,
237 int nr_to_scan, gfp_t gfp_mask); 237 struct shrink_control *sc);
238 238
239/* inode.c */ 239/* inode.c */
240extern struct workqueue_struct *nfsiod_workqueue; 240extern struct workqueue_struct *nfsiod_workqueue;
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 5232d3e8fb2f..a2e2402b2afb 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -8,7 +8,7 @@
8 * Statistsics for the reply cache 8 * Statistsics for the reply cache
9 * fh <stale> <total-lookups> <anonlookups> <dir-not-in-dcache> <nondir-not-in-dcache> 9 * fh <stale> <total-lookups> <anonlookups> <dir-not-in-dcache> <nondir-not-in-dcache>
10 * statistics for filehandle lookup 10 * statistics for filehandle lookup
11 * io <bytes-read> <bytes-writtten> 11 * io <bytes-read> <bytes-written>
12 * statistics for IO throughput 12 * statistics for IO throughput
13 * th <threads> <fullcnt> <10%-20%> <20%-30%> ... <90%-100%> <100%> 13 * th <threads> <fullcnt> <10%-20%> <20%-30%> ... <90%-100%> <100%>
14 * time (seconds) when nfsd thread usage above thresholds 14 * time (seconds) when nfsd thread usage above thresholds
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index f7684483785e..eed4d7b26249 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -489,8 +489,8 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
489void nilfs_palloc_commit_alloc_entry(struct inode *inode, 489void nilfs_palloc_commit_alloc_entry(struct inode *inode,
490 struct nilfs_palloc_req *req) 490 struct nilfs_palloc_req *req)
491{ 491{
492 nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh); 492 mark_buffer_dirty(req->pr_bitmap_bh);
493 nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh); 493 mark_buffer_dirty(req->pr_desc_bh);
494 nilfs_mdt_mark_dirty(inode); 494 nilfs_mdt_mark_dirty(inode);
495 495
496 brelse(req->pr_bitmap_bh); 496 brelse(req->pr_bitmap_bh);
@@ -527,8 +527,8 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
527 kunmap(req->pr_bitmap_bh->b_page); 527 kunmap(req->pr_bitmap_bh->b_page);
528 kunmap(req->pr_desc_bh->b_page); 528 kunmap(req->pr_desc_bh->b_page);
529 529
530 nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh); 530 mark_buffer_dirty(req->pr_desc_bh);
531 nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh); 531 mark_buffer_dirty(req->pr_bitmap_bh);
532 nilfs_mdt_mark_dirty(inode); 532 nilfs_mdt_mark_dirty(inode);
533 533
534 brelse(req->pr_bitmap_bh); 534 brelse(req->pr_bitmap_bh);
@@ -683,8 +683,8 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
683 kunmap(bitmap_bh->b_page); 683 kunmap(bitmap_bh->b_page);
684 kunmap(desc_bh->b_page); 684 kunmap(desc_bh->b_page);
685 685
686 nilfs_mdt_mark_buffer_dirty(desc_bh); 686 mark_buffer_dirty(desc_bh);
687 nilfs_mdt_mark_buffer_dirty(bitmap_bh); 687 mark_buffer_dirty(bitmap_bh);
688 nilfs_mdt_mark_dirty(inode); 688 nilfs_mdt_mark_dirty(inode);
689 689
690 brelse(bitmap_bh); 690 brelse(bitmap_bh);
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 4723f04e9b12..aadbd0b5e3e8 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -34,7 +34,9 @@
34 34
35struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap) 35struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap)
36{ 36{
37 return NILFS_I_NILFS(bmap->b_inode)->ns_dat; 37 struct the_nilfs *nilfs = bmap->b_inode->i_sb->s_fs_info;
38
39 return nilfs->ns_dat;
38} 40}
39 41
40static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap, 42static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap,
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 609cd223eea8..a35ae35e6932 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -34,12 +34,6 @@
34#include "page.h" 34#include "page.h"
35#include "btnode.h" 35#include "btnode.h"
36 36
37void nilfs_btnode_cache_init(struct address_space *btnc,
38 struct backing_dev_info *bdi)
39{
40 nilfs_mapping_init(btnc, bdi);
41}
42
43void nilfs_btnode_cache_clear(struct address_space *btnc) 37void nilfs_btnode_cache_clear(struct address_space *btnc)
44{ 38{
45 invalidate_mapping_pages(btnc, 0, -1); 39 invalidate_mapping_pages(btnc, 0, -1);
@@ -62,7 +56,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
62 BUG(); 56 BUG();
63 } 57 }
64 memset(bh->b_data, 0, 1 << inode->i_blkbits); 58 memset(bh->b_data, 0, 1 << inode->i_blkbits);
65 bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; 59 bh->b_bdev = inode->i_sb->s_bdev;
66 bh->b_blocknr = blocknr; 60 bh->b_blocknr = blocknr;
67 set_buffer_mapped(bh); 61 set_buffer_mapped(bh);
68 set_buffer_uptodate(bh); 62 set_buffer_uptodate(bh);
@@ -94,10 +88,11 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
94 if (pblocknr == 0) { 88 if (pblocknr == 0) {
95 pblocknr = blocknr; 89 pblocknr = blocknr;
96 if (inode->i_ino != NILFS_DAT_INO) { 90 if (inode->i_ino != NILFS_DAT_INO) {
97 struct inode *dat = NILFS_I_NILFS(inode)->ns_dat; 91 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
98 92
99 /* blocknr is a virtual block number */ 93 /* blocknr is a virtual block number */
100 err = nilfs_dat_translate(dat, blocknr, &pblocknr); 94 err = nilfs_dat_translate(nilfs->ns_dat, blocknr,
95 &pblocknr);
101 if (unlikely(err)) { 96 if (unlikely(err)) {
102 brelse(bh); 97 brelse(bh);
103 goto out_locked; 98 goto out_locked;
@@ -120,7 +115,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
120 goto found; 115 goto found;
121 } 116 }
122 set_buffer_mapped(bh); 117 set_buffer_mapped(bh);
123 bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; 118 bh->b_bdev = inode->i_sb->s_bdev;
124 bh->b_blocknr = pblocknr; /* set block address for read */ 119 bh->b_blocknr = pblocknr; /* set block address for read */
125 bh->b_end_io = end_buffer_read_sync; 120 bh->b_end_io = end_buffer_read_sync;
126 get_bh(bh); 121 get_bh(bh);
@@ -259,7 +254,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
259 "invalid oldkey %lld (newkey=%lld)", 254 "invalid oldkey %lld (newkey=%lld)",
260 (unsigned long long)oldkey, 255 (unsigned long long)oldkey,
261 (unsigned long long)newkey); 256 (unsigned long long)newkey);
262 nilfs_btnode_mark_dirty(obh); 257 mark_buffer_dirty(obh);
263 258
264 spin_lock_irq(&btnc->tree_lock); 259 spin_lock_irq(&btnc->tree_lock);
265 radix_tree_delete(&btnc->page_tree, oldkey); 260 radix_tree_delete(&btnc->page_tree, oldkey);
@@ -271,7 +266,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
271 unlock_page(opage); 266 unlock_page(opage);
272 } else { 267 } else {
273 nilfs_copy_buffer(nbh, obh); 268 nilfs_copy_buffer(nbh, obh);
274 nilfs_btnode_mark_dirty(nbh); 269 mark_buffer_dirty(nbh);
275 270
276 nbh->b_blocknr = newkey; 271 nbh->b_blocknr = newkey;
277 ctxt->bh = nbh; 272 ctxt->bh = nbh;
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 1b8ebd888c28..3a4dd2d8d3fc 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
37 struct buffer_head *newbh; 37 struct buffer_head *newbh;
38}; 38};
39 39
40void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
41void nilfs_btnode_cache_clear(struct address_space *); 40void nilfs_btnode_cache_clear(struct address_space *);
42struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, 41struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
43 __u64 blocknr); 42 __u64 blocknr);
@@ -51,7 +50,4 @@ void nilfs_btnode_commit_change_key(struct address_space *,
51void nilfs_btnode_abort_change_key(struct address_space *, 50void nilfs_btnode_abort_change_key(struct address_space *,
52 struct nilfs_btnode_chkey_ctxt *); 51 struct nilfs_btnode_chkey_ctxt *);
53 52
54#define nilfs_btnode_mark_dirty(bh) nilfs_mark_buffer_dirty(bh)
55
56
57#endif /* _NILFS_BTNODE_H */ 53#endif /* _NILFS_BTNODE_H */
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index d451ae0e0bf3..7eafe468a29c 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -714,7 +714,7 @@ static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
714 nilfs_btree_get_nonroot_node(path, level), 714 nilfs_btree_get_nonroot_node(path, level),
715 path[level].bp_index, key); 715 path[level].bp_index, key);
716 if (!buffer_dirty(path[level].bp_bh)) 716 if (!buffer_dirty(path[level].bp_bh))
717 nilfs_btnode_mark_dirty(path[level].bp_bh); 717 mark_buffer_dirty(path[level].bp_bh);
718 } while ((path[level].bp_index == 0) && 718 } while ((path[level].bp_index == 0) &&
719 (++level < nilfs_btree_height(btree) - 1)); 719 (++level < nilfs_btree_height(btree) - 1));
720 } 720 }
@@ -739,7 +739,7 @@ static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
739 nilfs_btree_node_insert(node, path[level].bp_index, 739 nilfs_btree_node_insert(node, path[level].bp_index,
740 *keyp, *ptrp, ncblk); 740 *keyp, *ptrp, ncblk);
741 if (!buffer_dirty(path[level].bp_bh)) 741 if (!buffer_dirty(path[level].bp_bh))
742 nilfs_btnode_mark_dirty(path[level].bp_bh); 742 mark_buffer_dirty(path[level].bp_bh);
743 743
744 if (path[level].bp_index == 0) 744 if (path[level].bp_index == 0)
745 nilfs_btree_promote_key(btree, path, level + 1, 745 nilfs_btree_promote_key(btree, path, level + 1,
@@ -777,9 +777,9 @@ static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
777 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 777 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
778 778
779 if (!buffer_dirty(path[level].bp_bh)) 779 if (!buffer_dirty(path[level].bp_bh))
780 nilfs_btnode_mark_dirty(path[level].bp_bh); 780 mark_buffer_dirty(path[level].bp_bh);
781 if (!buffer_dirty(path[level].bp_sib_bh)) 781 if (!buffer_dirty(path[level].bp_sib_bh))
782 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 782 mark_buffer_dirty(path[level].bp_sib_bh);
783 783
784 nilfs_btree_promote_key(btree, path, level + 1, 784 nilfs_btree_promote_key(btree, path, level + 1,
785 nilfs_btree_node_get_key(node, 0)); 785 nilfs_btree_node_get_key(node, 0));
@@ -823,9 +823,9 @@ static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
823 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 823 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
824 824
825 if (!buffer_dirty(path[level].bp_bh)) 825 if (!buffer_dirty(path[level].bp_bh))
826 nilfs_btnode_mark_dirty(path[level].bp_bh); 826 mark_buffer_dirty(path[level].bp_bh);
827 if (!buffer_dirty(path[level].bp_sib_bh)) 827 if (!buffer_dirty(path[level].bp_sib_bh))
828 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 828 mark_buffer_dirty(path[level].bp_sib_bh);
829 829
830 path[level + 1].bp_index++; 830 path[level + 1].bp_index++;
831 nilfs_btree_promote_key(btree, path, level + 1, 831 nilfs_btree_promote_key(btree, path, level + 1,
@@ -870,9 +870,9 @@ static void nilfs_btree_split(struct nilfs_bmap *btree,
870 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 870 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
871 871
872 if (!buffer_dirty(path[level].bp_bh)) 872 if (!buffer_dirty(path[level].bp_bh))
873 nilfs_btnode_mark_dirty(path[level].bp_bh); 873 mark_buffer_dirty(path[level].bp_bh);
874 if (!buffer_dirty(path[level].bp_sib_bh)) 874 if (!buffer_dirty(path[level].bp_sib_bh))
875 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 875 mark_buffer_dirty(path[level].bp_sib_bh);
876 876
877 newkey = nilfs_btree_node_get_key(right, 0); 877 newkey = nilfs_btree_node_get_key(right, 0);
878 newptr = path[level].bp_newreq.bpr_ptr; 878 newptr = path[level].bp_newreq.bpr_ptr;
@@ -919,7 +919,7 @@ static void nilfs_btree_grow(struct nilfs_bmap *btree,
919 nilfs_btree_node_set_level(root, level + 1); 919 nilfs_btree_node_set_level(root, level + 1);
920 920
921 if (!buffer_dirty(path[level].bp_sib_bh)) 921 if (!buffer_dirty(path[level].bp_sib_bh))
922 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 922 mark_buffer_dirty(path[level].bp_sib_bh);
923 923
924 path[level].bp_bh = path[level].bp_sib_bh; 924 path[level].bp_bh = path[level].bp_sib_bh;
925 path[level].bp_sib_bh = NULL; 925 path[level].bp_sib_bh = NULL;
@@ -1194,7 +1194,7 @@ static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1194 nilfs_btree_node_delete(node, path[level].bp_index, 1194 nilfs_btree_node_delete(node, path[level].bp_index,
1195 keyp, ptrp, ncblk); 1195 keyp, ptrp, ncblk);
1196 if (!buffer_dirty(path[level].bp_bh)) 1196 if (!buffer_dirty(path[level].bp_bh))
1197 nilfs_btnode_mark_dirty(path[level].bp_bh); 1197 mark_buffer_dirty(path[level].bp_bh);
1198 if (path[level].bp_index == 0) 1198 if (path[level].bp_index == 0)
1199 nilfs_btree_promote_key(btree, path, level + 1, 1199 nilfs_btree_promote_key(btree, path, level + 1,
1200 nilfs_btree_node_get_key(node, 0)); 1200 nilfs_btree_node_get_key(node, 0));
@@ -1226,9 +1226,9 @@ static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1226 nilfs_btree_node_move_right(left, node, n, ncblk, ncblk); 1226 nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1227 1227
1228 if (!buffer_dirty(path[level].bp_bh)) 1228 if (!buffer_dirty(path[level].bp_bh))
1229 nilfs_btnode_mark_dirty(path[level].bp_bh); 1229 mark_buffer_dirty(path[level].bp_bh);
1230 if (!buffer_dirty(path[level].bp_sib_bh)) 1230 if (!buffer_dirty(path[level].bp_sib_bh))
1231 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 1231 mark_buffer_dirty(path[level].bp_sib_bh);
1232 1232
1233 nilfs_btree_promote_key(btree, path, level + 1, 1233 nilfs_btree_promote_key(btree, path, level + 1,
1234 nilfs_btree_node_get_key(node, 0)); 1234 nilfs_btree_node_get_key(node, 0));
@@ -1258,9 +1258,9 @@ static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1258 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1258 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1259 1259
1260 if (!buffer_dirty(path[level].bp_bh)) 1260 if (!buffer_dirty(path[level].bp_bh))
1261 nilfs_btnode_mark_dirty(path[level].bp_bh); 1261 mark_buffer_dirty(path[level].bp_bh);
1262 if (!buffer_dirty(path[level].bp_sib_bh)) 1262 if (!buffer_dirty(path[level].bp_sib_bh))
1263 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 1263 mark_buffer_dirty(path[level].bp_sib_bh);
1264 1264
1265 path[level + 1].bp_index++; 1265 path[level + 1].bp_index++;
1266 nilfs_btree_promote_key(btree, path, level + 1, 1266 nilfs_btree_promote_key(btree, path, level + 1,
@@ -1289,7 +1289,7 @@ static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1289 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 1289 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1290 1290
1291 if (!buffer_dirty(path[level].bp_sib_bh)) 1291 if (!buffer_dirty(path[level].bp_sib_bh))
1292 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 1292 mark_buffer_dirty(path[level].bp_sib_bh);
1293 1293
1294 nilfs_btnode_delete(path[level].bp_bh); 1294 nilfs_btnode_delete(path[level].bp_bh);
1295 path[level].bp_bh = path[level].bp_sib_bh; 1295 path[level].bp_bh = path[level].bp_sib_bh;
@@ -1315,7 +1315,7 @@ static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1315 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1315 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1316 1316
1317 if (!buffer_dirty(path[level].bp_bh)) 1317 if (!buffer_dirty(path[level].bp_bh))
1318 nilfs_btnode_mark_dirty(path[level].bp_bh); 1318 mark_buffer_dirty(path[level].bp_bh);
1319 1319
1320 nilfs_btnode_delete(path[level].bp_sib_bh); 1320 nilfs_btnode_delete(path[level].bp_sib_bh);
1321 path[level].bp_sib_bh = NULL; 1321 path[level].bp_sib_bh = NULL;
@@ -1709,7 +1709,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1709 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs); 1709 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1710 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk); 1710 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1711 if (!buffer_dirty(bh)) 1711 if (!buffer_dirty(bh))
1712 nilfs_btnode_mark_dirty(bh); 1712 mark_buffer_dirty(bh);
1713 if (!nilfs_bmap_dirty(btree)) 1713 if (!nilfs_bmap_dirty(btree))
1714 nilfs_bmap_set_dirty(btree); 1714 nilfs_bmap_set_dirty(btree);
1715 1715
@@ -1787,7 +1787,7 @@ static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1787{ 1787{
1788 while ((++level < nilfs_btree_height(btree) - 1) && 1788 while ((++level < nilfs_btree_height(btree) - 1) &&
1789 !buffer_dirty(path[level].bp_bh)) 1789 !buffer_dirty(path[level].bp_bh))
1790 nilfs_btnode_mark_dirty(path[level].bp_bh); 1790 mark_buffer_dirty(path[level].bp_bh);
1791 1791
1792 return 0; 1792 return 0;
1793} 1793}
@@ -2229,7 +2229,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2229 } 2229 }
2230 2230
2231 if (!buffer_dirty(bh)) 2231 if (!buffer_dirty(bh))
2232 nilfs_btnode_mark_dirty(bh); 2232 mark_buffer_dirty(bh);
2233 brelse(bh); 2233 brelse(bh);
2234 if (!nilfs_bmap_dirty(btree)) 2234 if (!nilfs_bmap_dirty(btree))
2235 nilfs_bmap_set_dirty(btree); 2235 nilfs_bmap_set_dirty(btree);
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 5ff15a8a1024..c9b342c8b503 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -216,14 +216,14 @@ int nilfs_cpfile_get_checkpoint(struct inode *cpfile,
216 if (!nilfs_cpfile_is_in_first(cpfile, cno)) 216 if (!nilfs_cpfile_is_in_first(cpfile, cno))
217 nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh, 217 nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh,
218 kaddr, 1); 218 kaddr, 1);
219 nilfs_mdt_mark_buffer_dirty(cp_bh); 219 mark_buffer_dirty(cp_bh);
220 220
221 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 221 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
222 header = nilfs_cpfile_block_get_header(cpfile, header_bh, 222 header = nilfs_cpfile_block_get_header(cpfile, header_bh,
223 kaddr); 223 kaddr);
224 le64_add_cpu(&header->ch_ncheckpoints, 1); 224 le64_add_cpu(&header->ch_ncheckpoints, 1);
225 kunmap_atomic(kaddr, KM_USER0); 225 kunmap_atomic(kaddr, KM_USER0);
226 nilfs_mdt_mark_buffer_dirty(header_bh); 226 mark_buffer_dirty(header_bh);
227 nilfs_mdt_mark_dirty(cpfile); 227 nilfs_mdt_mark_dirty(cpfile);
228 } 228 }
229 229
@@ -326,7 +326,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
326 } 326 }
327 if (nicps > 0) { 327 if (nicps > 0) {
328 tnicps += nicps; 328 tnicps += nicps;
329 nilfs_mdt_mark_buffer_dirty(cp_bh); 329 mark_buffer_dirty(cp_bh);
330 nilfs_mdt_mark_dirty(cpfile); 330 nilfs_mdt_mark_dirty(cpfile);
331 if (!nilfs_cpfile_is_in_first(cpfile, cno)) { 331 if (!nilfs_cpfile_is_in_first(cpfile, cno)) {
332 count = 332 count =
@@ -358,7 +358,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
358 header = nilfs_cpfile_block_get_header(cpfile, header_bh, 358 header = nilfs_cpfile_block_get_header(cpfile, header_bh,
359 kaddr); 359 kaddr);
360 le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps); 360 le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps);
361 nilfs_mdt_mark_buffer_dirty(header_bh); 361 mark_buffer_dirty(header_bh);
362 nilfs_mdt_mark_dirty(cpfile); 362 nilfs_mdt_mark_dirty(cpfile);
363 kunmap_atomic(kaddr, KM_USER0); 363 kunmap_atomic(kaddr, KM_USER0);
364 } 364 }
@@ -671,10 +671,10 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
671 le64_add_cpu(&header->ch_nsnapshots, 1); 671 le64_add_cpu(&header->ch_nsnapshots, 1);
672 kunmap_atomic(kaddr, KM_USER0); 672 kunmap_atomic(kaddr, KM_USER0);
673 673
674 nilfs_mdt_mark_buffer_dirty(prev_bh); 674 mark_buffer_dirty(prev_bh);
675 nilfs_mdt_mark_buffer_dirty(curr_bh); 675 mark_buffer_dirty(curr_bh);
676 nilfs_mdt_mark_buffer_dirty(cp_bh); 676 mark_buffer_dirty(cp_bh);
677 nilfs_mdt_mark_buffer_dirty(header_bh); 677 mark_buffer_dirty(header_bh);
678 nilfs_mdt_mark_dirty(cpfile); 678 nilfs_mdt_mark_dirty(cpfile);
679 679
680 brelse(prev_bh); 680 brelse(prev_bh);
@@ -774,10 +774,10 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
774 le64_add_cpu(&header->ch_nsnapshots, -1); 774 le64_add_cpu(&header->ch_nsnapshots, -1);
775 kunmap_atomic(kaddr, KM_USER0); 775 kunmap_atomic(kaddr, KM_USER0);
776 776
777 nilfs_mdt_mark_buffer_dirty(next_bh); 777 mark_buffer_dirty(next_bh);
778 nilfs_mdt_mark_buffer_dirty(prev_bh); 778 mark_buffer_dirty(prev_bh);
779 nilfs_mdt_mark_buffer_dirty(cp_bh); 779 mark_buffer_dirty(cp_bh);
780 nilfs_mdt_mark_buffer_dirty(header_bh); 780 mark_buffer_dirty(header_bh);
781 nilfs_mdt_mark_dirty(cpfile); 781 nilfs_mdt_mark_dirty(cpfile);
782 782
783 brelse(prev_bh); 783 brelse(prev_bh);
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 59e5fe742f7b..fcc2f869af16 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -54,7 +54,7 @@ static int nilfs_dat_prepare_entry(struct inode *dat,
54static void nilfs_dat_commit_entry(struct inode *dat, 54static void nilfs_dat_commit_entry(struct inode *dat,
55 struct nilfs_palloc_req *req) 55 struct nilfs_palloc_req *req)
56{ 56{
57 nilfs_mdt_mark_buffer_dirty(req->pr_entry_bh); 57 mark_buffer_dirty(req->pr_entry_bh);
58 nilfs_mdt_mark_dirty(dat); 58 nilfs_mdt_mark_dirty(dat);
59 brelse(req->pr_entry_bh); 59 brelse(req->pr_entry_bh);
60} 60}
@@ -361,7 +361,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
361 entry->de_blocknr = cpu_to_le64(blocknr); 361 entry->de_blocknr = cpu_to_le64(blocknr);
362 kunmap_atomic(kaddr, KM_USER0); 362 kunmap_atomic(kaddr, KM_USER0);
363 363
364 nilfs_mdt_mark_buffer_dirty(entry_bh); 364 mark_buffer_dirty(entry_bh);
365 nilfs_mdt_mark_dirty(dat); 365 nilfs_mdt_mark_dirty(dat);
366 366
367 brelse(entry_bh); 367 brelse(entry_bh);
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 397e73258631..d7eeca62febd 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -111,7 +111,6 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
111 nilfs_transaction_commit(inode->i_sb); 111 nilfs_transaction_commit(inode->i_sb);
112 112
113 mapped: 113 mapped:
114 SetPageChecked(page);
115 wait_on_page_writeback(page); 114 wait_on_page_writeback(page);
116 return VM_FAULT_LOCKED; 115 return VM_FAULT_LOCKED;
117} 116}
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 1c2a3e23f8b2..08a07a218d26 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -48,9 +48,6 @@
48#include "dat.h" 48#include "dat.h"
49#include "ifile.h" 49#include "ifile.h"
50 50
51static const struct address_space_operations def_gcinode_aops = {
52};
53
54/* 51/*
55 * nilfs_gccache_submit_read_data() - add data buffer and submit read request 52 * nilfs_gccache_submit_read_data() - add data buffer and submit read request
56 * @inode - gc inode 53 * @inode - gc inode
@@ -87,9 +84,9 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
87 goto out; 84 goto out;
88 85
89 if (pbn == 0) { 86 if (pbn == 0) {
90 struct inode *dat_inode = NILFS_I_NILFS(inode)->ns_dat; 87 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
91 /* use original dat, not gc dat. */ 88
92 err = nilfs_dat_translate(dat_inode, vbn, &pbn); 89 err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn);
93 if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */ 90 if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */
94 brelse(bh); 91 brelse(bh);
95 goto failed; 92 goto failed;
@@ -103,7 +100,7 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
103 } 100 }
104 101
105 if (!buffer_mapped(bh)) { 102 if (!buffer_mapped(bh)) {
106 bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; 103 bh->b_bdev = inode->i_sb->s_bdev;
107 set_buffer_mapped(bh); 104 set_buffer_mapped(bh);
108 } 105 }
109 bh->b_blocknr = pbn; 106 bh->b_blocknr = pbn;
@@ -160,15 +157,11 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
160 if (buffer_dirty(bh)) 157 if (buffer_dirty(bh))
161 return -EEXIST; 158 return -EEXIST;
162 159
163 if (buffer_nilfs_node(bh)) { 160 if (buffer_nilfs_node(bh) && nilfs_btree_broken_node_block(bh)) {
164 if (nilfs_btree_broken_node_block(bh)) { 161 clear_buffer_uptodate(bh);
165 clear_buffer_uptodate(bh); 162 return -EIO;
166 return -EIO;
167 }
168 nilfs_btnode_mark_dirty(bh);
169 } else {
170 nilfs_mark_buffer_dirty(bh);
171 } 163 }
164 mark_buffer_dirty(bh);
172 return 0; 165 return 0;
173} 166}
174 167
@@ -178,7 +171,7 @@ int nilfs_init_gcinode(struct inode *inode)
178 171
179 inode->i_mode = S_IFREG; 172 inode->i_mode = S_IFREG;
180 mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); 173 mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
181 inode->i_mapping->a_ops = &def_gcinode_aops; 174 inode->i_mapping->a_ops = &empty_aops;
182 inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi; 175 inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi;
183 176
184 ii->i_flags = 0; 177 ii->i_flags = 0;
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index bfc73d3a30ed..684d76300a80 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -80,7 +80,7 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino,
80 return ret; 80 return ret;
81 } 81 }
82 nilfs_palloc_commit_alloc_entry(ifile, &req); 82 nilfs_palloc_commit_alloc_entry(ifile, &req);
83 nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh); 83 mark_buffer_dirty(req.pr_entry_bh);
84 nilfs_mdt_mark_dirty(ifile); 84 nilfs_mdt_mark_dirty(ifile);
85 *out_ino = (ino_t)req.pr_entry_nr; 85 *out_ino = (ino_t)req.pr_entry_nr;
86 *out_bh = req.pr_entry_bh; 86 *out_bh = req.pr_entry_bh;
@@ -128,7 +128,7 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
128 raw_inode->i_flags = 0; 128 raw_inode->i_flags = 0;
129 kunmap_atomic(kaddr, KM_USER0); 129 kunmap_atomic(kaddr, KM_USER0);
130 130
131 nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh); 131 mark_buffer_dirty(req.pr_entry_bh);
132 brelse(req.pr_entry_bh); 132 brelse(req.pr_entry_bh);
133 133
134 nilfs_palloc_commit_free_entry(ifile, &req); 134 nilfs_palloc_commit_free_entry(ifile, &req);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index c0aa27490c02..587f18432832 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -74,14 +74,14 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
74 struct buffer_head *bh_result, int create) 74 struct buffer_head *bh_result, int create)
75{ 75{
76 struct nilfs_inode_info *ii = NILFS_I(inode); 76 struct nilfs_inode_info *ii = NILFS_I(inode);
77 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
77 __u64 blknum = 0; 78 __u64 blknum = 0;
78 int err = 0, ret; 79 int err = 0, ret;
79 struct inode *dat = NILFS_I_NILFS(inode)->ns_dat;
80 unsigned maxblocks = bh_result->b_size >> inode->i_blkbits; 80 unsigned maxblocks = bh_result->b_size >> inode->i_blkbits;
81 81
82 down_read(&NILFS_MDT(dat)->mi_sem); 82 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
83 ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks); 83 ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
84 up_read(&NILFS_MDT(dat)->mi_sem); 84 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
85 if (ret >= 0) { /* found */ 85 if (ret >= 0) { /* found */
86 map_bh(bh_result, inode->i_sb, blknum); 86 map_bh(bh_result, inode->i_sb, blknum);
87 if (ret > 0) 87 if (ret > 0)
@@ -596,6 +596,16 @@ void nilfs_write_inode_common(struct inode *inode,
596 raw_inode->i_flags = cpu_to_le32(ii->i_flags); 596 raw_inode->i_flags = cpu_to_le32(ii->i_flags);
597 raw_inode->i_generation = cpu_to_le32(inode->i_generation); 597 raw_inode->i_generation = cpu_to_le32(inode->i_generation);
598 598
599 if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) {
600 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
601
602 /* zero-fill unused portion in the case of super root block */
603 raw_inode->i_xattr = 0;
604 raw_inode->i_pad = 0;
605 memset((void *)raw_inode + sizeof(*raw_inode), 0,
606 nilfs->ns_inode_size - sizeof(*raw_inode));
607 }
608
599 if (has_bmap) 609 if (has_bmap)
600 nilfs_bmap_write(ii->i_bmap, raw_inode); 610 nilfs_bmap_write(ii->i_bmap, raw_inode);
601 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) 611 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
@@ -872,8 +882,7 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
872 return -EINVAL; /* NILFS_I_DIRTY may remain for 882 return -EINVAL; /* NILFS_I_DIRTY may remain for
873 freeing inode */ 883 freeing inode */
874 } 884 }
875 list_del(&ii->i_dirty); 885 list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
876 list_add_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
877 set_bit(NILFS_I_QUEUED, &ii->i_state); 886 set_bit(NILFS_I_QUEUED, &ii->i_state);
878 } 887 }
879 spin_unlock(&nilfs->ns_inode_lock); 888 spin_unlock(&nilfs->ns_inode_lock);
@@ -892,7 +901,7 @@ int nilfs_mark_inode_dirty(struct inode *inode)
892 return err; 901 return err;
893 } 902 }
894 nilfs_update_inode(inode, ibh); 903 nilfs_update_inode(inode, ibh);
895 nilfs_mdt_mark_buffer_dirty(ibh); 904 mark_buffer_dirty(ibh);
896 nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile); 905 nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile);
897 brelse(ibh); 906 brelse(ibh);
898 return 0; 907 return 0;
@@ -931,7 +940,7 @@ void nilfs_dirty_inode(struct inode *inode)
931int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 940int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
932 __u64 start, __u64 len) 941 __u64 start, __u64 len)
933{ 942{
934 struct the_nilfs *nilfs = NILFS_I_NILFS(inode); 943 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
935 __u64 logical = 0, phys = 0, size = 0; 944 __u64 logical = 0, phys = 0, size = 0;
936 __u32 flags = 0; 945 __u32 flags = 0;
937 loff_t isize; 946 loff_t isize;
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index f2469ba6246b..41d6743d303c 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -698,6 +698,63 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
698 return 0; 698 return 0;
699} 699}
700 700
701static int nilfs_ioctl_resize(struct inode *inode, struct file *filp,
702 void __user *argp)
703{
704 __u64 newsize;
705 int ret = -EPERM;
706
707 if (!capable(CAP_SYS_ADMIN))
708 goto out;
709
710 ret = mnt_want_write(filp->f_path.mnt);
711 if (ret)
712 goto out;
713
714 ret = -EFAULT;
715 if (copy_from_user(&newsize, argp, sizeof(newsize)))
716 goto out_drop_write;
717
718 ret = nilfs_resize_fs(inode->i_sb, newsize);
719
720out_drop_write:
721 mnt_drop_write(filp->f_path.mnt);
722out:
723 return ret;
724}
725
726static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp)
727{
728 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
729 __u64 range[2];
730 __u64 minseg, maxseg;
731 unsigned long segbytes;
732 int ret = -EPERM;
733
734 if (!capable(CAP_SYS_ADMIN))
735 goto out;
736
737 ret = -EFAULT;
738 if (copy_from_user(range, argp, sizeof(__u64[2])))
739 goto out;
740
741 ret = -ERANGE;
742 if (range[1] > i_size_read(inode->i_sb->s_bdev->bd_inode))
743 goto out;
744
745 segbytes = nilfs->ns_blocks_per_segment * nilfs->ns_blocksize;
746
747 minseg = range[0] + segbytes - 1;
748 do_div(minseg, segbytes);
749 maxseg = NILFS_SB2_OFFSET_BYTES(range[1]);
750 do_div(maxseg, segbytes);
751 maxseg--;
752
753 ret = nilfs_sufile_set_alloc_range(nilfs->ns_sufile, minseg, maxseg);
754out:
755 return ret;
756}
757
701static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, 758static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
702 unsigned int cmd, void __user *argp, 759 unsigned int cmd, void __user *argp,
703 size_t membsz, 760 size_t membsz,
@@ -763,6 +820,10 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
763 return nilfs_ioctl_clean_segments(inode, filp, cmd, argp); 820 return nilfs_ioctl_clean_segments(inode, filp, cmd, argp);
764 case NILFS_IOCTL_SYNC: 821 case NILFS_IOCTL_SYNC:
765 return nilfs_ioctl_sync(inode, filp, cmd, argp); 822 return nilfs_ioctl_sync(inode, filp, cmd, argp);
823 case NILFS_IOCTL_RESIZE:
824 return nilfs_ioctl_resize(inode, filp, argp);
825 case NILFS_IOCTL_SET_ALLOC_RANGE:
826 return nilfs_ioctl_set_alloc_range(inode, argp);
766 default: 827 default:
767 return -ENOTTY; 828 return -ENOTTY;
768 } 829 }
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index a649b05f7069..800e8d78a83b 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -66,7 +66,7 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
66 kunmap_atomic(kaddr, KM_USER0); 66 kunmap_atomic(kaddr, KM_USER0);
67 67
68 set_buffer_uptodate(bh); 68 set_buffer_uptodate(bh);
69 nilfs_mark_buffer_dirty(bh); 69 mark_buffer_dirty(bh);
70 nilfs_mdt_mark_dirty(inode); 70 nilfs_mdt_mark_dirty(inode);
71 return 0; 71 return 0;
72} 72}
@@ -355,7 +355,7 @@ int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block)
355 err = nilfs_mdt_read_block(inode, block, 0, &bh); 355 err = nilfs_mdt_read_block(inode, block, 0, &bh);
356 if (unlikely(err)) 356 if (unlikely(err))
357 return err; 357 return err;
358 nilfs_mark_buffer_dirty(bh); 358 mark_buffer_dirty(bh);
359 nilfs_mdt_mark_dirty(inode); 359 nilfs_mdt_mark_dirty(inode);
360 brelse(bh); 360 brelse(bh);
361 return 0; 361 return 0;
@@ -450,9 +450,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
450 450
451 INIT_LIST_HEAD(&shadow->frozen_buffers); 451 INIT_LIST_HEAD(&shadow->frozen_buffers);
452 address_space_init_once(&shadow->frozen_data); 452 address_space_init_once(&shadow->frozen_data);
453 nilfs_mapping_init(&shadow->frozen_data, bdi); 453 nilfs_mapping_init(&shadow->frozen_data, inode, bdi);
454 address_space_init_once(&shadow->frozen_btnodes); 454 address_space_init_once(&shadow->frozen_btnodes);
455 nilfs_mapping_init(&shadow->frozen_btnodes, bdi); 455 nilfs_mapping_init(&shadow->frozen_btnodes, inode, bdi);
456 mi->mi_shadow = shadow; 456 mi->mi_shadow = shadow;
457 return 0; 457 return 0;
458} 458}
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index ed68563ec708..ab20a4baa50f 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -64,11 +64,6 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode)
64 return inode->i_private; 64 return inode->i_private;
65} 65}
66 66
67static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode)
68{
69 return inode->i_sb->s_fs_info;
70}
71
72/* Default GFP flags using highmem */ 67/* Default GFP flags using highmem */
73#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM) 68#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM)
74 69
@@ -93,8 +88,6 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh);
93struct buffer_head *nilfs_mdt_get_frozen_buffer(struct inode *inode, 88struct buffer_head *nilfs_mdt_get_frozen_buffer(struct inode *inode,
94 struct buffer_head *bh); 89 struct buffer_head *bh);
95 90
96#define nilfs_mdt_mark_buffer_dirty(bh) nilfs_mark_buffer_dirty(bh)
97
98static inline void nilfs_mdt_mark_dirty(struct inode *inode) 91static inline void nilfs_mdt_mark_dirty(struct inode *inode)
99{ 92{
100 if (!test_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state)) 93 if (!test_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state))
@@ -108,7 +101,7 @@ static inline void nilfs_mdt_clear_dirty(struct inode *inode)
108 101
109static inline __u64 nilfs_mdt_cno(struct inode *inode) 102static inline __u64 nilfs_mdt_cno(struct inode *inode)
110{ 103{
111 return NILFS_I_NILFS(inode)->ns_cno; 104 return ((struct the_nilfs *)inode->i_sb->s_fs_info)->ns_cno;
112} 105}
113 106
114#define nilfs_mdt_bgl_lock(inode, bg) \ 107#define nilfs_mdt_bgl_lock(inode, bg) \
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index a8dd344303cb..a9c6a531f80c 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -80,12 +80,6 @@ static inline struct inode *NILFS_BTNC_I(struct address_space *btnc)
80 return &ii->vfs_inode; 80 return &ii->vfs_inode;
81} 81}
82 82
83static inline struct inode *NILFS_AS_I(struct address_space *mapping)
84{
85 return (mapping->host) ? :
86 container_of(mapping, struct inode, i_data);
87}
88
89/* 83/*
90 * Dynamic state flags of NILFS on-memory inode (i_state) 84 * Dynamic state flags of NILFS on-memory inode (i_state)
91 */ 85 */
@@ -298,6 +292,7 @@ struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb,
298 int flip); 292 int flip);
299int nilfs_commit_super(struct super_block *sb, int flag); 293int nilfs_commit_super(struct super_block *sb, int flag);
300int nilfs_cleanup_super(struct super_block *sb); 294int nilfs_cleanup_super(struct super_block *sb);
295int nilfs_resize_fs(struct super_block *sb, __u64 newsize);
301int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, 296int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
302 struct nilfs_root **root); 297 struct nilfs_root **root);
303int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno); 298int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 1168059c7efd..65221a04c6f0 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -37,8 +37,7 @@
37 37
38#define NILFS_BUFFER_INHERENT_BITS \ 38#define NILFS_BUFFER_INHERENT_BITS \
39 ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \ 39 ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \
40 (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated) | \ 40 (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Checked))
41 (1UL << BH_NILFS_Checked))
42 41
43static struct buffer_head * 42static struct buffer_head *
44__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, 43__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
@@ -59,19 +58,6 @@ __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
59 return bh; 58 return bh;
60} 59}
61 60
62/*
63 * Since the page cache of B-tree node pages or data page cache of pseudo
64 * inodes does not have a valid mapping->host pointer, calling
65 * mark_buffer_dirty() for their buffers causes a NULL pointer dereference;
66 * it calls __mark_inode_dirty(NULL) through __set_page_dirty().
67 * To avoid this problem, the old style mark_buffer_dirty() is used instead.
68 */
69void nilfs_mark_buffer_dirty(struct buffer_head *bh)
70{
71 if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
72 __set_page_dirty_nobuffers(bh->b_page);
73}
74
75struct buffer_head *nilfs_grab_buffer(struct inode *inode, 61struct buffer_head *nilfs_grab_buffer(struct inode *inode,
76 struct address_space *mapping, 62 struct address_space *mapping,
77 unsigned long blkoff, 63 unsigned long blkoff,
@@ -183,7 +169,7 @@ int nilfs_page_buffers_clean(struct page *page)
183void nilfs_page_bug(struct page *page) 169void nilfs_page_bug(struct page *page)
184{ 170{
185 struct address_space *m; 171 struct address_space *m;
186 unsigned long ino = 0; 172 unsigned long ino;
187 173
188 if (unlikely(!page)) { 174 if (unlikely(!page)) {
189 printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n"); 175 printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
@@ -191,11 +177,8 @@ void nilfs_page_bug(struct page *page)
191 } 177 }
192 178
193 m = page->mapping; 179 m = page->mapping;
194 if (m) { 180 ino = m ? m->host->i_ino : 0;
195 struct inode *inode = NILFS_AS_I(m); 181
196 if (inode != NULL)
197 ino = inode->i_ino;
198 }
199 printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx " 182 printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
200 "mapping=%p ino=%lu\n", 183 "mapping=%p ino=%lu\n",
201 page, atomic_read(&page->_count), 184 page, atomic_read(&page->_count),
@@ -217,56 +200,6 @@ void nilfs_page_bug(struct page *page)
217} 200}
218 201
219/** 202/**
220 * nilfs_alloc_private_page - allocate a private page with buffer heads
221 *
222 * Return Value: On success, a pointer to the allocated page is returned.
223 * On error, NULL is returned.
224 */
225struct page *nilfs_alloc_private_page(struct block_device *bdev, int size,
226 unsigned long state)
227{
228 struct buffer_head *bh, *head, *tail;
229 struct page *page;
230
231 page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */
232 if (unlikely(!page))
233 return NULL;
234
235 lock_page(page);
236 head = alloc_page_buffers(page, size, 0);
237 if (unlikely(!head)) {
238 unlock_page(page);
239 __free_page(page);
240 return NULL;
241 }
242
243 bh = head;
244 do {
245 bh->b_state = (1UL << BH_NILFS_Allocated) | state;
246 tail = bh;
247 bh->b_bdev = bdev;
248 bh = bh->b_this_page;
249 } while (bh);
250
251 tail->b_this_page = head;
252 attach_page_buffers(page, head);
253
254 return page;
255}
256
257void nilfs_free_private_page(struct page *page)
258{
259 BUG_ON(!PageLocked(page));
260 BUG_ON(page->mapping);
261
262 if (page_has_buffers(page) && !try_to_free_buffers(page))
263 NILFS_PAGE_BUG(page, "failed to free page");
264
265 unlock_page(page);
266 __free_page(page);
267}
268
269/**
270 * nilfs_copy_page -- copy the page with buffers 203 * nilfs_copy_page -- copy the page with buffers
271 * @dst: destination page 204 * @dst: destination page
272 * @src: source page 205 * @src: source page
@@ -492,10 +425,10 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
492 return nc; 425 return nc;
493} 426}
494 427
495void nilfs_mapping_init(struct address_space *mapping, 428void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
496 struct backing_dev_info *bdi) 429 struct backing_dev_info *bdi)
497{ 430{
498 mapping->host = NULL; 431 mapping->host = inode;
499 mapping->flags = 0; 432 mapping->flags = 0;
500 mapping_set_gfp_mask(mapping, GFP_NOFS); 433 mapping_set_gfp_mask(mapping, GFP_NOFS);
501 mapping->assoc_mapping = NULL; 434 mapping->assoc_mapping = NULL;
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index f06b79ad7493..fb7de71605a0 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -38,14 +38,12 @@ enum {
38 BH_NILFS_Redirected, 38 BH_NILFS_Redirected,
39}; 39};
40 40
41BUFFER_FNS(NILFS_Allocated, nilfs_allocated) /* nilfs private buffers */
42BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */ 41BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */
43BUFFER_FNS(NILFS_Volatile, nilfs_volatile) 42BUFFER_FNS(NILFS_Volatile, nilfs_volatile)
44BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */ 43BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */
45BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */ 44BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */
46 45
47 46
48void nilfs_mark_buffer_dirty(struct buffer_head *bh);
49int __nilfs_clear_page_dirty(struct page *); 47int __nilfs_clear_page_dirty(struct page *);
50 48
51struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *, 49struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *,
@@ -54,14 +52,11 @@ void nilfs_forget_buffer(struct buffer_head *);
54void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *); 52void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *);
55int nilfs_page_buffers_clean(struct page *); 53int nilfs_page_buffers_clean(struct page *);
56void nilfs_page_bug(struct page *); 54void nilfs_page_bug(struct page *);
57struct page *nilfs_alloc_private_page(struct block_device *, int,
58 unsigned long);
59void nilfs_free_private_page(struct page *);
60 55
61int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); 56int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
62void nilfs_copy_back_pages(struct address_space *, struct address_space *); 57void nilfs_copy_back_pages(struct address_space *, struct address_space *);
63void nilfs_clear_dirty_pages(struct address_space *); 58void nilfs_clear_dirty_pages(struct address_space *);
64void nilfs_mapping_init(struct address_space *mapping, 59void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
65 struct backing_dev_info *bdi); 60 struct backing_dev_info *bdi);
66unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); 61unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
67unsigned long nilfs_find_uncommitted_extent(struct inode *inode, 62unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index ba4a64518f38..a604ac0331b2 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -387,9 +387,9 @@ static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr,
387static void dispose_recovery_list(struct list_head *head) 387static void dispose_recovery_list(struct list_head *head)
388{ 388{
389 while (!list_empty(head)) { 389 while (!list_empty(head)) {
390 struct nilfs_recovery_block *rb 390 struct nilfs_recovery_block *rb;
391 = list_entry(head->next, 391
392 struct nilfs_recovery_block, list); 392 rb = list_first_entry(head, struct nilfs_recovery_block, list);
393 list_del(&rb->list); 393 list_del(&rb->list);
394 kfree(rb); 394 kfree(rb);
395 } 395 }
@@ -416,9 +416,9 @@ static int nilfs_segment_list_add(struct list_head *head, __u64 segnum)
416void nilfs_dispose_segment_list(struct list_head *head) 416void nilfs_dispose_segment_list(struct list_head *head)
417{ 417{
418 while (!list_empty(head)) { 418 while (!list_empty(head)) {
419 struct nilfs_segment_entry *ent 419 struct nilfs_segment_entry *ent;
420 = list_entry(head->next, 420
421 struct nilfs_segment_entry, list); 421 ent = list_first_entry(head, struct nilfs_segment_entry, list);
422 list_del(&ent->list); 422 list_del(&ent->list);
423 kfree(ent); 423 kfree(ent);
424 } 424 }
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 2853ff20f85a..850a7c0228fb 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -239,12 +239,15 @@ nilfs_segbuf_fill_in_super_root_crc(struct nilfs_segment_buffer *segbuf,
239 u32 seed) 239 u32 seed)
240{ 240{
241 struct nilfs_super_root *raw_sr; 241 struct nilfs_super_root *raw_sr;
242 struct the_nilfs *nilfs = segbuf->sb_super->s_fs_info;
243 unsigned srsize;
242 u32 crc; 244 u32 crc;
243 245
244 raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data; 246 raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data;
247 srsize = NILFS_SR_BYTES(nilfs->ns_inode_size);
245 crc = crc32_le(seed, 248 crc = crc32_le(seed,
246 (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum), 249 (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum),
247 NILFS_SR_BYTES - sizeof(raw_sr->sr_sum)); 250 srsize - sizeof(raw_sr->sr_sum));
248 raw_sr->sr_sum = cpu_to_le32(crc); 251 raw_sr->sr_sum = cpu_to_le32(crc);
249} 252}
250 253
@@ -254,18 +257,6 @@ static void nilfs_release_buffers(struct list_head *list)
254 257
255 list_for_each_entry_safe(bh, n, list, b_assoc_buffers) { 258 list_for_each_entry_safe(bh, n, list, b_assoc_buffers) {
256 list_del_init(&bh->b_assoc_buffers); 259 list_del_init(&bh->b_assoc_buffers);
257 if (buffer_nilfs_allocated(bh)) {
258 struct page *clone_page = bh->b_page;
259
260 /* remove clone page */
261 brelse(bh);
262 page_cache_release(clone_page); /* for each bh */
263 if (page_count(clone_page) <= 2) {
264 lock_page(clone_page);
265 nilfs_free_private_page(clone_page);
266 }
267 continue;
268 }
269 brelse(bh); 260 brelse(bh);
270 } 261 }
271} 262}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index afe4f2183454..141646e88fb5 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -655,13 +655,10 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
655 if (unlikely(page->index > last)) 655 if (unlikely(page->index > last))
656 break; 656 break;
657 657
658 if (mapping->host) { 658 lock_page(page);
659 lock_page(page); 659 if (!page_has_buffers(page))
660 if (!page_has_buffers(page)) 660 create_empty_buffers(page, 1 << inode->i_blkbits, 0);
661 create_empty_buffers(page, 661 unlock_page(page);
662 1 << inode->i_blkbits, 0);
663 unlock_page(page);
664 }
665 662
666 bh = head = page_buffers(page); 663 bh = head = page_buffers(page);
667 do { 664 do {
@@ -809,7 +806,7 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
809 /* The following code is duplicated with cpfile. But, it is 806 /* The following code is duplicated with cpfile. But, it is
810 needed to collect the checkpoint even if it was not newly 807 needed to collect the checkpoint even if it was not newly
811 created */ 808 created */
812 nilfs_mdt_mark_buffer_dirty(bh_cp); 809 mark_buffer_dirty(bh_cp);
813 nilfs_mdt_mark_dirty(nilfs->ns_cpfile); 810 nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
814 nilfs_cpfile_put_checkpoint( 811 nilfs_cpfile_put_checkpoint(
815 nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); 812 nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
@@ -889,12 +886,14 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
889{ 886{
890 struct buffer_head *bh_sr; 887 struct buffer_head *bh_sr;
891 struct nilfs_super_root *raw_sr; 888 struct nilfs_super_root *raw_sr;
892 unsigned isz = nilfs->ns_inode_size; 889 unsigned isz, srsz;
893 890
894 bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root; 891 bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
895 raw_sr = (struct nilfs_super_root *)bh_sr->b_data; 892 raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
893 isz = nilfs->ns_inode_size;
894 srsz = NILFS_SR_BYTES(isz);
896 895
897 raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES); 896 raw_sr->sr_bytes = cpu_to_le16(srsz);
898 raw_sr->sr_nongc_ctime 897 raw_sr->sr_nongc_ctime
899 = cpu_to_le64(nilfs_doing_gc() ? 898 = cpu_to_le64(nilfs_doing_gc() ?
900 nilfs->ns_nongc_ctime : sci->sc_seg_ctime); 899 nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
@@ -906,6 +905,7 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
906 NILFS_SR_CPFILE_OFFSET(isz), 1); 905 NILFS_SR_CPFILE_OFFSET(isz), 1);
907 nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr + 906 nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
908 NILFS_SR_SUFILE_OFFSET(isz), 1); 907 NILFS_SR_SUFILE_OFFSET(isz), 1);
908 memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
909} 909}
910 910
911static void nilfs_redirty_inodes(struct list_head *head) 911static void nilfs_redirty_inodes(struct list_head *head)
@@ -954,8 +954,8 @@ static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
954 954
955 dispose_buffers: 955 dispose_buffers:
956 while (!list_empty(listp)) { 956 while (!list_empty(listp)) {
957 bh = list_entry(listp->next, struct buffer_head, 957 bh = list_first_entry(listp, struct buffer_head,
958 b_assoc_buffers); 958 b_assoc_buffers);
959 list_del_init(&bh->b_assoc_buffers); 959 list_del_init(&bh->b_assoc_buffers);
960 brelse(bh); 960 brelse(bh);
961 } 961 }
@@ -1500,10 +1500,7 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
1500 nblocks = le32_to_cpu(finfo->fi_nblocks); 1500 nblocks = le32_to_cpu(finfo->fi_nblocks);
1501 ndatablk = le32_to_cpu(finfo->fi_ndatablk); 1501 ndatablk = le32_to_cpu(finfo->fi_ndatablk);
1502 1502
1503 if (buffer_nilfs_node(bh)) 1503 inode = bh->b_page->mapping->host;
1504 inode = NILFS_BTNC_I(bh->b_page->mapping);
1505 else
1506 inode = NILFS_AS_I(bh->b_page->mapping);
1507 1504
1508 if (mode == SC_LSEG_DSYNC) 1505 if (mode == SC_LSEG_DSYNC)
1509 sc_op = &nilfs_sc_dsync_ops; 1506 sc_op = &nilfs_sc_dsync_ops;
@@ -1556,83 +1553,24 @@ static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode)
1556 return 0; 1553 return 0;
1557} 1554}
1558 1555
1559static int 1556static void nilfs_begin_page_io(struct page *page)
1560nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out)
1561{
1562 struct page *clone_page;
1563 struct buffer_head *bh, *head, *bh2;
1564 void *kaddr;
1565
1566 bh = head = page_buffers(page);
1567
1568 clone_page = nilfs_alloc_private_page(bh->b_bdev, bh->b_size, 0);
1569 if (unlikely(!clone_page))
1570 return -ENOMEM;
1571
1572 bh2 = page_buffers(clone_page);
1573 kaddr = kmap_atomic(page, KM_USER0);
1574 do {
1575 if (list_empty(&bh->b_assoc_buffers))
1576 continue;
1577 get_bh(bh2);
1578 page_cache_get(clone_page); /* for each bh */
1579 memcpy(bh2->b_data, kaddr + bh_offset(bh), bh2->b_size);
1580 bh2->b_blocknr = bh->b_blocknr;
1581 list_replace(&bh->b_assoc_buffers, &bh2->b_assoc_buffers);
1582 list_add_tail(&bh->b_assoc_buffers, out);
1583 } while (bh = bh->b_this_page, bh2 = bh2->b_this_page, bh != head);
1584 kunmap_atomic(kaddr, KM_USER0);
1585
1586 if (!TestSetPageWriteback(clone_page))
1587 account_page_writeback(clone_page);
1588 unlock_page(clone_page);
1589
1590 return 0;
1591}
1592
1593static int nilfs_test_page_to_be_frozen(struct page *page)
1594{
1595 struct address_space *mapping = page->mapping;
1596
1597 if (!mapping || !mapping->host || S_ISDIR(mapping->host->i_mode))
1598 return 0;
1599
1600 if (page_mapped(page)) {
1601 ClearPageChecked(page);
1602 return 1;
1603 }
1604 return PageChecked(page);
1605}
1606
1607static int nilfs_begin_page_io(struct page *page, struct list_head *out)
1608{ 1557{
1609 if (!page || PageWriteback(page)) 1558 if (!page || PageWriteback(page))
1610 /* For split b-tree node pages, this function may be called 1559 /* For split b-tree node pages, this function may be called
1611 twice. We ignore the 2nd or later calls by this check. */ 1560 twice. We ignore the 2nd or later calls by this check. */
1612 return 0; 1561 return;
1613 1562
1614 lock_page(page); 1563 lock_page(page);
1615 clear_page_dirty_for_io(page); 1564 clear_page_dirty_for_io(page);
1616 set_page_writeback(page); 1565 set_page_writeback(page);
1617 unlock_page(page); 1566 unlock_page(page);
1618
1619 if (nilfs_test_page_to_be_frozen(page)) {
1620 int err = nilfs_copy_replace_page_buffers(page, out);
1621 if (unlikely(err))
1622 return err;
1623 }
1624 return 0;
1625} 1567}
1626 1568
1627static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, 1569static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
1628 struct page **failed_page)
1629{ 1570{
1630 struct nilfs_segment_buffer *segbuf; 1571 struct nilfs_segment_buffer *segbuf;
1631 struct page *bd_page = NULL, *fs_page = NULL; 1572 struct page *bd_page = NULL, *fs_page = NULL;
1632 struct list_head *list = &sci->sc_copied_buffers;
1633 int err;
1634 1573
1635 *failed_page = NULL;
1636 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { 1574 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1637 struct buffer_head *bh; 1575 struct buffer_head *bh;
1638 1576
@@ -1662,11 +1600,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
1662 break; 1600 break;
1663 } 1601 }
1664 if (bh->b_page != fs_page) { 1602 if (bh->b_page != fs_page) {
1665 err = nilfs_begin_page_io(fs_page, list); 1603 nilfs_begin_page_io(fs_page);
1666 if (unlikely(err)) {
1667 *failed_page = fs_page;
1668 goto out;
1669 }
1670 fs_page = bh->b_page; 1604 fs_page = bh->b_page;
1671 } 1605 }
1672 } 1606 }
@@ -1677,11 +1611,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
1677 set_page_writeback(bd_page); 1611 set_page_writeback(bd_page);
1678 unlock_page(bd_page); 1612 unlock_page(bd_page);
1679 } 1613 }
1680 err = nilfs_begin_page_io(fs_page, list); 1614 nilfs_begin_page_io(fs_page);
1681 if (unlikely(err))
1682 *failed_page = fs_page;
1683 out:
1684 return err;
1685} 1615}
1686 1616
1687static int nilfs_segctor_write(struct nilfs_sc_info *sci, 1617static int nilfs_segctor_write(struct nilfs_sc_info *sci,
@@ -1694,24 +1624,6 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci,
1694 return ret; 1624 return ret;
1695} 1625}
1696 1626
1697static void __nilfs_end_page_io(struct page *page, int err)
1698{
1699 if (!err) {
1700 if (!nilfs_page_buffers_clean(page))
1701 __set_page_dirty_nobuffers(page);
1702 ClearPageError(page);
1703 } else {
1704 __set_page_dirty_nobuffers(page);
1705 SetPageError(page);
1706 }
1707
1708 if (buffer_nilfs_allocated(page_buffers(page))) {
1709 if (TestClearPageWriteback(page))
1710 dec_zone_page_state(page, NR_WRITEBACK);
1711 } else
1712 end_page_writeback(page);
1713}
1714
1715static void nilfs_end_page_io(struct page *page, int err) 1627static void nilfs_end_page_io(struct page *page, int err)
1716{ 1628{
1717 if (!page) 1629 if (!page)
@@ -1738,40 +1650,19 @@ static void nilfs_end_page_io(struct page *page, int err)
1738 return; 1650 return;
1739 } 1651 }
1740 1652
1741 __nilfs_end_page_io(page, err); 1653 if (!err) {
1742} 1654 if (!nilfs_page_buffers_clean(page))
1743 1655 __set_page_dirty_nobuffers(page);
1744static void nilfs_clear_copied_buffers(struct list_head *list, int err) 1656 ClearPageError(page);
1745{ 1657 } else {
1746 struct buffer_head *bh, *head; 1658 __set_page_dirty_nobuffers(page);
1747 struct page *page; 1659 SetPageError(page);
1748
1749 while (!list_empty(list)) {
1750 bh = list_entry(list->next, struct buffer_head,
1751 b_assoc_buffers);
1752 page = bh->b_page;
1753 page_cache_get(page);
1754 head = bh = page_buffers(page);
1755 do {
1756 if (!list_empty(&bh->b_assoc_buffers)) {
1757 list_del_init(&bh->b_assoc_buffers);
1758 if (!err) {
1759 set_buffer_uptodate(bh);
1760 clear_buffer_dirty(bh);
1761 clear_buffer_delay(bh);
1762 clear_buffer_nilfs_volatile(bh);
1763 }
1764 brelse(bh); /* for b_assoc_buffers */
1765 }
1766 } while ((bh = bh->b_this_page) != head);
1767
1768 __nilfs_end_page_io(page, err);
1769 page_cache_release(page);
1770 } 1660 }
1661
1662 end_page_writeback(page);
1771} 1663}
1772 1664
1773static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page, 1665static void nilfs_abort_logs(struct list_head *logs, int err)
1774 int err)
1775{ 1666{
1776 struct nilfs_segment_buffer *segbuf; 1667 struct nilfs_segment_buffer *segbuf;
1777 struct page *bd_page = NULL, *fs_page = NULL; 1668 struct page *bd_page = NULL, *fs_page = NULL;
@@ -1801,8 +1692,6 @@ static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page,
1801 } 1692 }
1802 if (bh->b_page != fs_page) { 1693 if (bh->b_page != fs_page) {
1803 nilfs_end_page_io(fs_page, err); 1694 nilfs_end_page_io(fs_page, err);
1804 if (fs_page && fs_page == failed_page)
1805 return;
1806 fs_page = bh->b_page; 1695 fs_page = bh->b_page;
1807 } 1696 }
1808 } 1697 }
@@ -1821,12 +1710,11 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
1821 1710
1822 list_splice_tail_init(&sci->sc_write_logs, &logs); 1711 list_splice_tail_init(&sci->sc_write_logs, &logs);
1823 ret = nilfs_wait_on_logs(&logs); 1712 ret = nilfs_wait_on_logs(&logs);
1824 nilfs_abort_logs(&logs, NULL, ret ? : err); 1713 nilfs_abort_logs(&logs, ret ? : err);
1825 1714
1826 list_splice_tail_init(&sci->sc_segbufs, &logs); 1715 list_splice_tail_init(&sci->sc_segbufs, &logs);
1827 nilfs_cancel_segusage(&logs, nilfs->ns_sufile); 1716 nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
1828 nilfs_free_incomplete_logs(&logs, nilfs); 1717 nilfs_free_incomplete_logs(&logs, nilfs);
1829 nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err);
1830 1718
1831 if (sci->sc_stage.flags & NILFS_CF_SUFREED) { 1719 if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
1832 ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile, 1720 ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
@@ -1920,8 +1808,6 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1920 1808
1921 nilfs_end_page_io(fs_page, 0); 1809 nilfs_end_page_io(fs_page, 0);
1922 1810
1923 nilfs_clear_copied_buffers(&sci->sc_copied_buffers, 0);
1924
1925 nilfs_drop_collected_inodes(&sci->sc_dirty_files); 1811 nilfs_drop_collected_inodes(&sci->sc_dirty_files);
1926 1812
1927 if (nilfs_doing_gc()) 1813 if (nilfs_doing_gc())
@@ -1979,7 +1865,7 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
1979 "failed to get inode block.\n"); 1865 "failed to get inode block.\n");
1980 return err; 1866 return err;
1981 } 1867 }
1982 nilfs_mdt_mark_buffer_dirty(ibh); 1868 mark_buffer_dirty(ibh);
1983 nilfs_mdt_mark_dirty(ifile); 1869 nilfs_mdt_mark_dirty(ifile);
1984 spin_lock(&nilfs->ns_inode_lock); 1870 spin_lock(&nilfs->ns_inode_lock);
1985 if (likely(!ii->i_bh)) 1871 if (likely(!ii->i_bh))
@@ -1991,8 +1877,7 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
1991 1877
1992 clear_bit(NILFS_I_QUEUED, &ii->i_state); 1878 clear_bit(NILFS_I_QUEUED, &ii->i_state);
1993 set_bit(NILFS_I_BUSY, &ii->i_state); 1879 set_bit(NILFS_I_BUSY, &ii->i_state);
1994 list_del(&ii->i_dirty); 1880 list_move_tail(&ii->i_dirty, &sci->sc_dirty_files);
1995 list_add_tail(&ii->i_dirty, &sci->sc_dirty_files);
1996 } 1881 }
1997 spin_unlock(&nilfs->ns_inode_lock); 1882 spin_unlock(&nilfs->ns_inode_lock);
1998 1883
@@ -2014,8 +1899,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
2014 clear_bit(NILFS_I_BUSY, &ii->i_state); 1899 clear_bit(NILFS_I_BUSY, &ii->i_state);
2015 brelse(ii->i_bh); 1900 brelse(ii->i_bh);
2016 ii->i_bh = NULL; 1901 ii->i_bh = NULL;
2017 list_del(&ii->i_dirty); 1902 list_move_tail(&ii->i_dirty, &ti->ti_garbage);
2018 list_add_tail(&ii->i_dirty, &ti->ti_garbage);
2019 } 1903 }
2020 spin_unlock(&nilfs->ns_inode_lock); 1904 spin_unlock(&nilfs->ns_inode_lock);
2021} 1905}
@@ -2026,7 +1910,6 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
2026static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) 1910static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2027{ 1911{
2028 struct the_nilfs *nilfs = sci->sc_super->s_fs_info; 1912 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2029 struct page *failed_page;
2030 int err; 1913 int err;
2031 1914
2032 sci->sc_stage.scnt = NILFS_ST_INIT; 1915 sci->sc_stage.scnt = NILFS_ST_INIT;
@@ -2081,11 +1964,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2081 nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); 1964 nilfs_segctor_update_segusage(sci, nilfs->ns_sufile);
2082 1965
2083 /* Write partial segments */ 1966 /* Write partial segments */
2084 err = nilfs_segctor_prepare_write(sci, &failed_page); 1967 nilfs_segctor_prepare_write(sci);
2085 if (err) {
2086 nilfs_abort_logs(&sci->sc_segbufs, failed_page, err);
2087 goto failed_to_write;
2088 }
2089 1968
2090 nilfs_add_checksums_on_logs(&sci->sc_segbufs, 1969 nilfs_add_checksums_on_logs(&sci->sc_segbufs,
2091 nilfs->ns_crc_seed); 1970 nilfs->ns_crc_seed);
@@ -2687,7 +2566,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
2687 INIT_LIST_HEAD(&sci->sc_segbufs); 2566 INIT_LIST_HEAD(&sci->sc_segbufs);
2688 INIT_LIST_HEAD(&sci->sc_write_logs); 2567 INIT_LIST_HEAD(&sci->sc_write_logs);
2689 INIT_LIST_HEAD(&sci->sc_gc_inodes); 2568 INIT_LIST_HEAD(&sci->sc_gc_inodes);
2690 INIT_LIST_HEAD(&sci->sc_copied_buffers);
2691 init_timer(&sci->sc_timer); 2569 init_timer(&sci->sc_timer);
2692 2570
2693 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; 2571 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
@@ -2741,8 +2619,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2741 if (flag || !nilfs_segctor_confirm(sci)) 2619 if (flag || !nilfs_segctor_confirm(sci))
2742 nilfs_segctor_write_out(sci); 2620 nilfs_segctor_write_out(sci);
2743 2621
2744 WARN_ON(!list_empty(&sci->sc_copied_buffers));
2745
2746 if (!list_empty(&sci->sc_dirty_files)) { 2622 if (!list_empty(&sci->sc_dirty_files)) {
2747 nilfs_warning(sci->sc_super, __func__, 2623 nilfs_warning(sci->sc_super, __func__,
2748 "dirty file(s) after the final construction\n"); 2624 "dirty file(s) after the final construction\n");
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 6c02a86745fb..38a1d0013314 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -92,7 +92,6 @@ struct nilfs_segsum_pointer {
92 * @sc_nblk_inc: Block count of current generation 92 * @sc_nblk_inc: Block count of current generation
93 * @sc_dirty_files: List of files to be written 93 * @sc_dirty_files: List of files to be written
94 * @sc_gc_inodes: List of GC inodes having blocks to be written 94 * @sc_gc_inodes: List of GC inodes having blocks to be written
95 * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data
96 * @sc_freesegs: array of segment numbers to be freed 95 * @sc_freesegs: array of segment numbers to be freed
97 * @sc_nfreesegs: number of segments on @sc_freesegs 96 * @sc_nfreesegs: number of segments on @sc_freesegs
98 * @sc_dsync_inode: inode whose data pages are written for a sync operation 97 * @sc_dsync_inode: inode whose data pages are written for a sync operation
@@ -136,7 +135,6 @@ struct nilfs_sc_info {
136 135
137 struct list_head sc_dirty_files; 136 struct list_head sc_dirty_files;
138 struct list_head sc_gc_inodes; 137 struct list_head sc_gc_inodes;
139 struct list_head sc_copied_buffers;
140 138
141 __u64 *sc_freesegs; 139 __u64 *sc_freesegs;
142 size_t sc_nfreesegs; 140 size_t sc_nfreesegs;
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 1d6f488ccae8..0a0aba617d8a 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -33,7 +33,9 @@
33 33
34struct nilfs_sufile_info { 34struct nilfs_sufile_info {
35 struct nilfs_mdt_info mi; 35 struct nilfs_mdt_info mi;
36 unsigned long ncleansegs; 36 unsigned long ncleansegs;/* number of clean segments */
37 __u64 allocmin; /* lower limit of allocatable segment range */
38 __u64 allocmax; /* upper limit of allocatable segment range */
37}; 39};
38 40
39static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile) 41static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile)
@@ -96,6 +98,13 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum,
96 create, NULL, bhp); 98 create, NULL, bhp);
97} 99}
98 100
101static int nilfs_sufile_delete_segment_usage_block(struct inode *sufile,
102 __u64 segnum)
103{
104 return nilfs_mdt_delete_block(sufile,
105 nilfs_sufile_get_blkoff(sufile, segnum));
106}
107
99static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, 108static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
100 u64 ncleanadd, u64 ndirtyadd) 109 u64 ncleanadd, u64 ndirtyadd)
101{ 110{
@@ -108,7 +117,7 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
108 le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); 117 le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd);
109 kunmap_atomic(kaddr, KM_USER0); 118 kunmap_atomic(kaddr, KM_USER0);
110 119
111 nilfs_mdt_mark_buffer_dirty(header_bh); 120 mark_buffer_dirty(header_bh);
112} 121}
113 122
114/** 123/**
@@ -248,6 +257,35 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create,
248} 257}
249 258
250/** 259/**
260 * nilfs_sufile_set_alloc_range - limit range of segment to be allocated
261 * @sufile: inode of segment usage file
262 * @start: minimum segment number of allocatable region (inclusive)
263 * @end: maximum segment number of allocatable region (inclusive)
264 *
265 * Return Value: On success, 0 is returned. On error, one of the
266 * following negative error codes is returned.
267 *
268 * %-ERANGE - invalid segment region
269 */
270int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end)
271{
272 struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
273 __u64 nsegs;
274 int ret = -ERANGE;
275
276 down_write(&NILFS_MDT(sufile)->mi_sem);
277 nsegs = nilfs_sufile_get_nsegments(sufile);
278
279 if (start <= end && end < nsegs) {
280 sui->allocmin = start;
281 sui->allocmax = end;
282 ret = 0;
283 }
284 up_write(&NILFS_MDT(sufile)->mi_sem);
285 return ret;
286}
287
288/**
251 * nilfs_sufile_alloc - allocate a segment 289 * nilfs_sufile_alloc - allocate a segment
252 * @sufile: inode of segment usage file 290 * @sufile: inode of segment usage file
253 * @segnump: pointer to segment number 291 * @segnump: pointer to segment number
@@ -269,11 +307,12 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
269 struct buffer_head *header_bh, *su_bh; 307 struct buffer_head *header_bh, *su_bh;
270 struct nilfs_sufile_header *header; 308 struct nilfs_sufile_header *header;
271 struct nilfs_segment_usage *su; 309 struct nilfs_segment_usage *su;
310 struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
272 size_t susz = NILFS_MDT(sufile)->mi_entry_size; 311 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
273 __u64 segnum, maxsegnum, last_alloc; 312 __u64 segnum, maxsegnum, last_alloc;
274 void *kaddr; 313 void *kaddr;
275 unsigned long nsegments, ncleansegs, nsus; 314 unsigned long nsegments, ncleansegs, nsus, cnt;
276 int ret, i, j; 315 int ret, j;
277 316
278 down_write(&NILFS_MDT(sufile)->mi_sem); 317 down_write(&NILFS_MDT(sufile)->mi_sem);
279 318
@@ -287,13 +326,31 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
287 kunmap_atomic(kaddr, KM_USER0); 326 kunmap_atomic(kaddr, KM_USER0);
288 327
289 nsegments = nilfs_sufile_get_nsegments(sufile); 328 nsegments = nilfs_sufile_get_nsegments(sufile);
329 maxsegnum = sui->allocmax;
290 segnum = last_alloc + 1; 330 segnum = last_alloc + 1;
291 maxsegnum = nsegments - 1; 331 if (segnum < sui->allocmin || segnum > sui->allocmax)
292 for (i = 0; i < nsegments; i += nsus) { 332 segnum = sui->allocmin;
293 if (segnum >= nsegments) { 333
294 /* wrap around */ 334 for (cnt = 0; cnt < nsegments; cnt += nsus) {
295 segnum = 0; 335 if (segnum > maxsegnum) {
296 maxsegnum = last_alloc; 336 if (cnt < sui->allocmax - sui->allocmin + 1) {
337 /*
338 * wrap around in the limited region.
339 * if allocation started from
340 * sui->allocmin, this never happens.
341 */
342 segnum = sui->allocmin;
343 maxsegnum = last_alloc;
344 } else if (segnum > sui->allocmin &&
345 sui->allocmax + 1 < nsegments) {
346 segnum = sui->allocmax + 1;
347 maxsegnum = nsegments - 1;
348 } else if (sui->allocmin > 0) {
349 segnum = 0;
350 maxsegnum = sui->allocmin - 1;
351 } else {
352 break; /* never happens */
353 }
297 } 354 }
298 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, 355 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1,
299 &su_bh); 356 &su_bh);
@@ -319,9 +376,9 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
319 header->sh_last_alloc = cpu_to_le64(segnum); 376 header->sh_last_alloc = cpu_to_le64(segnum);
320 kunmap_atomic(kaddr, KM_USER0); 377 kunmap_atomic(kaddr, KM_USER0);
321 378
322 NILFS_SUI(sufile)->ncleansegs--; 379 sui->ncleansegs--;
323 nilfs_mdt_mark_buffer_dirty(header_bh); 380 mark_buffer_dirty(header_bh);
324 nilfs_mdt_mark_buffer_dirty(su_bh); 381 mark_buffer_dirty(su_bh);
325 nilfs_mdt_mark_dirty(sufile); 382 nilfs_mdt_mark_dirty(sufile);
326 brelse(su_bh); 383 brelse(su_bh);
327 *segnump = segnum; 384 *segnump = segnum;
@@ -364,7 +421,7 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum,
364 nilfs_sufile_mod_counter(header_bh, -1, 1); 421 nilfs_sufile_mod_counter(header_bh, -1, 1);
365 NILFS_SUI(sufile)->ncleansegs--; 422 NILFS_SUI(sufile)->ncleansegs--;
366 423
367 nilfs_mdt_mark_buffer_dirty(su_bh); 424 mark_buffer_dirty(su_bh);
368 nilfs_mdt_mark_dirty(sufile); 425 nilfs_mdt_mark_dirty(sufile);
369} 426}
370 427
@@ -395,7 +452,7 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
395 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); 452 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
396 NILFS_SUI(sufile)->ncleansegs -= clean; 453 NILFS_SUI(sufile)->ncleansegs -= clean;
397 454
398 nilfs_mdt_mark_buffer_dirty(su_bh); 455 mark_buffer_dirty(su_bh);
399 nilfs_mdt_mark_dirty(sufile); 456 nilfs_mdt_mark_dirty(sufile);
400} 457}
401 458
@@ -421,7 +478,7 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
421 sudirty = nilfs_segment_usage_dirty(su); 478 sudirty = nilfs_segment_usage_dirty(su);
422 nilfs_segment_usage_set_clean(su); 479 nilfs_segment_usage_set_clean(su);
423 kunmap_atomic(kaddr, KM_USER0); 480 kunmap_atomic(kaddr, KM_USER0);
424 nilfs_mdt_mark_buffer_dirty(su_bh); 481 mark_buffer_dirty(su_bh);
425 482
426 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); 483 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0);
427 NILFS_SUI(sufile)->ncleansegs++; 484 NILFS_SUI(sufile)->ncleansegs++;
@@ -441,7 +498,7 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
441 498
442 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); 499 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
443 if (!ret) { 500 if (!ret) {
444 nilfs_mdt_mark_buffer_dirty(bh); 501 mark_buffer_dirty(bh);
445 nilfs_mdt_mark_dirty(sufile); 502 nilfs_mdt_mark_dirty(sufile);
446 brelse(bh); 503 brelse(bh);
447 } 504 }
@@ -476,7 +533,7 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
476 su->su_nblocks = cpu_to_le32(nblocks); 533 su->su_nblocks = cpu_to_le32(nblocks);
477 kunmap_atomic(kaddr, KM_USER0); 534 kunmap_atomic(kaddr, KM_USER0);
478 535
479 nilfs_mdt_mark_buffer_dirty(bh); 536 mark_buffer_dirty(bh);
480 nilfs_mdt_mark_dirty(sufile); 537 nilfs_mdt_mark_dirty(sufile);
481 brelse(bh); 538 brelse(bh);
482 539
@@ -505,7 +562,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
505{ 562{
506 struct buffer_head *header_bh; 563 struct buffer_head *header_bh;
507 struct nilfs_sufile_header *header; 564 struct nilfs_sufile_header *header;
508 struct the_nilfs *nilfs = NILFS_I_NILFS(sufile); 565 struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
509 void *kaddr; 566 void *kaddr;
510 int ret; 567 int ret;
511 568
@@ -555,11 +612,183 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
555 nilfs_sufile_mod_counter(header_bh, -1, 0); 612 nilfs_sufile_mod_counter(header_bh, -1, 0);
556 NILFS_SUI(sufile)->ncleansegs--; 613 NILFS_SUI(sufile)->ncleansegs--;
557 } 614 }
558 nilfs_mdt_mark_buffer_dirty(su_bh); 615 mark_buffer_dirty(su_bh);
559 nilfs_mdt_mark_dirty(sufile); 616 nilfs_mdt_mark_dirty(sufile);
560} 617}
561 618
562/** 619/**
620 * nilfs_sufile_truncate_range - truncate range of segment array
621 * @sufile: inode of segment usage file
622 * @start: start segment number (inclusive)
623 * @end: end segment number (inclusive)
624 *
625 * Return Value: On success, 0 is returned. On error, one of the
626 * following negative error codes is returned.
627 *
628 * %-EIO - I/O error.
629 *
630 * %-ENOMEM - Insufficient amount of memory available.
631 *
632 * %-EINVAL - Invalid number of segments specified
633 *
634 * %-EBUSY - Dirty or active segments are present in the range
635 */
636static int nilfs_sufile_truncate_range(struct inode *sufile,
637 __u64 start, __u64 end)
638{
639 struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
640 struct buffer_head *header_bh;
641 struct buffer_head *su_bh;
642 struct nilfs_segment_usage *su, *su2;
643 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
644 unsigned long segusages_per_block;
645 unsigned long nsegs, ncleaned;
646 __u64 segnum;
647 void *kaddr;
648 ssize_t n, nc;
649 int ret;
650 int j;
651
652 nsegs = nilfs_sufile_get_nsegments(sufile);
653
654 ret = -EINVAL;
655 if (start > end || start >= nsegs)
656 goto out;
657
658 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
659 if (ret < 0)
660 goto out;
661
662 segusages_per_block = nilfs_sufile_segment_usages_per_block(sufile);
663 ncleaned = 0;
664
665 for (segnum = start; segnum <= end; segnum += n) {
666 n = min_t(unsigned long,
667 segusages_per_block -
668 nilfs_sufile_get_offset(sufile, segnum),
669 end - segnum + 1);
670 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0,
671 &su_bh);
672 if (ret < 0) {
673 if (ret != -ENOENT)
674 goto out_header;
675 /* hole */
676 continue;
677 }
678 kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
679 su = nilfs_sufile_block_get_segment_usage(
680 sufile, segnum, su_bh, kaddr);
681 su2 = su;
682 for (j = 0; j < n; j++, su = (void *)su + susz) {
683 if ((le32_to_cpu(su->su_flags) &
684 ~(1UL << NILFS_SEGMENT_USAGE_ERROR)) ||
685 nilfs_segment_is_active(nilfs, segnum + j)) {
686 ret = -EBUSY;
687 kunmap_atomic(kaddr, KM_USER0);
688 brelse(su_bh);
689 goto out_header;
690 }
691 }
692 nc = 0;
693 for (su = su2, j = 0; j < n; j++, su = (void *)su + susz) {
694 if (nilfs_segment_usage_error(su)) {
695 nilfs_segment_usage_set_clean(su);
696 nc++;
697 }
698 }
699 kunmap_atomic(kaddr, KM_USER0);
700 if (nc > 0) {
701 mark_buffer_dirty(su_bh);
702 ncleaned += nc;
703 }
704 brelse(su_bh);
705
706 if (n == segusages_per_block) {
707 /* make hole */
708 nilfs_sufile_delete_segment_usage_block(sufile, segnum);
709 }
710 }
711 ret = 0;
712
713out_header:
714 if (ncleaned > 0) {
715 NILFS_SUI(sufile)->ncleansegs += ncleaned;
716 nilfs_sufile_mod_counter(header_bh, ncleaned, 0);
717 nilfs_mdt_mark_dirty(sufile);
718 }
719 brelse(header_bh);
720out:
721 return ret;
722}
723
724/**
725 * nilfs_sufile_resize - resize segment array
726 * @sufile: inode of segment usage file
727 * @newnsegs: new number of segments
728 *
729 * Return Value: On success, 0 is returned. On error, one of the
730 * following negative error codes is returned.
731 *
732 * %-EIO - I/O error.
733 *
734 * %-ENOMEM - Insufficient amount of memory available.
735 *
736 * %-ENOSPC - Enough free space is not left for shrinking
737 *
738 * %-EBUSY - Dirty or active segments exist in the region to be truncated
739 */
740int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
741{
742 struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
743 struct buffer_head *header_bh;
744 struct nilfs_sufile_header *header;
745 struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
746 void *kaddr;
747 unsigned long nsegs, nrsvsegs;
748 int ret = 0;
749
750 down_write(&NILFS_MDT(sufile)->mi_sem);
751
752 nsegs = nilfs_sufile_get_nsegments(sufile);
753 if (nsegs == newnsegs)
754 goto out;
755
756 ret = -ENOSPC;
757 nrsvsegs = nilfs_nrsvsegs(nilfs, newnsegs);
758 if (newnsegs < nsegs && nsegs - newnsegs + nrsvsegs > sui->ncleansegs)
759 goto out;
760
761 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
762 if (ret < 0)
763 goto out;
764
765 if (newnsegs > nsegs) {
766 sui->ncleansegs += newnsegs - nsegs;
767 } else /* newnsegs < nsegs */ {
768 ret = nilfs_sufile_truncate_range(sufile, newnsegs, nsegs - 1);
769 if (ret < 0)
770 goto out_header;
771
772 sui->ncleansegs -= nsegs - newnsegs;
773 }
774
775 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
776 header = kaddr + bh_offset(header_bh);
777 header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs);
778 kunmap_atomic(kaddr, KM_USER0);
779
780 mark_buffer_dirty(header_bh);
781 nilfs_mdt_mark_dirty(sufile);
782 nilfs_set_nsegments(nilfs, newnsegs);
783
784out_header:
785 brelse(header_bh);
786out:
787 up_write(&NILFS_MDT(sufile)->mi_sem);
788 return ret;
789}
790
791/**
563 * nilfs_sufile_get_suinfo - 792 * nilfs_sufile_get_suinfo -
564 * @sufile: inode of segment usage file 793 * @sufile: inode of segment usage file
565 * @segnum: segment number to start looking 794 * @segnum: segment number to start looking
@@ -583,7 +812,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
583 struct nilfs_segment_usage *su; 812 struct nilfs_segment_usage *su;
584 struct nilfs_suinfo *si = buf; 813 struct nilfs_suinfo *si = buf;
585 size_t susz = NILFS_MDT(sufile)->mi_entry_size; 814 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
586 struct the_nilfs *nilfs = NILFS_I_NILFS(sufile); 815 struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
587 void *kaddr; 816 void *kaddr;
588 unsigned long nsegs, segusages_per_block; 817 unsigned long nsegs, segusages_per_block;
589 ssize_t n; 818 ssize_t n;
@@ -679,6 +908,9 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize,
679 kunmap_atomic(kaddr, KM_USER0); 908 kunmap_atomic(kaddr, KM_USER0);
680 brelse(header_bh); 909 brelse(header_bh);
681 910
911 sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1;
912 sui->allocmin = 0;
913
682 unlock_new_inode(sufile); 914 unlock_new_inode(sufile);
683 out: 915 out:
684 *inodep = sufile; 916 *inodep = sufile;
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index a943fbacb45b..e84bc5b51fc1 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -31,11 +31,12 @@
31 31
32static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) 32static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
33{ 33{
34 return NILFS_I_NILFS(sufile)->ns_nsegments; 34 return ((struct the_nilfs *)sufile->i_sb->s_fs_info)->ns_nsegments;
35} 35}
36 36
37unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile); 37unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile);
38 38
39int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end);
39int nilfs_sufile_alloc(struct inode *, __u64 *); 40int nilfs_sufile_alloc(struct inode *, __u64 *);
40int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum); 41int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum);
41int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, 42int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
@@ -61,6 +62,7 @@ void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *,
61void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, 62void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *,
62 struct buffer_head *); 63 struct buffer_head *);
63 64
65int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs);
64int nilfs_sufile_read(struct super_block *sb, size_t susize, 66int nilfs_sufile_read(struct super_block *sb, size_t susize,
65 struct nilfs_inode *raw_inode, struct inode **inodep); 67 struct nilfs_inode *raw_inode, struct inode **inodep);
66 68
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 062cca065195..8351c44a7320 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -56,6 +56,7 @@
56#include "btnode.h" 56#include "btnode.h"
57#include "page.h" 57#include "page.h"
58#include "cpfile.h" 58#include "cpfile.h"
59#include "sufile.h" /* nilfs_sufile_resize(), nilfs_sufile_set_alloc_range() */
59#include "ifile.h" 60#include "ifile.h"
60#include "dat.h" 61#include "dat.h"
61#include "segment.h" 62#include "segment.h"
@@ -165,7 +166,7 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
165 ii->i_state = 0; 166 ii->i_state = 0;
166 ii->i_cno = 0; 167 ii->i_cno = 0;
167 ii->vfs_inode.i_version = 1; 168 ii->vfs_inode.i_version = 1;
168 nilfs_btnode_cache_init(&ii->i_btnode_cache, sb->s_bdi); 169 nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode, sb->s_bdi);
169 return &ii->vfs_inode; 170 return &ii->vfs_inode;
170} 171}
171 172
@@ -347,6 +348,134 @@ int nilfs_cleanup_super(struct super_block *sb)
347 return ret; 348 return ret;
348} 349}
349 350
351/**
352 * nilfs_move_2nd_super - relocate secondary super block
353 * @sb: super block instance
354 * @sb2off: new offset of the secondary super block (in bytes)
355 */
356static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
357{
358 struct the_nilfs *nilfs = sb->s_fs_info;
359 struct buffer_head *nsbh;
360 struct nilfs_super_block *nsbp;
361 sector_t blocknr, newblocknr;
362 unsigned long offset;
363 int sb2i = -1; /* array index of the secondary superblock */
364 int ret = 0;
365
366 /* nilfs->ns_sem must be locked by the caller. */
367 if (nilfs->ns_sbh[1] &&
368 nilfs->ns_sbh[1]->b_blocknr > nilfs->ns_first_data_block) {
369 sb2i = 1;
370 blocknr = nilfs->ns_sbh[1]->b_blocknr;
371 } else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) {
372 sb2i = 0;
373 blocknr = nilfs->ns_sbh[0]->b_blocknr;
374 }
375 if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off)
376 goto out; /* super block location is unchanged */
377
378 /* Get new super block buffer */
379 newblocknr = sb2off >> nilfs->ns_blocksize_bits;
380 offset = sb2off & (nilfs->ns_blocksize - 1);
381 nsbh = sb_getblk(sb, newblocknr);
382 if (!nsbh) {
383 printk(KERN_WARNING
384 "NILFS warning: unable to move secondary superblock "
385 "to block %llu\n", (unsigned long long)newblocknr);
386 ret = -EIO;
387 goto out;
388 }
389 nsbp = (void *)nsbh->b_data + offset;
390 memset(nsbp, 0, nilfs->ns_blocksize);
391
392 if (sb2i >= 0) {
393 memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize);
394 brelse(nilfs->ns_sbh[sb2i]);
395 nilfs->ns_sbh[sb2i] = nsbh;
396 nilfs->ns_sbp[sb2i] = nsbp;
397 } else if (nilfs->ns_sbh[0]->b_blocknr < nilfs->ns_first_data_block) {
398 /* secondary super block will be restored to index 1 */
399 nilfs->ns_sbh[1] = nsbh;
400 nilfs->ns_sbp[1] = nsbp;
401 } else {
402 brelse(nsbh);
403 }
404out:
405 return ret;
406}
407
408/**
409 * nilfs_resize_fs - resize the filesystem
410 * @sb: super block instance
411 * @newsize: new size of the filesystem (in bytes)
412 */
413int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
414{
415 struct the_nilfs *nilfs = sb->s_fs_info;
416 struct nilfs_super_block **sbp;
417 __u64 devsize, newnsegs;
418 loff_t sb2off;
419 int ret;
420
421 ret = -ERANGE;
422 devsize = i_size_read(sb->s_bdev->bd_inode);
423 if (newsize > devsize)
424 goto out;
425
426 /*
427 * Write lock is required to protect some functions depending
428 * on the number of segments, the number of reserved segments,
429 * and so forth.
430 */
431 down_write(&nilfs->ns_segctor_sem);
432
433 sb2off = NILFS_SB2_OFFSET_BYTES(newsize);
434 newnsegs = sb2off >> nilfs->ns_blocksize_bits;
435 do_div(newnsegs, nilfs->ns_blocks_per_segment);
436
437 ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs);
438 up_write(&nilfs->ns_segctor_sem);
439 if (ret < 0)
440 goto out;
441
442 ret = nilfs_construct_segment(sb);
443 if (ret < 0)
444 goto out;
445
446 down_write(&nilfs->ns_sem);
447 nilfs_move_2nd_super(sb, sb2off);
448 ret = -EIO;
449 sbp = nilfs_prepare_super(sb, 0);
450 if (likely(sbp)) {
451 nilfs_set_log_cursor(sbp[0], nilfs);
452 /*
453 * Drop NILFS_RESIZE_FS flag for compatibility with
454 * mount-time resize which may be implemented in a
455 * future release.
456 */
457 sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) &
458 ~NILFS_RESIZE_FS);
459 sbp[0]->s_dev_size = cpu_to_le64(newsize);
460 sbp[0]->s_nsegments = cpu_to_le64(nilfs->ns_nsegments);
461 if (sbp[1])
462 memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
463 ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
464 }
465 up_write(&nilfs->ns_sem);
466
467 /*
468 * Reset the range of allocatable segments last. This order
469 * is important in the case of expansion because the secondary
470 * superblock must be protected from log write until migration
471 * completes.
472 */
473 if (!ret)
474 nilfs_sufile_set_alloc_range(nilfs->ns_sufile, 0, newnsegs - 1);
475out:
476 return ret;
477}
478
350static void nilfs_put_super(struct super_block *sb) 479static void nilfs_put_super(struct super_block *sb)
351{ 480{
352 struct the_nilfs *nilfs = sb->s_fs_info; 481 struct the_nilfs *nilfs = sb->s_fs_info;
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index d2acd1a651f3..d32714094375 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -363,6 +363,24 @@ static unsigned long long nilfs_max_size(unsigned int blkbits)
363 return res; 363 return res;
364} 364}
365 365
366/**
367 * nilfs_nrsvsegs - calculate the number of reserved segments
368 * @nilfs: nilfs object
369 * @nsegs: total number of segments
370 */
371unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs)
372{
373 return max_t(unsigned long, NILFS_MIN_NRSVSEGS,
374 DIV_ROUND_UP(nsegs * nilfs->ns_r_segments_percentage,
375 100));
376}
377
378void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs)
379{
380 nilfs->ns_nsegments = nsegs;
381 nilfs->ns_nrsvsegs = nilfs_nrsvsegs(nilfs, nsegs);
382}
383
366static int nilfs_store_disk_layout(struct the_nilfs *nilfs, 384static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
367 struct nilfs_super_block *sbp) 385 struct nilfs_super_block *sbp)
368{ 386{
@@ -389,13 +407,9 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
389 } 407 }
390 408
391 nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block); 409 nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block);
392 nilfs->ns_nsegments = le64_to_cpu(sbp->s_nsegments);
393 nilfs->ns_r_segments_percentage = 410 nilfs->ns_r_segments_percentage =
394 le32_to_cpu(sbp->s_r_segments_percentage); 411 le32_to_cpu(sbp->s_r_segments_percentage);
395 nilfs->ns_nrsvsegs = 412 nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments));
396 max_t(unsigned long, NILFS_MIN_NRSVSEGS,
397 DIV_ROUND_UP(nilfs->ns_nsegments *
398 nilfs->ns_r_segments_percentage, 100));
399 nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed); 413 nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
400 return 0; 414 return 0;
401} 415}
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index f4968145c2a3..9992b11312ff 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -268,6 +268,8 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev);
268void destroy_nilfs(struct the_nilfs *nilfs); 268void destroy_nilfs(struct the_nilfs *nilfs);
269int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data); 269int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data);
270int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb); 270int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb);
271unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs);
272void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs);
271int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t); 273int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t);
272int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); 274int nilfs_count_free_blocks(struct the_nilfs *, sector_t *);
273struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno); 275struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno);
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 5d32749c896d..3c7606cff1ab 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3706,7 +3706,7 @@ int ocfs2_refcount_cow_xattr(struct inode *inode,
3706 context->cow_start = cow_start; 3706 context->cow_start = cow_start;
3707 context->cow_len = cow_len; 3707 context->cow_len = cow_len;
3708 context->ref_tree = ref_tree; 3708 context->ref_tree = ref_tree;
3709 context->ref_root_bh = ref_root_bh;; 3709 context->ref_root_bh = ref_root_bh;
3710 context->cow_object = xv; 3710 context->cow_object = xv;
3711 3711
3712 context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_jbd; 3712 context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_jbd;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index d545e97d99c3..8ed4d3433199 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -255,7 +255,11 @@ ssize_t part_discard_alignment_show(struct device *dev,
255 struct device_attribute *attr, char *buf) 255 struct device_attribute *attr, char *buf)
256{ 256{
257 struct hd_struct *p = dev_to_part(dev); 257 struct hd_struct *p = dev_to_part(dev);
258 return sprintf(buf, "%u\n", p->discard_alignment); 258 struct gendisk *disk = dev_to_disk(dev);
259
260 return sprintf(buf, "%u\n",
261 queue_limit_discard_alignment(&disk->queue->limits,
262 p->start_sect));
259} 263}
260 264
261ssize_t part_stat_show(struct device *dev, 265ssize_t part_stat_show(struct device *dev,
@@ -449,8 +453,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
449 p->start_sect = start; 453 p->start_sect = start;
450 p->alignment_offset = 454 p->alignment_offset =
451 queue_limit_alignment_offset(&disk->queue->limits, start); 455 queue_limit_alignment_offset(&disk->queue->limits, start);
452 p->discard_alignment =
453 queue_limit_discard_alignment(&disk->queue->limits, start);
454 p->nr_sects = len; 456 p->nr_sects = len;
455 p->partno = partno; 457 p->partno = partno;
456 p->policy = get_disk_ro(disk); 458 p->policy = get_disk_ro(disk);
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index ce4f62440425..af9fdf046769 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -565,7 +565,7 @@ static bool ldm_validate_partition_table(struct parsed_partitions *state)
565 565
566 data = read_part_sector(state, 0, &sect); 566 data = read_part_sector(state, 0, &sect);
567 if (!data) { 567 if (!data) {
568 ldm_crit ("Disk read failed."); 568 ldm_info ("Disk read failed.");
569 return false; 569 return false;
570 } 570 }
571 571
@@ -1335,6 +1335,11 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
1335 1335
1336 list_add_tail (&f->list, frags); 1336 list_add_tail (&f->list, frags);
1337found: 1337found:
1338 if (rec >= f->num) {
1339 ldm_error("REC value (%d) exceeds NUM value (%d)", rec, f->num);
1340 return false;
1341 }
1342
1338 if (f->map & (1 << rec)) { 1343 if (f->map & (1 << rec)) {
1339 ldm_error ("Duplicate VBLK, part %d.", rec); 1344 ldm_error ("Duplicate VBLK, part %d.", rec);
1340 f->map &= 0x7F; /* Mark the group as broken */ 1345 f->map &= 0x7F; /* Mark the group as broken */
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index c03e8d3a3a5b..3763b436e69d 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -61,6 +61,14 @@ extern const struct file_operations proc_pagemap_operations;
61extern const struct file_operations proc_net_operations; 61extern const struct file_operations proc_net_operations;
62extern const struct inode_operations proc_net_inode_operations; 62extern const struct inode_operations proc_net_inode_operations;
63 63
64struct proc_maps_private {
65 struct pid *pid;
66 struct task_struct *task;
67#ifdef CONFIG_MMU
68 struct vm_area_struct *tail_vma;
69#endif
70};
71
64void proc_init_inodecache(void); 72void proc_init_inodecache(void);
65 73
66static inline struct pid *proc_pid(struct inode *inode) 74static inline struct pid *proc_pid(struct inode *inode)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 318d8654989b..2c9db29ea358 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -858,7 +858,192 @@ const struct file_operations proc_pagemap_operations = {
858#endif /* CONFIG_PROC_PAGE_MONITOR */ 858#endif /* CONFIG_PROC_PAGE_MONITOR */
859 859
860#ifdef CONFIG_NUMA 860#ifdef CONFIG_NUMA
861extern int show_numa_map(struct seq_file *m, void *v); 861
862struct numa_maps {
863 struct vm_area_struct *vma;
864 unsigned long pages;
865 unsigned long anon;
866 unsigned long active;
867 unsigned long writeback;
868 unsigned long mapcount_max;
869 unsigned long dirty;
870 unsigned long swapcache;
871 unsigned long node[MAX_NUMNODES];
872};
873
874struct numa_maps_private {
875 struct proc_maps_private proc_maps;
876 struct numa_maps md;
877};
878
879static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty)
880{
881 int count = page_mapcount(page);
882
883 md->pages++;
884 if (pte_dirty || PageDirty(page))
885 md->dirty++;
886
887 if (PageSwapCache(page))
888 md->swapcache++;
889
890 if (PageActive(page) || PageUnevictable(page))
891 md->active++;
892
893 if (PageWriteback(page))
894 md->writeback++;
895
896 if (PageAnon(page))
897 md->anon++;
898
899 if (count > md->mapcount_max)
900 md->mapcount_max = count;
901
902 md->node[page_to_nid(page)]++;
903}
904
905static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
906 unsigned long end, struct mm_walk *walk)
907{
908 struct numa_maps *md;
909 spinlock_t *ptl;
910 pte_t *orig_pte;
911 pte_t *pte;
912
913 md = walk->private;
914 orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
915 do {
916 struct page *page;
917 int nid;
918
919 if (!pte_present(*pte))
920 continue;
921
922 page = vm_normal_page(md->vma, addr, *pte);
923 if (!page)
924 continue;
925
926 if (PageReserved(page))
927 continue;
928
929 nid = page_to_nid(page);
930 if (!node_isset(nid, node_states[N_HIGH_MEMORY]))
931 continue;
932
933 gather_stats(page, md, pte_dirty(*pte));
934
935 } while (pte++, addr += PAGE_SIZE, addr != end);
936 pte_unmap_unlock(orig_pte, ptl);
937 return 0;
938}
939#ifdef CONFIG_HUGETLB_PAGE
940static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
941 unsigned long addr, unsigned long end, struct mm_walk *walk)
942{
943 struct numa_maps *md;
944 struct page *page;
945
946 if (pte_none(*pte))
947 return 0;
948
949 page = pte_page(*pte);
950 if (!page)
951 return 0;
952
953 md = walk->private;
954 gather_stats(page, md, pte_dirty(*pte));
955 return 0;
956}
957
958#else
959static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
960 unsigned long addr, unsigned long end, struct mm_walk *walk)
961{
962 return 0;
963}
964#endif
965
966/*
967 * Display pages allocated per node and memory policy via /proc.
968 */
969static int show_numa_map(struct seq_file *m, void *v)
970{
971 struct numa_maps_private *numa_priv = m->private;
972 struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
973 struct vm_area_struct *vma = v;
974 struct numa_maps *md = &numa_priv->md;
975 struct file *file = vma->vm_file;
976 struct mm_struct *mm = vma->vm_mm;
977 struct mm_walk walk = {};
978 struct mempolicy *pol;
979 int n;
980 char buffer[50];
981
982 if (!mm)
983 return 0;
984
985 /* Ensure we start with an empty set of numa_maps statistics. */
986 memset(md, 0, sizeof(*md));
987
988 md->vma = vma;
989
990 walk.hugetlb_entry = gather_hugetbl_stats;
991 walk.pmd_entry = gather_pte_stats;
992 walk.private = md;
993 walk.mm = mm;
994
995 pol = get_vma_policy(proc_priv->task, vma, vma->vm_start);
996 mpol_to_str(buffer, sizeof(buffer), pol, 0);
997 mpol_cond_put(pol);
998
999 seq_printf(m, "%08lx %s", vma->vm_start, buffer);
1000
1001 if (file) {
1002 seq_printf(m, " file=");
1003 seq_path(m, &file->f_path, "\n\t= ");
1004 } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
1005 seq_printf(m, " heap");
1006 } else if (vma->vm_start <= mm->start_stack &&
1007 vma->vm_end >= mm->start_stack) {
1008 seq_printf(m, " stack");
1009 }
1010
1011 walk_page_range(vma->vm_start, vma->vm_end, &walk);
1012
1013 if (!md->pages)
1014 goto out;
1015
1016 if (md->anon)
1017 seq_printf(m, " anon=%lu", md->anon);
1018
1019 if (md->dirty)
1020 seq_printf(m, " dirty=%lu", md->dirty);
1021
1022 if (md->pages != md->anon && md->pages != md->dirty)
1023 seq_printf(m, " mapped=%lu", md->pages);
1024
1025 if (md->mapcount_max > 1)
1026 seq_printf(m, " mapmax=%lu", md->mapcount_max);
1027
1028 if (md->swapcache)
1029 seq_printf(m, " swapcache=%lu", md->swapcache);
1030
1031 if (md->active < md->pages && !is_vm_hugetlb_page(vma))
1032 seq_printf(m, " active=%lu", md->active);
1033
1034 if (md->writeback)
1035 seq_printf(m, " writeback=%lu", md->writeback);
1036
1037 for_each_node_state(n, N_HIGH_MEMORY)
1038 if (md->node[n])
1039 seq_printf(m, " N%d=%lu", n, md->node[n]);
1040out:
1041 seq_putc(m, '\n');
1042
1043 if (m->count < m->size)
1044 m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0;
1045 return 0;
1046}
862 1047
863static const struct seq_operations proc_pid_numa_maps_op = { 1048static const struct seq_operations proc_pid_numa_maps_op = {
864 .start = m_start, 1049 .start = m_start,
@@ -869,7 +1054,20 @@ static const struct seq_operations proc_pid_numa_maps_op = {
869 1054
870static int numa_maps_open(struct inode *inode, struct file *file) 1055static int numa_maps_open(struct inode *inode, struct file *file)
871{ 1056{
872 return do_maps_open(inode, file, &proc_pid_numa_maps_op); 1057 struct numa_maps_private *priv;
1058 int ret = -ENOMEM;
1059 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
1060 if (priv) {
1061 priv->proc_maps.pid = proc_pid(inode);
1062 ret = seq_open(file, &proc_pid_numa_maps_op);
1063 if (!ret) {
1064 struct seq_file *m = file->private_data;
1065 m->private = priv;
1066 } else {
1067 kfree(priv);
1068 }
1069 }
1070 return ret;
873} 1071}
874 1072
875const struct file_operations proc_numa_maps_operations = { 1073const struct file_operations proc_numa_maps_operations = {
@@ -878,4 +1076,4 @@ const struct file_operations proc_numa_maps_operations = {
878 .llseek = seq_lseek, 1076 .llseek = seq_lseek,
879 .release = seq_release_private, 1077 .release = seq_release_private,
880}; 1078};
881#endif 1079#endif /* CONFIG_NUMA */
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index f835a25625ff..f2c3ff20ea68 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -152,21 +152,27 @@ EXPORT_SYMBOL_GPL(pstore_register);
152void pstore_get_records(void) 152void pstore_get_records(void)
153{ 153{
154 struct pstore_info *psi = psinfo; 154 struct pstore_info *psi = psinfo;
155 size_t size; 155 ssize_t size;
156 u64 id; 156 u64 id;
157 enum pstore_type_id type; 157 enum pstore_type_id type;
158 struct timespec time; 158 struct timespec time;
159 int failed = 0; 159 int failed = 0, rc;
160 160
161 if (!psi) 161 if (!psi)
162 return; 162 return;
163 163
164 mutex_lock(&psinfo->buf_mutex); 164 mutex_lock(&psinfo->buf_mutex);
165 rc = psi->open(psi);
166 if (rc)
167 goto out;
168
165 while ((size = psi->read(&id, &type, &time)) > 0) { 169 while ((size = psi->read(&id, &type, &time)) > 0) {
166 if (pstore_mkfile(type, psi->name, id, psi->buf, size, 170 if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size,
167 time, psi->erase)) 171 time, psi->erase))
168 failed++; 172 failed++;
169 } 173 }
174 psi->close(psi);
175out:
170 mutex_unlock(&psinfo->buf_mutex); 176 mutex_unlock(&psinfo->buf_mutex);
171 177
172 if (failed) 178 if (failed)
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index d3c032f5fa0a..5b572c89e6c4 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -691,8 +691,11 @@ static void prune_dqcache(int count)
691 * This is called from kswapd when we think we need some 691 * This is called from kswapd when we think we need some
692 * more memory 692 * more memory
693 */ 693 */
694static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 694static int shrink_dqcache_memory(struct shrinker *shrink,
695 struct shrink_control *sc)
695{ 696{
697 int nr = sc->nr_to_scan;
698
696 if (nr) { 699 if (nr) {
697 spin_lock(&dq_list_lock); 700 spin_lock(&dq_list_lock);
698 prune_dqcache(nr); 701 prune_dqcache(nr);
diff --git a/fs/splice.c b/fs/splice.c
index 50a5d978da16..aa866d309695 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -162,6 +162,14 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = {
162 .get = generic_pipe_buf_get, 162 .get = generic_pipe_buf_get,
163}; 163};
164 164
165static void wakeup_pipe_readers(struct pipe_inode_info *pipe)
166{
167 smp_mb();
168 if (waitqueue_active(&pipe->wait))
169 wake_up_interruptible(&pipe->wait);
170 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
171}
172
165/** 173/**
166 * splice_to_pipe - fill passed data into a pipe 174 * splice_to_pipe - fill passed data into a pipe
167 * @pipe: pipe to fill 175 * @pipe: pipe to fill
@@ -247,12 +255,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
247 255
248 pipe_unlock(pipe); 256 pipe_unlock(pipe);
249 257
250 if (do_wakeup) { 258 if (do_wakeup)
251 smp_mb(); 259 wakeup_pipe_readers(pipe);
252 if (waitqueue_active(&pipe->wait))
253 wake_up_interruptible(&pipe->wait);
254 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
255 }
256 260
257 while (page_nr < spd_pages) 261 while (page_nr < spd_pages)
258 spd->spd_release(spd, page_nr++); 262 spd->spd_release(spd, page_nr++);
@@ -1892,12 +1896,9 @@ retry:
1892 /* 1896 /*
1893 * If we put data in the output pipe, wakeup any potential readers. 1897 * If we put data in the output pipe, wakeup any potential readers.
1894 */ 1898 */
1895 if (ret > 0) { 1899 if (ret > 0)
1896 smp_mb(); 1900 wakeup_pipe_readers(opipe);
1897 if (waitqueue_active(&opipe->wait)) 1901
1898 wake_up_interruptible(&opipe->wait);
1899 kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
1900 }
1901 if (input_wakeup) 1902 if (input_wakeup)
1902 wakeup_pipe_writers(ipipe); 1903 wakeup_pipe_writers(ipipe);
1903 1904
@@ -1976,12 +1977,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1976 /* 1977 /*
1977 * If we put data in the output pipe, wakeup any potential readers. 1978 * If we put data in the output pipe, wakeup any potential readers.
1978 */ 1979 */
1979 if (ret > 0) { 1980 if (ret > 0)
1980 smp_mb(); 1981 wakeup_pipe_readers(opipe);
1981 if (waitqueue_active(&opipe->wait))
1982 wake_up_interruptible(&opipe->wait);
1983 kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
1984 }
1985 1982
1986 return ret; 1983 return ret;
1987} 1984}
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index efc309fa3035..7797218d0b30 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -42,7 +42,7 @@ config SQUASHFS_LZO
42 select LZO_DECOMPRESS 42 select LZO_DECOMPRESS
43 help 43 help
44 Saying Y here includes support for reading Squashfs file systems 44 Saying Y here includes support for reading Squashfs file systems
45 compressed with LZO compresssion. LZO compression is mainly 45 compressed with LZO compression. LZO compression is mainly
46 aimed at embedded systems with slower CPUs where the overheads 46 aimed at embedded systems with slower CPUs where the overheads
47 of zlib are too high. 47 of zlib are too high.
48 48
@@ -57,7 +57,7 @@ config SQUASHFS_XZ
57 select XZ_DEC 57 select XZ_DEC
58 help 58 help
59 Saying Y here includes support for reading Squashfs file systems 59 Saying Y here includes support for reading Squashfs file systems
60 compressed with XZ compresssion. XZ gives better compression than 60 compressed with XZ compression. XZ gives better compression than
61 the default zlib compression, at the expense of greater CPU and 61 the default zlib compression, at the expense of greater CPU and
62 memory overhead. 62 memory overhead.
63 63
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index c37b520132ff..4b5a3fbb1f1f 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -29,7 +29,7 @@
29 * plus functions layered ontop of the generic cache implementation to 29 * plus functions layered ontop of the generic cache implementation to
30 * access the metadata and fragment caches. 30 * access the metadata and fragment caches.
31 * 31 *
32 * To avoid out of memory and fragmentation isssues with vmalloc the cache 32 * To avoid out of memory and fragmentation issues with vmalloc the cache
33 * uses sequences of kmalloced PAGE_CACHE_SIZE buffers. 33 * uses sequences of kmalloced PAGE_CACHE_SIZE buffers.
34 * 34 *
35 * It should be noted that the cache is not used for file datablocks, these 35 * It should be noted that the cache is not used for file datablocks, these
diff --git a/fs/super.c b/fs/super.c
index 8a06881b1920..c04f7e0b7ed2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -948,8 +948,7 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
948 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE 948 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
949 * but s_maxbytes was an unsigned long long for many releases. Throw 949 * but s_maxbytes was an unsigned long long for many releases. Throw
950 * this warning for a little while to try and catch filesystems that 950 * this warning for a little while to try and catch filesystems that
951 * violate this rule. This warning should be either removed or 951 * violate this rule.
952 * converted to a BUG() in 2.6.34.
953 */ 952 */
954 WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " 953 WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
955 "negative value (%lld)\n", type->name, sb->s_maxbytes); 954 "negative value (%lld)\n", type->name, sb->s_maxbytes);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index da3fefe91a8f..1ad8c93c1b85 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -24,13 +24,6 @@
24 24
25#include "sysfs.h" 25#include "sysfs.h"
26 26
27/* used in crash dumps to help with debugging */
28static char last_sysfs_file[PATH_MAX];
29void sysfs_printk_last_file(void)
30{
31 printk(KERN_EMERG "last sysfs file: %s\n", last_sysfs_file);
32}
33
34/* 27/*
35 * There's one sysfs_buffer for each open file and one 28 * There's one sysfs_buffer for each open file and one
36 * sysfs_open_dirent for each sysfs_dirent with one or more open 29 * sysfs_open_dirent for each sysfs_dirent with one or more open
@@ -337,11 +330,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
337 struct sysfs_buffer *buffer; 330 struct sysfs_buffer *buffer;
338 const struct sysfs_ops *ops; 331 const struct sysfs_ops *ops;
339 int error = -EACCES; 332 int error = -EACCES;
340 char *p;
341
342 p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file));
343 if (!IS_ERR(p))
344 memmove(last_sysfs_file, p, strlen(p) + 1);
345 333
346 /* need attr_sd for attr and ops, its parent for kobj */ 334 /* need attr_sd for attr and ops, its parent for kobj */
347 if (!sysfs_get_active(attr_sd)) 335 if (!sysfs_get_active(attr_sd))
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index c8769dc222d8..194414f8298c 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -101,9 +101,9 @@ int sysfs_create_group(struct kobject *kobj,
101} 101}
102 102
103/** 103/**
104 * sysfs_update_group - given a directory kobject, create an attribute group 104 * sysfs_update_group - given a directory kobject, update an attribute group
105 * @kobj: The kobject to create the group on 105 * @kobj: The kobject to update the group on
106 * @grp: The attribute group to create 106 * @grp: The attribute group to update
107 * 107 *
108 * This function updates an attribute group. Unlike 108 * This function updates an attribute group. Unlike
109 * sysfs_create_group(), it will explicitly not warn or error if any 109 * sysfs_create_group(), it will explicitly not warn or error if any
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 8c4fc1425b3e..f67acbdda5e8 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -22,16 +22,24 @@
22#include <linux/anon_inodes.h> 22#include <linux/anon_inodes.h>
23#include <linux/timerfd.h> 23#include <linux/timerfd.h>
24#include <linux/syscalls.h> 24#include <linux/syscalls.h>
25#include <linux/rcupdate.h>
25 26
26struct timerfd_ctx { 27struct timerfd_ctx {
27 struct hrtimer tmr; 28 struct hrtimer tmr;
28 ktime_t tintv; 29 ktime_t tintv;
30 ktime_t moffs;
29 wait_queue_head_t wqh; 31 wait_queue_head_t wqh;
30 u64 ticks; 32 u64 ticks;
31 int expired; 33 int expired;
32 int clockid; 34 int clockid;
35 struct rcu_head rcu;
36 struct list_head clist;
37 bool might_cancel;
33}; 38};
34 39
40static LIST_HEAD(cancel_list);
41static DEFINE_SPINLOCK(cancel_lock);
42
35/* 43/*
36 * This gets called when the timer event triggers. We set the "expired" 44 * This gets called when the timer event triggers. We set the "expired"
37 * flag, but we do not re-arm the timer (in case it's necessary, 45 * flag, but we do not re-arm the timer (in case it's necessary,
@@ -51,6 +59,63 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
51 return HRTIMER_NORESTART; 59 return HRTIMER_NORESTART;
52} 60}
53 61
62/*
63 * Called when the clock was set to cancel the timers in the cancel
64 * list.
65 */
66void timerfd_clock_was_set(void)
67{
68 ktime_t moffs = ktime_get_monotonic_offset();
69 struct timerfd_ctx *ctx;
70 unsigned long flags;
71
72 rcu_read_lock();
73 list_for_each_entry_rcu(ctx, &cancel_list, clist) {
74 if (!ctx->might_cancel)
75 continue;
76 spin_lock_irqsave(&ctx->wqh.lock, flags);
77 if (ctx->moffs.tv64 != moffs.tv64) {
78 ctx->moffs.tv64 = KTIME_MAX;
79 wake_up_locked(&ctx->wqh);
80 }
81 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
82 }
83 rcu_read_unlock();
84}
85
86static void timerfd_remove_cancel(struct timerfd_ctx *ctx)
87{
88 if (ctx->might_cancel) {
89 ctx->might_cancel = false;
90 spin_lock(&cancel_lock);
91 list_del_rcu(&ctx->clist);
92 spin_unlock(&cancel_lock);
93 }
94}
95
96static bool timerfd_canceled(struct timerfd_ctx *ctx)
97{
98 if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX)
99 return false;
100 ctx->moffs = ktime_get_monotonic_offset();
101 return true;
102}
103
104static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
105{
106 if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) &&
107 (flags & TFD_TIMER_CANCEL_ON_SET)) {
108 if (!ctx->might_cancel) {
109 ctx->might_cancel = true;
110 spin_lock(&cancel_lock);
111 list_add_rcu(&ctx->clist, &cancel_list);
112 spin_unlock(&cancel_lock);
113 }
114 } else if (ctx->might_cancel) {
115 timerfd_remove_cancel(ctx);
116 }
117}
118
54static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) 119static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
55{ 120{
56 ktime_t remaining; 121 ktime_t remaining;
@@ -59,11 +124,12 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
59 return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; 124 return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
60} 125}
61 126
62static void timerfd_setup(struct timerfd_ctx *ctx, int flags, 127static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
63 const struct itimerspec *ktmr) 128 const struct itimerspec *ktmr)
64{ 129{
65 enum hrtimer_mode htmode; 130 enum hrtimer_mode htmode;
66 ktime_t texp; 131 ktime_t texp;
132 int clockid = ctx->clockid;
67 133
68 htmode = (flags & TFD_TIMER_ABSTIME) ? 134 htmode = (flags & TFD_TIMER_ABSTIME) ?
69 HRTIMER_MODE_ABS: HRTIMER_MODE_REL; 135 HRTIMER_MODE_ABS: HRTIMER_MODE_REL;
@@ -72,19 +138,24 @@ static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
72 ctx->expired = 0; 138 ctx->expired = 0;
73 ctx->ticks = 0; 139 ctx->ticks = 0;
74 ctx->tintv = timespec_to_ktime(ktmr->it_interval); 140 ctx->tintv = timespec_to_ktime(ktmr->it_interval);
75 hrtimer_init(&ctx->tmr, ctx->clockid, htmode); 141 hrtimer_init(&ctx->tmr, clockid, htmode);
76 hrtimer_set_expires(&ctx->tmr, texp); 142 hrtimer_set_expires(&ctx->tmr, texp);
77 ctx->tmr.function = timerfd_tmrproc; 143 ctx->tmr.function = timerfd_tmrproc;
78 if (texp.tv64 != 0) 144 if (texp.tv64 != 0) {
79 hrtimer_start(&ctx->tmr, texp, htmode); 145 hrtimer_start(&ctx->tmr, texp, htmode);
146 if (timerfd_canceled(ctx))
147 return -ECANCELED;
148 }
149 return 0;
80} 150}
81 151
82static int timerfd_release(struct inode *inode, struct file *file) 152static int timerfd_release(struct inode *inode, struct file *file)
83{ 153{
84 struct timerfd_ctx *ctx = file->private_data; 154 struct timerfd_ctx *ctx = file->private_data;
85 155
156 timerfd_remove_cancel(ctx);
86 hrtimer_cancel(&ctx->tmr); 157 hrtimer_cancel(&ctx->tmr);
87 kfree(ctx); 158 kfree_rcu(ctx, rcu);
88 return 0; 159 return 0;
89} 160}
90 161
@@ -118,8 +189,21 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
118 res = -EAGAIN; 189 res = -EAGAIN;
119 else 190 else
120 res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); 191 res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
192
193 /*
194 * If clock has changed, we do not care about the
195 * ticks and we do not rearm the timer. Userspace must
196 * reevaluate anyway.
197 */
198 if (timerfd_canceled(ctx)) {
199 ctx->ticks = 0;
200 ctx->expired = 0;
201 res = -ECANCELED;
202 }
203
121 if (ctx->ticks) { 204 if (ctx->ticks) {
122 ticks = ctx->ticks; 205 ticks = ctx->ticks;
206
123 if (ctx->expired && ctx->tintv.tv64) { 207 if (ctx->expired && ctx->tintv.tv64) {
124 /* 208 /*
125 * If tintv.tv64 != 0, this is a periodic timer that 209 * If tintv.tv64 != 0, this is a periodic timer that
@@ -183,6 +267,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
183 init_waitqueue_head(&ctx->wqh); 267 init_waitqueue_head(&ctx->wqh);
184 ctx->clockid = clockid; 268 ctx->clockid = clockid;
185 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); 269 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
270 ctx->moffs = ktime_get_monotonic_offset();
186 271
187 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, 272 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
188 O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); 273 O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
@@ -199,6 +284,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
199 struct file *file; 284 struct file *file;
200 struct timerfd_ctx *ctx; 285 struct timerfd_ctx *ctx;
201 struct itimerspec ktmr, kotmr; 286 struct itimerspec ktmr, kotmr;
287 int ret;
202 288
203 if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) 289 if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
204 return -EFAULT; 290 return -EFAULT;
@@ -213,6 +299,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
213 return PTR_ERR(file); 299 return PTR_ERR(file);
214 ctx = file->private_data; 300 ctx = file->private_data;
215 301
302 timerfd_setup_cancel(ctx, flags);
303
216 /* 304 /*
217 * We need to stop the existing timer before reprogramming 305 * We need to stop the existing timer before reprogramming
218 * it to the new values. 306 * it to the new values.
@@ -240,14 +328,14 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
240 /* 328 /*
241 * Re-program the timer to the new value ... 329 * Re-program the timer to the new value ...
242 */ 330 */
243 timerfd_setup(ctx, flags, &ktmr); 331 ret = timerfd_setup(ctx, flags, &ktmr);
244 332
245 spin_unlock_irq(&ctx->wqh.lock); 333 spin_unlock_irq(&ctx->wqh.lock);
246 fput(file); 334 fput(file);
247 if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) 335 if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
248 return -EFAULT; 336 return -EFAULT;
249 337
250 return 0; 338 return ret;
251} 339}
252 340
253SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) 341SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 8b3a7da531eb..315de66e52b2 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -106,7 +106,7 @@ static long long get_liability(struct ubifs_info *c)
106 long long liab; 106 long long liab;
107 107
108 spin_lock(&c->space_lock); 108 spin_lock(&c->space_lock);
109 liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth; 109 liab = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth;
110 spin_unlock(&c->space_lock); 110 spin_unlock(&c->space_lock);
111 return liab; 111 return liab;
112} 112}
@@ -180,7 +180,7 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
180 int idx_lebs; 180 int idx_lebs;
181 long long idx_size; 181 long long idx_size;
182 182
183 idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; 183 idx_size = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx;
184 /* And make sure we have thrice the index size of space reserved */ 184 /* And make sure we have thrice the index size of space reserved */
185 idx_size += idx_size << 1; 185 idx_size += idx_size << 1;
186 /* 186 /*
@@ -292,13 +292,13 @@ static int can_use_rp(struct ubifs_info *c)
292 * budgeted index space to the size of the current index, multiplies this by 3, 292 * budgeted index space to the size of the current index, multiplies this by 3,
293 * and makes sure this does not exceed the amount of free LEBs. 293 * and makes sure this does not exceed the amount of free LEBs.
294 * 294 *
295 * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: 295 * Notes about @c->bi.min_idx_lebs and @c->lst.idx_lebs variables:
296 * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might 296 * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might
297 * be large, because UBIFS does not do any index consolidation as long as 297 * be large, because UBIFS does not do any index consolidation as long as
298 * there is free space. IOW, the index may take a lot of LEBs, but the LEBs 298 * there is free space. IOW, the index may take a lot of LEBs, but the LEBs
299 * will contain a lot of dirt. 299 * will contain a lot of dirt.
300 * o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW, 300 * o @c->bi.min_idx_lebs is the number of LEBS the index presumably takes. IOW,
301 * the index may be consolidated to take up to @c->min_idx_lebs LEBs. 301 * the index may be consolidated to take up to @c->bi.min_idx_lebs LEBs.
302 * 302 *
303 * This function returns zero in case of success, and %-ENOSPC in case of 303 * This function returns zero in case of success, and %-ENOSPC in case of
304 * failure. 304 * failure.
@@ -343,13 +343,13 @@ static int do_budget_space(struct ubifs_info *c)
343 c->lst.taken_empty_lebs; 343 c->lst.taken_empty_lebs;
344 if (unlikely(rsvd_idx_lebs > lebs)) { 344 if (unlikely(rsvd_idx_lebs > lebs)) {
345 dbg_budg("out of indexing space: min_idx_lebs %d (old %d), " 345 dbg_budg("out of indexing space: min_idx_lebs %d (old %d), "
346 "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs, 346 "rsvd_idx_lebs %d", min_idx_lebs, c->bi.min_idx_lebs,
347 rsvd_idx_lebs); 347 rsvd_idx_lebs);
348 return -ENOSPC; 348 return -ENOSPC;
349 } 349 }
350 350
351 available = ubifs_calc_available(c, min_idx_lebs); 351 available = ubifs_calc_available(c, min_idx_lebs);
352 outstanding = c->budg_data_growth + c->budg_dd_growth; 352 outstanding = c->bi.data_growth + c->bi.dd_growth;
353 353
354 if (unlikely(available < outstanding)) { 354 if (unlikely(available < outstanding)) {
355 dbg_budg("out of data space: available %lld, outstanding %lld", 355 dbg_budg("out of data space: available %lld, outstanding %lld",
@@ -360,7 +360,7 @@ static int do_budget_space(struct ubifs_info *c)
360 if (available - outstanding <= c->rp_size && !can_use_rp(c)) 360 if (available - outstanding <= c->rp_size && !can_use_rp(c))
361 return -ENOSPC; 361 return -ENOSPC;
362 362
363 c->min_idx_lebs = min_idx_lebs; 363 c->bi.min_idx_lebs = min_idx_lebs;
364 return 0; 364 return 0;
365} 365}
366 366
@@ -393,11 +393,11 @@ static int calc_data_growth(const struct ubifs_info *c,
393{ 393{
394 int data_growth; 394 int data_growth;
395 395
396 data_growth = req->new_ino ? c->inode_budget : 0; 396 data_growth = req->new_ino ? c->bi.inode_budget : 0;
397 if (req->new_page) 397 if (req->new_page)
398 data_growth += c->page_budget; 398 data_growth += c->bi.page_budget;
399 if (req->new_dent) 399 if (req->new_dent)
400 data_growth += c->dent_budget; 400 data_growth += c->bi.dent_budget;
401 data_growth += req->new_ino_d; 401 data_growth += req->new_ino_d;
402 return data_growth; 402 return data_growth;
403} 403}
@@ -413,12 +413,12 @@ static int calc_dd_growth(const struct ubifs_info *c,
413{ 413{
414 int dd_growth; 414 int dd_growth;
415 415
416 dd_growth = req->dirtied_page ? c->page_budget : 0; 416 dd_growth = req->dirtied_page ? c->bi.page_budget : 0;
417 417
418 if (req->dirtied_ino) 418 if (req->dirtied_ino)
419 dd_growth += c->inode_budget << (req->dirtied_ino - 1); 419 dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1);
420 if (req->mod_dent) 420 if (req->mod_dent)
421 dd_growth += c->dent_budget; 421 dd_growth += c->bi.dent_budget;
422 dd_growth += req->dirtied_ino_d; 422 dd_growth += req->dirtied_ino_d;
423 return dd_growth; 423 return dd_growth;
424} 424}
@@ -460,19 +460,19 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
460 460
461again: 461again:
462 spin_lock(&c->space_lock); 462 spin_lock(&c->space_lock);
463 ubifs_assert(c->budg_idx_growth >= 0); 463 ubifs_assert(c->bi.idx_growth >= 0);
464 ubifs_assert(c->budg_data_growth >= 0); 464 ubifs_assert(c->bi.data_growth >= 0);
465 ubifs_assert(c->budg_dd_growth >= 0); 465 ubifs_assert(c->bi.dd_growth >= 0);
466 466
467 if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) { 467 if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) {
468 dbg_budg("no space"); 468 dbg_budg("no space");
469 spin_unlock(&c->space_lock); 469 spin_unlock(&c->space_lock);
470 return -ENOSPC; 470 return -ENOSPC;
471 } 471 }
472 472
473 c->budg_idx_growth += idx_growth; 473 c->bi.idx_growth += idx_growth;
474 c->budg_data_growth += data_growth; 474 c->bi.data_growth += data_growth;
475 c->budg_dd_growth += dd_growth; 475 c->bi.dd_growth += dd_growth;
476 476
477 err = do_budget_space(c); 477 err = do_budget_space(c);
478 if (likely(!err)) { 478 if (likely(!err)) {
@@ -484,9 +484,9 @@ again:
484 } 484 }
485 485
486 /* Restore the old values */ 486 /* Restore the old values */
487 c->budg_idx_growth -= idx_growth; 487 c->bi.idx_growth -= idx_growth;
488 c->budg_data_growth -= data_growth; 488 c->bi.data_growth -= data_growth;
489 c->budg_dd_growth -= dd_growth; 489 c->bi.dd_growth -= dd_growth;
490 spin_unlock(&c->space_lock); 490 spin_unlock(&c->space_lock);
491 491
492 if (req->fast) { 492 if (req->fast) {
@@ -506,9 +506,9 @@ again:
506 goto again; 506 goto again;
507 } 507 }
508 dbg_budg("FS is full, -ENOSPC"); 508 dbg_budg("FS is full, -ENOSPC");
509 c->nospace = 1; 509 c->bi.nospace = 1;
510 if (can_use_rp(c) || c->rp_size == 0) 510 if (can_use_rp(c) || c->rp_size == 0)
511 c->nospace_rp = 1; 511 c->bi.nospace_rp = 1;
512 smp_wmb(); 512 smp_wmb();
513 } else 513 } else
514 ubifs_err("cannot budget space, error %d", err); 514 ubifs_err("cannot budget space, error %d", err);
@@ -523,8 +523,8 @@ again:
523 * This function releases the space budgeted by 'ubifs_budget_space()'. Note, 523 * This function releases the space budgeted by 'ubifs_budget_space()'. Note,
524 * since the index changes (which were budgeted for in @req->idx_growth) will 524 * since the index changes (which were budgeted for in @req->idx_growth) will
525 * only be written to the media on commit, this function moves the index budget 525 * only be written to the media on commit, this function moves the index budget
526 * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be 526 * from @c->bi.idx_growth to @c->bi.uncommitted_idx. The latter will be zeroed
527 * zeroed by the commit operation. 527 * by the commit operation.
528 */ 528 */
529void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) 529void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
530{ 530{
@@ -553,23 +553,23 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
553 if (!req->data_growth && !req->dd_growth) 553 if (!req->data_growth && !req->dd_growth)
554 return; 554 return;
555 555
556 c->nospace = c->nospace_rp = 0; 556 c->bi.nospace = c->bi.nospace_rp = 0;
557 smp_wmb(); 557 smp_wmb();
558 558
559 spin_lock(&c->space_lock); 559 spin_lock(&c->space_lock);
560 c->budg_idx_growth -= req->idx_growth; 560 c->bi.idx_growth -= req->idx_growth;
561 c->budg_uncommitted_idx += req->idx_growth; 561 c->bi.uncommitted_idx += req->idx_growth;
562 c->budg_data_growth -= req->data_growth; 562 c->bi.data_growth -= req->data_growth;
563 c->budg_dd_growth -= req->dd_growth; 563 c->bi.dd_growth -= req->dd_growth;
564 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 564 c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
565 565
566 ubifs_assert(c->budg_idx_growth >= 0); 566 ubifs_assert(c->bi.idx_growth >= 0);
567 ubifs_assert(c->budg_data_growth >= 0); 567 ubifs_assert(c->bi.data_growth >= 0);
568 ubifs_assert(c->budg_dd_growth >= 0); 568 ubifs_assert(c->bi.dd_growth >= 0);
569 ubifs_assert(c->min_idx_lebs < c->main_lebs); 569 ubifs_assert(c->bi.min_idx_lebs < c->main_lebs);
570 ubifs_assert(!(c->budg_idx_growth & 7)); 570 ubifs_assert(!(c->bi.idx_growth & 7));
571 ubifs_assert(!(c->budg_data_growth & 7)); 571 ubifs_assert(!(c->bi.data_growth & 7));
572 ubifs_assert(!(c->budg_dd_growth & 7)); 572 ubifs_assert(!(c->bi.dd_growth & 7));
573 spin_unlock(&c->space_lock); 573 spin_unlock(&c->space_lock);
574} 574}
575 575
@@ -586,13 +586,13 @@ void ubifs_convert_page_budget(struct ubifs_info *c)
586{ 586{
587 spin_lock(&c->space_lock); 587 spin_lock(&c->space_lock);
588 /* Release the index growth reservation */ 588 /* Release the index growth reservation */
589 c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; 589 c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
590 /* Release the data growth reservation */ 590 /* Release the data growth reservation */
591 c->budg_data_growth -= c->page_budget; 591 c->bi.data_growth -= c->bi.page_budget;
592 /* Increase the dirty data growth reservation instead */ 592 /* Increase the dirty data growth reservation instead */
593 c->budg_dd_growth += c->page_budget; 593 c->bi.dd_growth += c->bi.page_budget;
594 /* And re-calculate the indexing space reservation */ 594 /* And re-calculate the indexing space reservation */
595 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 595 c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
596 spin_unlock(&c->space_lock); 596 spin_unlock(&c->space_lock);
597} 597}
598 598
@@ -612,7 +612,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
612 612
613 memset(&req, 0, sizeof(struct ubifs_budget_req)); 613 memset(&req, 0, sizeof(struct ubifs_budget_req));
614 /* The "no space" flags will be cleared because dd_growth is > 0 */ 614 /* The "no space" flags will be cleared because dd_growth is > 0 */
615 req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8); 615 req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8);
616 ubifs_release_budget(c, &req); 616 ubifs_release_budget(c, &req);
617} 617}
618 618
@@ -682,9 +682,9 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c)
682 int rsvd_idx_lebs, lebs; 682 int rsvd_idx_lebs, lebs;
683 long long available, outstanding, free; 683 long long available, outstanding, free;
684 684
685 ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); 685 ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
686 outstanding = c->budg_data_growth + c->budg_dd_growth; 686 outstanding = c->bi.data_growth + c->bi.dd_growth;
687 available = ubifs_calc_available(c, c->min_idx_lebs); 687 available = ubifs_calc_available(c, c->bi.min_idx_lebs);
688 688
689 /* 689 /*
690 * When reporting free space to user-space, UBIFS guarantees that it is 690 * When reporting free space to user-space, UBIFS guarantees that it is
@@ -697,8 +697,8 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c)
697 * Note, the calculations below are similar to what we have in 697 * Note, the calculations below are similar to what we have in
698 * 'do_budget_space()', so refer there for comments. 698 * 'do_budget_space()', so refer there for comments.
699 */ 699 */
700 if (c->min_idx_lebs > c->lst.idx_lebs) 700 if (c->bi.min_idx_lebs > c->lst.idx_lebs)
701 rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; 701 rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
702 else 702 else
703 rsvd_idx_lebs = 0; 703 rsvd_idx_lebs = 0;
704 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - 704 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 1bd01ded7123..87cd0ead8633 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -182,7 +182,7 @@ static int do_commit(struct ubifs_info *c)
182 c->mst_node->root_len = cpu_to_le32(zroot.len); 182 c->mst_node->root_len = cpu_to_le32(zroot.len);
183 c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum); 183 c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum);
184 c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs); 184 c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs);
185 c->mst_node->index_size = cpu_to_le64(c->old_idx_sz); 185 c->mst_node->index_size = cpu_to_le64(c->bi.old_idx_sz);
186 c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum); 186 c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum);
187 c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs); 187 c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs);
188 c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum); 188 c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum);
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 004d3745dc45..0bb2bcef0de9 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -34,7 +34,6 @@
34#include <linux/moduleparam.h> 34#include <linux/moduleparam.h>
35#include <linux/debugfs.h> 35#include <linux/debugfs.h>
36#include <linux/math64.h> 36#include <linux/math64.h>
37#include <linux/slab.h>
38 37
39#ifdef CONFIG_UBIFS_FS_DEBUG 38#ifdef CONFIG_UBIFS_FS_DEBUG
40 39
@@ -43,15 +42,12 @@ DEFINE_SPINLOCK(dbg_lock);
43static char dbg_key_buf0[128]; 42static char dbg_key_buf0[128];
44static char dbg_key_buf1[128]; 43static char dbg_key_buf1[128];
45 44
46unsigned int ubifs_msg_flags;
47unsigned int ubifs_chk_flags; 45unsigned int ubifs_chk_flags;
48unsigned int ubifs_tst_flags; 46unsigned int ubifs_tst_flags;
49 47
50module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR);
51module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); 48module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR);
52module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); 49module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR);
53 50
54MODULE_PARM_DESC(debug_msgs, "Debug message type flags");
55MODULE_PARM_DESC(debug_chks, "Debug check flags"); 51MODULE_PARM_DESC(debug_chks, "Debug check flags");
56MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); 52MODULE_PARM_DESC(debug_tsts, "Debug special test flags");
57 53
@@ -317,6 +313,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
317 printk(KERN_DEBUG "\tflags %#x\n", sup_flags); 313 printk(KERN_DEBUG "\tflags %#x\n", sup_flags);
318 printk(KERN_DEBUG "\t big_lpt %u\n", 314 printk(KERN_DEBUG "\t big_lpt %u\n",
319 !!(sup_flags & UBIFS_FLG_BIGLPT)); 315 !!(sup_flags & UBIFS_FLG_BIGLPT));
316 printk(KERN_DEBUG "\t space_fixup %u\n",
317 !!(sup_flags & UBIFS_FLG_SPACE_FIXUP));
320 printk(KERN_DEBUG "\tmin_io_size %u\n", 318 printk(KERN_DEBUG "\tmin_io_size %u\n",
321 le32_to_cpu(sup->min_io_size)); 319 le32_to_cpu(sup->min_io_size));
322 printk(KERN_DEBUG "\tleb_size %u\n", 320 printk(KERN_DEBUG "\tleb_size %u\n",
@@ -602,7 +600,7 @@ void dbg_dump_lstats(const struct ubifs_lp_stats *lst)
602 spin_unlock(&dbg_lock); 600 spin_unlock(&dbg_lock);
603} 601}
604 602
605void dbg_dump_budg(struct ubifs_info *c) 603void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
606{ 604{
607 int i; 605 int i;
608 struct rb_node *rb; 606 struct rb_node *rb;
@@ -610,26 +608,42 @@ void dbg_dump_budg(struct ubifs_info *c)
610 struct ubifs_gced_idx_leb *idx_gc; 608 struct ubifs_gced_idx_leb *idx_gc;
611 long long available, outstanding, free; 609 long long available, outstanding, free;
612 610
613 ubifs_assert(spin_is_locked(&c->space_lock)); 611 spin_lock(&c->space_lock);
614 spin_lock(&dbg_lock); 612 spin_lock(&dbg_lock);
615 printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, " 613 printk(KERN_DEBUG "(pid %d) Budgeting info: data budget sum %lld, "
616 "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid, 614 "total budget sum %lld\n", current->pid,
617 c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth); 615 bi->data_growth + bi->dd_growth,
618 printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, " 616 bi->data_growth + bi->dd_growth + bi->idx_growth);
619 "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth, 617 printk(KERN_DEBUG "\tbudg_data_growth %lld, budg_dd_growth %lld, "
620 c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth, 618 "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth,
621 c->freeable_cnt); 619 bi->idx_growth);
622 printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, " 620 printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %llu, "
623 "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs, 621 "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz,
624 c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt); 622 bi->uncommitted_idx);
623 printk(KERN_DEBUG "\tpage_budget %d, inode_budget %d, dent_budget %d\n",
624 bi->page_budget, bi->inode_budget, bi->dent_budget);
625 printk(KERN_DEBUG "\tnospace %u, nospace_rp %u\n",
626 bi->nospace, bi->nospace_rp);
627 printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
628 c->dark_wm, c->dead_wm, c->max_idx_node_sz);
629
630 if (bi != &c->bi)
631 /*
632 * If we are dumping saved budgeting data, do not print
633 * additional information which is about the current state, not
634 * the old one which corresponded to the saved budgeting data.
635 */
636 goto out_unlock;
637
638 printk(KERN_DEBUG "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n",
639 c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt);
625 printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " 640 printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, "
626 "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), 641 "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt),
627 atomic_long_read(&c->dirty_zn_cnt), 642 atomic_long_read(&c->dirty_zn_cnt),
628 atomic_long_read(&c->clean_zn_cnt)); 643 atomic_long_read(&c->clean_zn_cnt));
629 printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
630 c->dark_wm, c->dead_wm, c->max_idx_node_sz);
631 printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", 644 printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n",
632 c->gc_lnum, c->ihead_lnum); 645 c->gc_lnum, c->ihead_lnum);
646
633 /* If we are in R/O mode, journal heads do not exist */ 647 /* If we are in R/O mode, journal heads do not exist */
634 if (c->jheads) 648 if (c->jheads)
635 for (i = 0; i < c->jhead_cnt; i++) 649 for (i = 0; i < c->jhead_cnt; i++)
@@ -648,13 +662,15 @@ void dbg_dump_budg(struct ubifs_info *c)
648 printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); 662 printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state);
649 663
650 /* Print budgeting predictions */ 664 /* Print budgeting predictions */
651 available = ubifs_calc_available(c, c->min_idx_lebs); 665 available = ubifs_calc_available(c, c->bi.min_idx_lebs);
652 outstanding = c->budg_data_growth + c->budg_dd_growth; 666 outstanding = c->bi.data_growth + c->bi.dd_growth;
653 free = ubifs_get_free_space_nolock(c); 667 free = ubifs_get_free_space_nolock(c);
654 printk(KERN_DEBUG "Budgeting predictions:\n"); 668 printk(KERN_DEBUG "Budgeting predictions:\n");
655 printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", 669 printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n",
656 available, outstanding, free); 670 available, outstanding, free);
671out_unlock:
657 spin_unlock(&dbg_lock); 672 spin_unlock(&dbg_lock);
673 spin_unlock(&c->space_lock);
658} 674}
659 675
660void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) 676void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
@@ -729,7 +745,13 @@ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
729 if (bud->lnum == lp->lnum) { 745 if (bud->lnum == lp->lnum) {
730 int head = 0; 746 int head = 0;
731 for (i = 0; i < c->jhead_cnt; i++) { 747 for (i = 0; i < c->jhead_cnt; i++) {
732 if (lp->lnum == c->jheads[i].wbuf.lnum) { 748 /*
749 * Note, if we are in R/O mode or in the middle
750 * of mounting/re-mounting, the write-buffers do
751 * not exist.
752 */
753 if (c->jheads &&
754 lp->lnum == c->jheads[i].wbuf.lnum) {
733 printk(KERN_CONT ", jhead %s", 755 printk(KERN_CONT ", jhead %s",
734 dbg_jhead(i)); 756 dbg_jhead(i));
735 head = 1; 757 head = 1;
@@ -976,6 +998,8 @@ void dbg_save_space_info(struct ubifs_info *c)
976 998
977 spin_lock(&c->space_lock); 999 spin_lock(&c->space_lock);
978 memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats)); 1000 memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats));
1001 memcpy(&d->saved_bi, &c->bi, sizeof(struct ubifs_budg_info));
1002 d->saved_idx_gc_cnt = c->idx_gc_cnt;
979 1003
980 /* 1004 /*
981 * We use a dirty hack here and zero out @c->freeable_cnt, because it 1005 * We use a dirty hack here and zero out @c->freeable_cnt, because it
@@ -1042,14 +1066,14 @@ int dbg_check_space_info(struct ubifs_info *c)
1042out: 1066out:
1043 ubifs_msg("saved lprops statistics dump"); 1067 ubifs_msg("saved lprops statistics dump");
1044 dbg_dump_lstats(&d->saved_lst); 1068 dbg_dump_lstats(&d->saved_lst);
1045 ubifs_get_lp_stats(c, &lst); 1069 ubifs_msg("saved budgeting info dump");
1046 1070 dbg_dump_budg(c, &d->saved_bi);
1071 ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt);
1047 ubifs_msg("current lprops statistics dump"); 1072 ubifs_msg("current lprops statistics dump");
1073 ubifs_get_lp_stats(c, &lst);
1048 dbg_dump_lstats(&lst); 1074 dbg_dump_lstats(&lst);
1049 1075 ubifs_msg("current budgeting info dump");
1050 spin_lock(&c->space_lock); 1076 dbg_dump_budg(c, &c->bi);
1051 dbg_dump_budg(c);
1052 spin_unlock(&c->space_lock);
1053 dump_stack(); 1077 dump_stack();
1054 return -EINVAL; 1078 return -EINVAL;
1055} 1079}
@@ -1793,6 +1817,8 @@ static struct fsck_inode *add_inode(struct ubifs_info *c,
1793 struct rb_node **p, *parent = NULL; 1817 struct rb_node **p, *parent = NULL;
1794 struct fsck_inode *fscki; 1818 struct fsck_inode *fscki;
1795 ino_t inum = key_inum_flash(c, &ino->key); 1819 ino_t inum = key_inum_flash(c, &ino->key);
1820 struct inode *inode;
1821 struct ubifs_inode *ui;
1796 1822
1797 p = &fsckd->inodes.rb_node; 1823 p = &fsckd->inodes.rb_node;
1798 while (*p) { 1824 while (*p) {
@@ -1816,19 +1842,46 @@ static struct fsck_inode *add_inode(struct ubifs_info *c,
1816 if (!fscki) 1842 if (!fscki)
1817 return ERR_PTR(-ENOMEM); 1843 return ERR_PTR(-ENOMEM);
1818 1844
1845 inode = ilookup(c->vfs_sb, inum);
1846
1819 fscki->inum = inum; 1847 fscki->inum = inum;
1820 fscki->nlink = le32_to_cpu(ino->nlink); 1848 /*
1821 fscki->size = le64_to_cpu(ino->size); 1849 * If the inode is present in the VFS inode cache, use it instead of
1822 fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); 1850 * the on-flash inode which might be out-of-date. E.g., the size might
1823 fscki->xattr_sz = le32_to_cpu(ino->xattr_size); 1851 * be out-of-date. If we do not do this, the following may happen, for
1824 fscki->xattr_nms = le32_to_cpu(ino->xattr_names); 1852 * example:
1825 fscki->mode = le32_to_cpu(ino->mode); 1853 * 1. A power cut happens
1854 * 2. We mount the file-system R/O, the replay process fixes up the
1855 * inode size in the VFS cache, but on on-flash.
1856 * 3. 'check_leaf()' fails because it hits a data node beyond inode
1857 * size.
1858 */
1859 if (!inode) {
1860 fscki->nlink = le32_to_cpu(ino->nlink);
1861 fscki->size = le64_to_cpu(ino->size);
1862 fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
1863 fscki->xattr_sz = le32_to_cpu(ino->xattr_size);
1864 fscki->xattr_nms = le32_to_cpu(ino->xattr_names);
1865 fscki->mode = le32_to_cpu(ino->mode);
1866 } else {
1867 ui = ubifs_inode(inode);
1868 fscki->nlink = inode->i_nlink;
1869 fscki->size = inode->i_size;
1870 fscki->xattr_cnt = ui->xattr_cnt;
1871 fscki->xattr_sz = ui->xattr_size;
1872 fscki->xattr_nms = ui->xattr_names;
1873 fscki->mode = inode->i_mode;
1874 iput(inode);
1875 }
1876
1826 if (S_ISDIR(fscki->mode)) { 1877 if (S_ISDIR(fscki->mode)) {
1827 fscki->calc_sz = UBIFS_INO_NODE_SZ; 1878 fscki->calc_sz = UBIFS_INO_NODE_SZ;
1828 fscki->calc_cnt = 2; 1879 fscki->calc_cnt = 2;
1829 } 1880 }
1881
1830 rb_link_node(&fscki->rb, parent, p); 1882 rb_link_node(&fscki->rb, parent, p);
1831 rb_insert_color(&fscki->rb, &fsckd->inodes); 1883 rb_insert_color(&fscki->rb, &fsckd->inodes);
1884
1832 return fscki; 1885 return fscki;
1833} 1886}
1834 1887
@@ -2421,7 +2474,8 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
2421 hashb = key_block(c, &sb->key); 2474 hashb = key_block(c, &sb->key);
2422 2475
2423 if (hasha > hashb) { 2476 if (hasha > hashb) {
2424 ubifs_err("larger hash %u goes before %u", hasha, hashb); 2477 ubifs_err("larger hash %u goes before %u",
2478 hasha, hashb);
2425 goto error_dump; 2479 goto error_dump;
2426 } 2480 }
2427 } 2481 }
@@ -2437,14 +2491,12 @@ error_dump:
2437 return 0; 2491 return 0;
2438} 2492}
2439 2493
2440static int invocation_cnt;
2441
2442int dbg_force_in_the_gaps(void) 2494int dbg_force_in_the_gaps(void)
2443{ 2495{
2444 if (!dbg_force_in_the_gaps_enabled) 2496 if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
2445 return 0; 2497 return 0;
2446 /* Force in-the-gaps every 8th commit */ 2498
2447 return !((invocation_cnt++) & 0x7); 2499 return !(random32() & 7);
2448} 2500}
2449 2501
2450/* Failure mode for recovery testing */ 2502/* Failure mode for recovery testing */
@@ -2632,7 +2684,7 @@ int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
2632 int len, int check) 2684 int len, int check)
2633{ 2685{
2634 if (in_failure_mode(desc)) 2686 if (in_failure_mode(desc))
2635 return -EIO; 2687 return -EROFS;
2636 return ubi_leb_read(desc, lnum, buf, offset, len, check); 2688 return ubi_leb_read(desc, lnum, buf, offset, len, check);
2637} 2689}
2638 2690
@@ -2642,7 +2694,7 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
2642 int err, failing; 2694 int err, failing;
2643 2695
2644 if (in_failure_mode(desc)) 2696 if (in_failure_mode(desc))
2645 return -EIO; 2697 return -EROFS;
2646 failing = do_fail(desc, lnum, 1); 2698 failing = do_fail(desc, lnum, 1);
2647 if (failing) 2699 if (failing)
2648 cut_data(buf, len); 2700 cut_data(buf, len);
@@ -2650,7 +2702,7 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
2650 if (err) 2702 if (err)
2651 return err; 2703 return err;
2652 if (failing) 2704 if (failing)
2653 return -EIO; 2705 return -EROFS;
2654 return 0; 2706 return 0;
2655} 2707}
2656 2708
@@ -2660,12 +2712,12 @@ int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
2660 int err; 2712 int err;
2661 2713
2662 if (do_fail(desc, lnum, 1)) 2714 if (do_fail(desc, lnum, 1))
2663 return -EIO; 2715 return -EROFS;
2664 err = ubi_leb_change(desc, lnum, buf, len, dtype); 2716 err = ubi_leb_change(desc, lnum, buf, len, dtype);
2665 if (err) 2717 if (err)
2666 return err; 2718 return err;
2667 if (do_fail(desc, lnum, 1)) 2719 if (do_fail(desc, lnum, 1))
2668 return -EIO; 2720 return -EROFS;
2669 return 0; 2721 return 0;
2670} 2722}
2671 2723
@@ -2674,12 +2726,12 @@ int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum)
2674 int err; 2726 int err;
2675 2727
2676 if (do_fail(desc, lnum, 0)) 2728 if (do_fail(desc, lnum, 0))
2677 return -EIO; 2729 return -EROFS;
2678 err = ubi_leb_erase(desc, lnum); 2730 err = ubi_leb_erase(desc, lnum);
2679 if (err) 2731 if (err)
2680 return err; 2732 return err;
2681 if (do_fail(desc, lnum, 0)) 2733 if (do_fail(desc, lnum, 0))
2682 return -EIO; 2734 return -EROFS;
2683 return 0; 2735 return 0;
2684} 2736}
2685 2737
@@ -2688,19 +2740,19 @@ int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum)
2688 int err; 2740 int err;
2689 2741
2690 if (do_fail(desc, lnum, 0)) 2742 if (do_fail(desc, lnum, 0))
2691 return -EIO; 2743 return -EROFS;
2692 err = ubi_leb_unmap(desc, lnum); 2744 err = ubi_leb_unmap(desc, lnum);
2693 if (err) 2745 if (err)
2694 return err; 2746 return err;
2695 if (do_fail(desc, lnum, 0)) 2747 if (do_fail(desc, lnum, 0))
2696 return -EIO; 2748 return -EROFS;
2697 return 0; 2749 return 0;
2698} 2750}
2699 2751
2700int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) 2752int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum)
2701{ 2753{
2702 if (in_failure_mode(desc)) 2754 if (in_failure_mode(desc))
2703 return -EIO; 2755 return -EROFS;
2704 return ubi_is_mapped(desc, lnum); 2756 return ubi_is_mapped(desc, lnum);
2705} 2757}
2706 2758
@@ -2709,12 +2761,12 @@ int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype)
2709 int err; 2761 int err;
2710 2762
2711 if (do_fail(desc, lnum, 0)) 2763 if (do_fail(desc, lnum, 0))
2712 return -EIO; 2764 return -EROFS;
2713 err = ubi_leb_map(desc, lnum, dtype); 2765 err = ubi_leb_map(desc, lnum, dtype);
2714 if (err) 2766 if (err)
2715 return err; 2767 return err;
2716 if (do_fail(desc, lnum, 0)) 2768 if (do_fail(desc, lnum, 0))
2717 return -EIO; 2769 return -EROFS;
2718 return 0; 2770 return 0;
2719} 2771}
2720 2772
@@ -2784,7 +2836,7 @@ void dbg_debugfs_exit(void)
2784static int open_debugfs_file(struct inode *inode, struct file *file) 2836static int open_debugfs_file(struct inode *inode, struct file *file)
2785{ 2837{
2786 file->private_data = inode->i_private; 2838 file->private_data = inode->i_private;
2787 return 0; 2839 return nonseekable_open(inode, file);
2788} 2840}
2789 2841
2790static ssize_t write_debugfs_file(struct file *file, const char __user *buf, 2842static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
@@ -2795,18 +2847,15 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
2795 2847
2796 if (file->f_path.dentry == d->dfs_dump_lprops) 2848 if (file->f_path.dentry == d->dfs_dump_lprops)
2797 dbg_dump_lprops(c); 2849 dbg_dump_lprops(c);
2798 else if (file->f_path.dentry == d->dfs_dump_budg) { 2850 else if (file->f_path.dentry == d->dfs_dump_budg)
2799 spin_lock(&c->space_lock); 2851 dbg_dump_budg(c, &c->bi);
2800 dbg_dump_budg(c); 2852 else if (file->f_path.dentry == d->dfs_dump_tnc) {
2801 spin_unlock(&c->space_lock);
2802 } else if (file->f_path.dentry == d->dfs_dump_tnc) {
2803 mutex_lock(&c->tnc_mutex); 2853 mutex_lock(&c->tnc_mutex);
2804 dbg_dump_tnc(c); 2854 dbg_dump_tnc(c);
2805 mutex_unlock(&c->tnc_mutex); 2855 mutex_unlock(&c->tnc_mutex);
2806 } else 2856 } else
2807 return -EINVAL; 2857 return -EINVAL;
2808 2858
2809 *ppos += count;
2810 return count; 2859 return count;
2811} 2860}
2812 2861
@@ -2814,7 +2863,7 @@ static const struct file_operations dfs_fops = {
2814 .open = open_debugfs_file, 2863 .open = open_debugfs_file,
2815 .write = write_debugfs_file, 2864 .write = write_debugfs_file,
2816 .owner = THIS_MODULE, 2865 .owner = THIS_MODULE,
2817 .llseek = default_llseek, 2866 .llseek = no_llseek,
2818}; 2867};
2819 2868
2820/** 2869/**
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index e6493cac193d..a811ac4a26bb 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -31,6 +31,8 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
31 31
32#ifdef CONFIG_UBIFS_FS_DEBUG 32#ifdef CONFIG_UBIFS_FS_DEBUG
33 33
34#include <linux/random.h>
35
34/** 36/**
35 * ubifs_debug_info - per-FS debugging information. 37 * ubifs_debug_info - per-FS debugging information.
36 * @old_zroot: old index root - used by 'dbg_check_old_index()' 38 * @old_zroot: old index root - used by 'dbg_check_old_index()'
@@ -50,13 +52,15 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
50 * @new_ihead_offs: used by debugging to check @c->ihead_offs 52 * @new_ihead_offs: used by debugging to check @c->ihead_offs
51 * 53 *
52 * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()') 54 * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()')
53 * @saved_free: saved free space (used by 'dbg_save_space_info()') 55 * @saved_bi: saved budgeting information
56 * @saved_free: saved amount of free space
57 * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt
54 * 58 *
55 * dfs_dir_name: name of debugfs directory containing this file-system's files 59 * @dfs_dir_name: name of debugfs directory containing this file-system's files
56 * dfs_dir: direntry object of the file-system debugfs directory 60 * @dfs_dir: direntry object of the file-system debugfs directory
57 * dfs_dump_lprops: "dump lprops" debugfs knob 61 * @dfs_dump_lprops: "dump lprops" debugfs knob
58 * dfs_dump_budg: "dump budgeting information" debugfs knob 62 * @dfs_dump_budg: "dump budgeting information" debugfs knob
59 * dfs_dump_tnc: "dump TNC" debugfs knob 63 * @dfs_dump_tnc: "dump TNC" debugfs knob
60 */ 64 */
61struct ubifs_debug_info { 65struct ubifs_debug_info {
62 struct ubifs_zbranch old_zroot; 66 struct ubifs_zbranch old_zroot;
@@ -76,7 +80,9 @@ struct ubifs_debug_info {
76 int new_ihead_offs; 80 int new_ihead_offs;
77 81
78 struct ubifs_lp_stats saved_lst; 82 struct ubifs_lp_stats saved_lst;
83 struct ubifs_budg_info saved_bi;
79 long long saved_free; 84 long long saved_free;
85 int saved_idx_gc_cnt;
80 86
81 char dfs_dir_name[100]; 87 char dfs_dir_name[100];
82 struct dentry *dfs_dir; 88 struct dentry *dfs_dir;
@@ -101,23 +107,7 @@ struct ubifs_debug_info {
101 } \ 107 } \
102} while (0) 108} while (0)
103 109
104#define dbg_dump_stack() do { \ 110#define dbg_dump_stack() dump_stack()
105 if (!dbg_failure_mode) \
106 dump_stack(); \
107} while (0)
108
109/* Generic debugging messages */
110#define dbg_msg(fmt, ...) do { \
111 spin_lock(&dbg_lock); \
112 printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \
113 __func__, ##__VA_ARGS__); \
114 spin_unlock(&dbg_lock); \
115} while (0)
116
117#define dbg_do_msg(typ, fmt, ...) do { \
118 if (ubifs_msg_flags & typ) \
119 dbg_msg(fmt, ##__VA_ARGS__); \
120} while (0)
121 111
122#define dbg_err(fmt, ...) do { \ 112#define dbg_err(fmt, ...) do { \
123 spin_lock(&dbg_lock); \ 113 spin_lock(&dbg_lock); \
@@ -137,77 +127,40 @@ const char *dbg_key_str1(const struct ubifs_info *c,
137#define DBGKEY(key) dbg_key_str0(c, (key)) 127#define DBGKEY(key) dbg_key_str0(c, (key))
138#define DBGKEY1(key) dbg_key_str1(c, (key)) 128#define DBGKEY1(key) dbg_key_str1(c, (key))
139 129
140/* General messages */ 130#define ubifs_dbg_msg(type, fmt, ...) do { \
141#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) 131 spin_lock(&dbg_lock); \
132 pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \
133 spin_unlock(&dbg_lock); \
134} while (0)
142 135
136/* Just a debugging messages not related to any specific UBIFS subsystem */
137#define dbg_msg(fmt, ...) ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__)
138/* General messages */
139#define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__)
143/* Additional journal messages */ 140/* Additional journal messages */
144#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) 141#define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__)
145
146/* Additional TNC messages */ 142/* Additional TNC messages */
147#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) 143#define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__)
148
149/* Additional lprops messages */ 144/* Additional lprops messages */
150#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) 145#define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__)
151
152/* Additional LEB find messages */ 146/* Additional LEB find messages */
153#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) 147#define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__)
154
155/* Additional mount messages */ 148/* Additional mount messages */
156#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) 149#define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__)
157
158/* Additional I/O messages */ 150/* Additional I/O messages */
159#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) 151#define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__)
160
161/* Additional commit messages */ 152/* Additional commit messages */
162#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) 153#define dbg_cmt(fmt, ...) ubifs_dbg_msg("cmt", fmt, ##__VA_ARGS__)
163
164/* Additional budgeting messages */ 154/* Additional budgeting messages */
165#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) 155#define dbg_budg(fmt, ...) ubifs_dbg_msg("budg", fmt, ##__VA_ARGS__)
166
167/* Additional log messages */ 156/* Additional log messages */
168#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) 157#define dbg_log(fmt, ...) ubifs_dbg_msg("log", fmt, ##__VA_ARGS__)
169
170/* Additional gc messages */ 158/* Additional gc messages */
171#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) 159#define dbg_gc(fmt, ...) ubifs_dbg_msg("gc", fmt, ##__VA_ARGS__)
172
173/* Additional scan messages */ 160/* Additional scan messages */
174#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) 161#define dbg_scan(fmt, ...) ubifs_dbg_msg("scan", fmt, ##__VA_ARGS__)
175
176/* Additional recovery messages */ 162/* Additional recovery messages */
177#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) 163#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__)
178
179/*
180 * Debugging message type flags.
181 *
182 * UBIFS_MSG_GEN: general messages
183 * UBIFS_MSG_JNL: journal messages
184 * UBIFS_MSG_MNT: mount messages
185 * UBIFS_MSG_CMT: commit messages
186 * UBIFS_MSG_FIND: LEB find messages
187 * UBIFS_MSG_BUDG: budgeting messages
188 * UBIFS_MSG_GC: garbage collection messages
189 * UBIFS_MSG_TNC: TNC messages
190 * UBIFS_MSG_LP: lprops messages
191 * UBIFS_MSG_IO: I/O messages
192 * UBIFS_MSG_LOG: log messages
193 * UBIFS_MSG_SCAN: scan messages
194 * UBIFS_MSG_RCVRY: recovery messages
195 */
196enum {
197 UBIFS_MSG_GEN = 0x1,
198 UBIFS_MSG_JNL = 0x2,
199 UBIFS_MSG_MNT = 0x4,
200 UBIFS_MSG_CMT = 0x8,
201 UBIFS_MSG_FIND = 0x10,
202 UBIFS_MSG_BUDG = 0x20,
203 UBIFS_MSG_GC = 0x40,
204 UBIFS_MSG_TNC = 0x80,
205 UBIFS_MSG_LP = 0x100,
206 UBIFS_MSG_IO = 0x200,
207 UBIFS_MSG_LOG = 0x400,
208 UBIFS_MSG_SCAN = 0x800,
209 UBIFS_MSG_RCVRY = 0x1000,
210};
211 164
212/* 165/*
213 * Debugging check flags. 166 * Debugging check flags.
@@ -233,11 +186,9 @@ enum {
233/* 186/*
234 * Special testing flags. 187 * Special testing flags.
235 * 188 *
236 * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method
237 * UBIFS_TST_RCVRY: failure mode for recovery testing 189 * UBIFS_TST_RCVRY: failure mode for recovery testing
238 */ 190 */
239enum { 191enum {
240 UBIFS_TST_FORCE_IN_THE_GAPS = 0x2,
241 UBIFS_TST_RCVRY = 0x4, 192 UBIFS_TST_RCVRY = 0x4,
242}; 193};
243 194
@@ -262,7 +213,7 @@ void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum,
262 int offs); 213 int offs);
263void dbg_dump_budget_req(const struct ubifs_budget_req *req); 214void dbg_dump_budget_req(const struct ubifs_budget_req *req);
264void dbg_dump_lstats(const struct ubifs_lp_stats *lst); 215void dbg_dump_lstats(const struct ubifs_lp_stats *lst);
265void dbg_dump_budg(struct ubifs_info *c); 216void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi);
266void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); 217void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp);
267void dbg_dump_lprops(struct ubifs_info *c); 218void dbg_dump_lprops(struct ubifs_info *c);
268void dbg_dump_lpt_info(struct ubifs_info *c); 219void dbg_dump_lpt_info(struct ubifs_info *c);
@@ -304,18 +255,16 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head);
304int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); 255int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head);
305 256
306/* Force the use of in-the-gaps method for testing */ 257/* Force the use of in-the-gaps method for testing */
307 258static inline int dbg_force_in_the_gaps_enabled(void)
308#define dbg_force_in_the_gaps_enabled \ 259{
309 (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS) 260 return ubifs_chk_flags & UBIFS_CHK_GEN;
310 261}
311int dbg_force_in_the_gaps(void); 262int dbg_force_in_the_gaps(void);
312 263
313/* Failure mode for recovery testing */ 264/* Failure mode for recovery testing */
314
315#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) 265#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY)
316 266
317#ifndef UBIFS_DBG_PRESERVE_UBI 267#ifndef UBIFS_DBG_PRESERVE_UBI
318
319#define ubi_leb_read dbg_leb_read 268#define ubi_leb_read dbg_leb_read
320#define ubi_leb_write dbg_leb_write 269#define ubi_leb_write dbg_leb_write
321#define ubi_leb_change dbg_leb_change 270#define ubi_leb_change dbg_leb_change
@@ -323,7 +272,6 @@ int dbg_force_in_the_gaps(void);
323#define ubi_leb_unmap dbg_leb_unmap 272#define ubi_leb_unmap dbg_leb_unmap
324#define ubi_is_mapped dbg_is_mapped 273#define ubi_is_mapped dbg_is_mapped
325#define ubi_leb_map dbg_leb_map 274#define ubi_leb_map dbg_leb_map
326
327#endif 275#endif
328 276
329int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, 277int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
@@ -370,33 +318,33 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
370 __func__, __LINE__, current->pid); \ 318 __func__, __LINE__, current->pid); \
371} while (0) 319} while (0)
372 320
373#define dbg_err(fmt, ...) do { \ 321#define dbg_err(fmt, ...) do { \
374 if (0) \ 322 if (0) \
375 ubifs_err(fmt, ##__VA_ARGS__); \ 323 ubifs_err(fmt, ##__VA_ARGS__); \
376} while (0) 324} while (0)
377 325
378#define dbg_msg(fmt, ...) do { \ 326#define ubifs_dbg_msg(fmt, ...) do { \
379 if (0) \ 327 if (0) \
380 printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \ 328 pr_debug(fmt "\n", ##__VA_ARGS__); \
381 current->pid, __func__, ##__VA_ARGS__); \
382} while (0) 329} while (0)
383 330
384#define dbg_dump_stack() 331#define dbg_dump_stack()
385#define ubifs_assert_cmt_locked(c) 332#define ubifs_assert_cmt_locked(c)
386 333
387#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 334#define dbg_msg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
388#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 335#define dbg_gen(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
389#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 336#define dbg_jnl(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
390#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 337#define dbg_tnc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
391#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 338#define dbg_lp(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
392#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 339#define dbg_find(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
393#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 340#define dbg_mnt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
394#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 341#define dbg_io(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
395#define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 342#define dbg_cmt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
396#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 343#define dbg_budg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
397#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 344#define dbg_log(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
398#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 345#define dbg_gc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
399#define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 346#define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
347#define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
400 348
401#define DBGKEY(key) ((char *)(key)) 349#define DBGKEY(key) ((char *)(key))
402#define DBGKEY1(key) ((char *)(key)) 350#define DBGKEY1(key) ((char *)(key))
@@ -420,7 +368,9 @@ static inline void
420dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; } 368dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; }
421static inline void 369static inline void
422dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; } 370dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; }
423static inline void dbg_dump_budg(struct ubifs_info *c) { return; } 371static inline void
372dbg_dump_budg(struct ubifs_info *c,
373 const struct ubifs_budg_info *bi) { return; }
424static inline void dbg_dump_lprop(const struct ubifs_info *c, 374static inline void dbg_dump_lprop(const struct ubifs_info *c,
425 const struct ubifs_lprops *lp) { return; } 375 const struct ubifs_lprops *lp) { return; }
426static inline void dbg_dump_lprops(struct ubifs_info *c) { return; } 376static inline void dbg_dump_lprops(struct ubifs_info *c) { return; }
@@ -482,8 +432,8 @@ dbg_check_nondata_nodes_order(struct ubifs_info *c,
482 struct list_head *head) { return 0; } 432 struct list_head *head) { return 0; }
483 433
484static inline int dbg_force_in_the_gaps(void) { return 0; } 434static inline int dbg_force_in_the_gaps(void) { return 0; }
485#define dbg_force_in_the_gaps_enabled 0 435#define dbg_force_in_the_gaps_enabled() 0
486#define dbg_failure_mode 0 436#define dbg_failure_mode 0
487 437
488static inline int dbg_debugfs_init(void) { return 0; } 438static inline int dbg_debugfs_init(void) { return 0; }
489static inline void dbg_debugfs_exit(void) { return; } 439static inline void dbg_debugfs_exit(void) { return; }
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 7217d67a80a6..ef5abd38f0bf 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -603,7 +603,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
603 ubifs_release_budget(c, &req); 603 ubifs_release_budget(c, &req);
604 else { 604 else {
605 /* We've deleted something - clean the "no space" flags */ 605 /* We've deleted something - clean the "no space" flags */
606 c->nospace = c->nospace_rp = 0; 606 c->bi.nospace = c->bi.nospace_rp = 0;
607 smp_wmb(); 607 smp_wmb();
608 } 608 }
609 return 0; 609 return 0;
@@ -693,7 +693,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
693 ubifs_release_budget(c, &req); 693 ubifs_release_budget(c, &req);
694 else { 694 else {
695 /* We've deleted something - clean the "no space" flags */ 695 /* We've deleted something - clean the "no space" flags */
696 c->nospace = c->nospace_rp = 0; 696 c->bi.nospace = c->bi.nospace_rp = 0;
697 smp_wmb(); 697 smp_wmb();
698 } 698 }
699 return 0; 699 return 0;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index b286db79c686..5e7fccfc4b29 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -212,7 +212,7 @@ static void release_new_page_budget(struct ubifs_info *c)
212 */ 212 */
213static void release_existing_page_budget(struct ubifs_info *c) 213static void release_existing_page_budget(struct ubifs_info *c)
214{ 214{
215 struct ubifs_budget_req req = { .dd_growth = c->page_budget}; 215 struct ubifs_budget_req req = { .dd_growth = c->bi.page_budget};
216 216
217 ubifs_release_budget(c, &req); 217 ubifs_release_budget(c, &req);
218} 218}
@@ -971,11 +971,11 @@ static int do_writepage(struct page *page, int len)
971 * the page locked, and it locks @ui_mutex. However, write-back does take inode 971 * the page locked, and it locks @ui_mutex. However, write-back does take inode
972 * @i_mutex, which means other VFS operations may be run on this inode at the 972 * @i_mutex, which means other VFS operations may be run on this inode at the
973 * same time. And the problematic one is truncation to smaller size, from where 973 * same time. And the problematic one is truncation to smaller size, from where
974 * we have to call 'truncate_setsize()', which first changes @inode->i_size, then 974 * we have to call 'truncate_setsize()', which first changes @inode->i_size,
975 * drops the truncated pages. And while dropping the pages, it takes the page 975 * then drops the truncated pages. And while dropping the pages, it takes the
976 * lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with 976 * page lock. This means that 'do_truncation()' cannot call 'truncate_setsize()'
977 * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This 977 * with @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'.
978 * means that @inode->i_size is changed while @ui_mutex is unlocked. 978 * This means that @inode->i_size is changed while @ui_mutex is unlocked.
979 * 979 *
980 * XXX(truncate): with the new truncate sequence this is not true anymore, 980 * XXX(truncate): with the new truncate sequence this is not true anymore,
981 * and the calls to truncate_setsize can be move around freely. They should 981 * and the calls to truncate_setsize can be move around freely. They should
@@ -1189,7 +1189,7 @@ out_budg:
1189 if (budgeted) 1189 if (budgeted)
1190 ubifs_release_budget(c, &req); 1190 ubifs_release_budget(c, &req);
1191 else { 1191 else {
1192 c->nospace = c->nospace_rp = 0; 1192 c->bi.nospace = c->bi.nospace_rp = 0;
1193 smp_wmb(); 1193 smp_wmb();
1194 } 1194 }
1195 return err; 1195 return err;
@@ -1312,7 +1312,11 @@ int ubifs_fsync(struct file *file, int datasync)
1312 1312
1313 dbg_gen("syncing inode %lu", inode->i_ino); 1313 dbg_gen("syncing inode %lu", inode->i_ino);
1314 1314
1315 if (inode->i_sb->s_flags & MS_RDONLY) 1315 if (c->ro_mount)
1316 /*
1317 * For some really strange reasons VFS does not filter out
1318 * 'fsync()' for R/O mounted file-systems as per 2.6.39.
1319 */
1316 return 0; 1320 return 0;
1317 1321
1318 /* 1322 /*
@@ -1432,10 +1436,11 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
1432} 1436}
1433 1437
1434/* 1438/*
1435 * mmap()d file has taken write protection fault and is being made 1439 * mmap()d file has taken write protection fault and is being made writable.
1436 * writable. UBIFS must ensure page is budgeted for. 1440 * UBIFS must ensure page is budgeted for.
1437 */ 1441 */
1438static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 1442static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
1443 struct vm_fault *vmf)
1439{ 1444{
1440 struct page *page = vmf->page; 1445 struct page *page = vmf->page;
1441 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 1446 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
@@ -1536,7 +1541,6 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1536{ 1541{
1537 int err; 1542 int err;
1538 1543
1539 /* 'generic_file_mmap()' takes care of NOMMU case */
1540 err = generic_file_mmap(file, vma); 1544 err = generic_file_mmap(file, vma);
1541 if (err) 1545 if (err)
1542 return err; 1546 return err;
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 1d54383d1269..2559d174e004 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -252,8 +252,8 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
252 * But if the index takes fewer LEBs than it is reserved for it, 252 * But if the index takes fewer LEBs than it is reserved for it,
253 * this function must avoid picking those reserved LEBs. 253 * this function must avoid picking those reserved LEBs.
254 */ 254 */
255 if (c->min_idx_lebs >= c->lst.idx_lebs) { 255 if (c->bi.min_idx_lebs >= c->lst.idx_lebs) {
256 rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; 256 rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
257 exclude_index = 1; 257 exclude_index = 1;
258 } 258 }
259 spin_unlock(&c->space_lock); 259 spin_unlock(&c->space_lock);
@@ -276,7 +276,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
276 pick_free = 0; 276 pick_free = 0;
277 } else { 277 } else {
278 spin_lock(&c->space_lock); 278 spin_lock(&c->space_lock);
279 exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs); 279 exclude_index = (c->bi.min_idx_lebs >= c->lst.idx_lebs);
280 spin_unlock(&c->space_lock); 280 spin_unlock(&c->space_lock);
281 } 281 }
282 282
@@ -501,8 +501,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
501 501
502 /* Check if there are enough empty LEBs for commit */ 502 /* Check if there are enough empty LEBs for commit */
503 spin_lock(&c->space_lock); 503 spin_lock(&c->space_lock);
504 if (c->min_idx_lebs > c->lst.idx_lebs) 504 if (c->bi.min_idx_lebs > c->lst.idx_lebs)
505 rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; 505 rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
506 else 506 else
507 rsvd_idx_lebs = 0; 507 rsvd_idx_lebs = 0;
508 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - 508 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 151f10882820..ded29f6224c2 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -100,6 +100,10 @@ static int switch_gc_head(struct ubifs_info *c)
100 if (err) 100 if (err)
101 return err; 101 return err;
102 102
103 err = ubifs_wbuf_sync_nolock(wbuf);
104 if (err)
105 return err;
106
103 err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0); 107 err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0);
104 if (err) 108 if (err)
105 return err; 109 return err;
@@ -118,7 +122,7 @@ static int switch_gc_head(struct ubifs_info *c)
118 * This function compares data nodes @a and @b. Returns %1 if @a has greater 122 * This function compares data nodes @a and @b. Returns %1 if @a has greater
119 * inode or block number, and %-1 otherwise. 123 * inode or block number, and %-1 otherwise.
120 */ 124 */
121int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) 125static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
122{ 126{
123 ino_t inuma, inumb; 127 ino_t inuma, inumb;
124 struct ubifs_info *c = priv; 128 struct ubifs_info *c = priv;
@@ -161,7 +165,8 @@ int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
161 * first and sorted by length in descending order. Directory entry nodes go 165 * first and sorted by length in descending order. Directory entry nodes go
162 * after inode nodes and are sorted in ascending hash valuer order. 166 * after inode nodes and are sorted in ascending hash valuer order.
163 */ 167 */
164int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) 168static int nondata_nodes_cmp(void *priv, struct list_head *a,
169 struct list_head *b)
165{ 170{
166 ino_t inuma, inumb; 171 ino_t inuma, inumb;
167 struct ubifs_info *c = priv; 172 struct ubifs_info *c = priv;
@@ -473,6 +478,37 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
473 ubifs_assert(c->gc_lnum != lnum); 478 ubifs_assert(c->gc_lnum != lnum);
474 ubifs_assert(wbuf->lnum != lnum); 479 ubifs_assert(wbuf->lnum != lnum);
475 480
481 if (lp->free + lp->dirty == c->leb_size) {
482 /* Special case - a free LEB */
483 dbg_gc("LEB %d is free, return it", lp->lnum);
484 ubifs_assert(!(lp->flags & LPROPS_INDEX));
485
486 if (lp->free != c->leb_size) {
487 /*
488 * Write buffers must be sync'd before unmapping
489 * freeable LEBs, because one of them may contain data
490 * which obsoletes something in 'lp->pnum'.
491 */
492 err = gc_sync_wbufs(c);
493 if (err)
494 return err;
495 err = ubifs_change_one_lp(c, lp->lnum, c->leb_size,
496 0, 0, 0, 0);
497 if (err)
498 return err;
499 }
500 err = ubifs_leb_unmap(c, lp->lnum);
501 if (err)
502 return err;
503
504 if (c->gc_lnum == -1) {
505 c->gc_lnum = lnum;
506 return LEB_RETAINED;
507 }
508
509 return LEB_FREED;
510 }
511
476 /* 512 /*
477 * We scan the entire LEB even though we only really need to scan up to 513 * We scan the entire LEB even though we only really need to scan up to
478 * (c->leb_size - lp->free). 514 * (c->leb_size - lp->free).
@@ -682,37 +718,6 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
682 "(min. space %d)", lp.lnum, lp.free, lp.dirty, 718 "(min. space %d)", lp.lnum, lp.free, lp.dirty,
683 lp.free + lp.dirty, min_space); 719 lp.free + lp.dirty, min_space);
684 720
685 if (lp.free + lp.dirty == c->leb_size) {
686 /* An empty LEB was returned */
687 dbg_gc("LEB %d is free, return it", lp.lnum);
688 /*
689 * ubifs_find_dirty_leb() doesn't return freeable index
690 * LEBs.
691 */
692 ubifs_assert(!(lp.flags & LPROPS_INDEX));
693 if (lp.free != c->leb_size) {
694 /*
695 * Write buffers must be sync'd before
696 * unmapping freeable LEBs, because one of them
697 * may contain data which obsoletes something
698 * in 'lp.pnum'.
699 */
700 ret = gc_sync_wbufs(c);
701 if (ret)
702 goto out;
703 ret = ubifs_change_one_lp(c, lp.lnum,
704 c->leb_size, 0, 0, 0,
705 0);
706 if (ret)
707 goto out;
708 }
709 ret = ubifs_leb_unmap(c, lp.lnum);
710 if (ret)
711 goto out;
712 ret = lp.lnum;
713 break;
714 }
715
716 space_before = c->leb_size - wbuf->offs - wbuf->used; 721 space_before = c->leb_size - wbuf->offs - wbuf->used;
717 if (wbuf->lnum == -1) 722 if (wbuf->lnum == -1)
718 space_before = 0; 723 space_before = 0;
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index dfd168b7807e..166951e0dcd3 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -393,7 +393,7 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
393 ubifs_assert(wbuf->size % c->min_io_size == 0); 393 ubifs_assert(wbuf->size % c->min_io_size == 0);
394 ubifs_assert(!c->ro_media && !c->ro_mount); 394 ubifs_assert(!c->ro_media && !c->ro_mount);
395 if (c->leb_size - wbuf->offs >= c->max_write_size) 395 if (c->leb_size - wbuf->offs >= c->max_write_size)
396 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size )); 396 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
397 397
398 if (c->ro_error) 398 if (c->ro_error)
399 return -EROFS; 399 return -EROFS;
@@ -452,8 +452,8 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
452 * @dtype: data type 452 * @dtype: data type
453 * 453 *
454 * This function targets the write-buffer to logical eraseblock @lnum:@offs. 454 * This function targets the write-buffer to logical eraseblock @lnum:@offs.
455 * The write-buffer is synchronized if it is not empty. Returns zero in case of 455 * The write-buffer has to be empty. Returns zero in case of success and a
456 * success and a negative error code in case of failure. 456 * negative error code in case of failure.
457 */ 457 */
458int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, 458int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
459 int dtype) 459 int dtype)
@@ -465,13 +465,7 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
465 ubifs_assert(offs >= 0 && offs <= c->leb_size); 465 ubifs_assert(offs >= 0 && offs <= c->leb_size);
466 ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); 466 ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
467 ubifs_assert(lnum != wbuf->lnum); 467 ubifs_assert(lnum != wbuf->lnum);
468 468 ubifs_assert(wbuf->used == 0);
469 if (wbuf->used > 0) {
470 int err = ubifs_wbuf_sync_nolock(wbuf);
471
472 if (err)
473 return err;
474 }
475 469
476 spin_lock(&wbuf->lock); 470 spin_lock(&wbuf->lock);
477 wbuf->lnum = lnum; 471 wbuf->lnum = lnum;
@@ -573,7 +567,7 @@ out_timers:
573int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) 567int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
574{ 568{
575 struct ubifs_info *c = wbuf->c; 569 struct ubifs_info *c = wbuf->c;
576 int err, written, n, aligned_len = ALIGN(len, 8), offs; 570 int err, written, n, aligned_len = ALIGN(len, 8);
577 571
578 dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, 572 dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len,
579 dbg_ntype(((struct ubifs_ch *)buf)->node_type), 573 dbg_ntype(((struct ubifs_ch *)buf)->node_type),
@@ -588,7 +582,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
588 ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); 582 ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
589 ubifs_assert(!c->ro_media && !c->ro_mount); 583 ubifs_assert(!c->ro_media && !c->ro_mount);
590 if (c->leb_size - wbuf->offs >= c->max_write_size) 584 if (c->leb_size - wbuf->offs >= c->max_write_size)
591 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size )); 585 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
592 586
593 if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { 587 if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
594 err = -ENOSPC; 588 err = -ENOSPC;
@@ -636,7 +630,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
636 goto exit; 630 goto exit;
637 } 631 }
638 632
639 offs = wbuf->offs;
640 written = 0; 633 written = 0;
641 634
642 if (wbuf->used) { 635 if (wbuf->used) {
@@ -653,7 +646,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
653 if (err) 646 if (err)
654 goto out; 647 goto out;
655 648
656 offs += wbuf->size; 649 wbuf->offs += wbuf->size;
657 len -= wbuf->avail; 650 len -= wbuf->avail;
658 aligned_len -= wbuf->avail; 651 aligned_len -= wbuf->avail;
659 written += wbuf->avail; 652 written += wbuf->avail;
@@ -672,7 +665,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
672 if (err) 665 if (err)
673 goto out; 666 goto out;
674 667
675 offs += wbuf->size; 668 wbuf->offs += wbuf->size;
676 len -= wbuf->size; 669 len -= wbuf->size;
677 aligned_len -= wbuf->size; 670 aligned_len -= wbuf->size;
678 written += wbuf->size; 671 written += wbuf->size;
@@ -687,12 +680,13 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
687 n = aligned_len >> c->max_write_shift; 680 n = aligned_len >> c->max_write_shift;
688 if (n) { 681 if (n) {
689 n <<= c->max_write_shift; 682 n <<= c->max_write_shift;
690 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); 683 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum,
691 err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, 684 wbuf->offs);
692 wbuf->dtype); 685 err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written,
686 wbuf->offs, n, wbuf->dtype);
693 if (err) 687 if (err)
694 goto out; 688 goto out;
695 offs += n; 689 wbuf->offs += n;
696 aligned_len -= n; 690 aligned_len -= n;
697 len -= n; 691 len -= n;
698 written += n; 692 written += n;
@@ -707,7 +701,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
707 */ 701 */
708 memcpy(wbuf->buf, buf + written, len); 702 memcpy(wbuf->buf, buf + written, len);
709 703
710 wbuf->offs = offs;
711 if (c->leb_size - wbuf->offs >= c->max_write_size) 704 if (c->leb_size - wbuf->offs >= c->max_write_size)
712 wbuf->size = c->max_write_size; 705 wbuf->size = c->max_write_size;
713 else 706 else
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index aed25e864227..34b1679e6e3a 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -141,14 +141,8 @@ again:
141 * LEB with some empty space. 141 * LEB with some empty space.
142 */ 142 */
143 lnum = ubifs_find_free_space(c, len, &offs, squeeze); 143 lnum = ubifs_find_free_space(c, len, &offs, squeeze);
144 if (lnum >= 0) { 144 if (lnum >= 0)
145 /* Found an LEB, add it to the journal head */
146 err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
147 if (err)
148 goto out_return;
149 /* A new bud was successfully allocated and added to the log */
150 goto out; 145 goto out;
151 }
152 146
153 err = lnum; 147 err = lnum;
154 if (err != -ENOSPC) 148 if (err != -ENOSPC)
@@ -203,12 +197,23 @@ again:
203 return 0; 197 return 0;
204 } 198 }
205 199
206 err = ubifs_add_bud_to_log(c, jhead, lnum, 0);
207 if (err)
208 goto out_return;
209 offs = 0; 200 offs = 0;
210 201
211out: 202out:
203 /*
204 * Make sure we synchronize the write-buffer before we add the new bud
205 * to the log. Otherwise we may have a power cut after the log
206 * reference node for the last bud (@lnum) is written but before the
207 * write-buffer data are written to the next-to-last bud
208 * (@wbuf->lnum). And the effect would be that the recovery would see
209 * that there is corruption in the next-to-last bud.
210 */
211 err = ubifs_wbuf_sync_nolock(wbuf);
212 if (err)
213 goto out_return;
214 err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
215 if (err)
216 goto out_return;
212 err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype); 217 err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype);
213 if (err) 218 if (err)
214 goto out_unlock; 219 goto out_unlock;
@@ -380,10 +385,8 @@ out:
380 if (err == -ENOSPC) { 385 if (err == -ENOSPC) {
381 /* This are some budgeting problems, print useful information */ 386 /* This are some budgeting problems, print useful information */
382 down_write(&c->commit_sem); 387 down_write(&c->commit_sem);
383 spin_lock(&c->space_lock);
384 dbg_dump_stack(); 388 dbg_dump_stack();
385 dbg_dump_budg(c); 389 dbg_dump_budg(c, &c->bi);
386 spin_unlock(&c->space_lock);
387 dbg_dump_lprops(c); 390 dbg_dump_lprops(c);
388 cmt_retries = dbg_check_lprops(c); 391 cmt_retries = dbg_check_lprops(c);
389 up_write(&c->commit_sem); 392 up_write(&c->commit_sem);
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 40fa780ebea7..affea9494ae2 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -100,20 +100,6 @@ struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum)
100} 100}
101 101
102/** 102/**
103 * next_log_lnum - switch to the next log LEB.
104 * @c: UBIFS file-system description object
105 * @lnum: current log LEB
106 */
107static inline int next_log_lnum(const struct ubifs_info *c, int lnum)
108{
109 lnum += 1;
110 if (lnum > c->log_last)
111 lnum = UBIFS_LOG_LNUM;
112
113 return lnum;
114}
115
116/**
117 * empty_log_bytes - calculate amount of empty space in the log. 103 * empty_log_bytes - calculate amount of empty space in the log.
118 * @c: UBIFS file-system description object 104 * @c: UBIFS file-system description object
119 */ 105 */
@@ -257,7 +243,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
257 ref->jhead = cpu_to_le32(jhead); 243 ref->jhead = cpu_to_le32(jhead);
258 244
259 if (c->lhead_offs > c->leb_size - c->ref_node_alsz) { 245 if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
260 c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); 246 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
261 c->lhead_offs = 0; 247 c->lhead_offs = 0;
262 } 248 }
263 249
@@ -425,7 +411,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
425 411
426 /* Switch to the next log LEB */ 412 /* Switch to the next log LEB */
427 if (c->lhead_offs) { 413 if (c->lhead_offs) {
428 c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); 414 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
429 c->lhead_offs = 0; 415 c->lhead_offs = 0;
430 } 416 }
431 417
@@ -446,7 +432,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
446 432
447 c->lhead_offs += len; 433 c->lhead_offs += len;
448 if (c->lhead_offs == c->leb_size) { 434 if (c->lhead_offs == c->leb_size) {
449 c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); 435 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
450 c->lhead_offs = 0; 436 c->lhead_offs = 0;
451 } 437 }
452 438
@@ -533,7 +519,7 @@ int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum)
533 } 519 }
534 mutex_lock(&c->log_mutex); 520 mutex_lock(&c->log_mutex);
535 for (lnum = old_ltail_lnum; lnum != c->ltail_lnum; 521 for (lnum = old_ltail_lnum; lnum != c->ltail_lnum;
536 lnum = next_log_lnum(c, lnum)) { 522 lnum = ubifs_next_log_lnum(c, lnum)) {
537 dbg_log("unmap log LEB %d", lnum); 523 dbg_log("unmap log LEB %d", lnum);
538 err = ubifs_leb_unmap(c, lnum); 524 err = ubifs_leb_unmap(c, lnum);
539 if (err) 525 if (err)
@@ -642,7 +628,7 @@ static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs,
642 err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM); 628 err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM);
643 if (err) 629 if (err)
644 return err; 630 return err;
645 *lnum = next_log_lnum(c, *lnum); 631 *lnum = ubifs_next_log_lnum(c, *lnum);
646 *offs = 0; 632 *offs = 0;
647 } 633 }
648 memcpy(buf + *offs, node, len); 634 memcpy(buf + *offs, node, len);
@@ -712,7 +698,7 @@ int ubifs_consolidate_log(struct ubifs_info *c)
712 ubifs_scan_destroy(sleb); 698 ubifs_scan_destroy(sleb);
713 if (lnum == c->lhead_lnum) 699 if (lnum == c->lhead_lnum)
714 break; 700 break;
715 lnum = next_log_lnum(c, lnum); 701 lnum = ubifs_next_log_lnum(c, lnum);
716 } 702 }
717 if (offs) { 703 if (offs) {
718 int sz = ALIGN(offs, c->min_io_size); 704 int sz = ALIGN(offs, c->min_io_size);
@@ -732,7 +718,7 @@ int ubifs_consolidate_log(struct ubifs_info *c)
732 /* Unmap remaining LEBs */ 718 /* Unmap remaining LEBs */
733 lnum = write_lnum; 719 lnum = write_lnum;
734 do { 720 do {
735 lnum = next_log_lnum(c, lnum); 721 lnum = ubifs_next_log_lnum(c, lnum);
736 err = ubifs_leb_unmap(c, lnum); 722 err = ubifs_leb_unmap(c, lnum);
737 if (err) 723 if (err)
738 return err; 724 return err;
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 0ee0847f2421..667884f4a615 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -1007,21 +1007,11 @@ out:
1007} 1007}
1008 1008
1009/** 1009/**
1010 * struct scan_check_data - data provided to scan callback function.
1011 * @lst: LEB properties statistics
1012 * @err: error code
1013 */
1014struct scan_check_data {
1015 struct ubifs_lp_stats lst;
1016 int err;
1017};
1018
1019/**
1020 * scan_check_cb - scan callback. 1010 * scan_check_cb - scan callback.
1021 * @c: the UBIFS file-system description object 1011 * @c: the UBIFS file-system description object
1022 * @lp: LEB properties to scan 1012 * @lp: LEB properties to scan
1023 * @in_tree: whether the LEB properties are in main memory 1013 * @in_tree: whether the LEB properties are in main memory
1024 * @data: information passed to and from the caller of the scan 1014 * @lst: lprops statistics to update
1025 * 1015 *
1026 * This function returns a code that indicates whether the scan should continue 1016 * This function returns a code that indicates whether the scan should continue
1027 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree 1017 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
@@ -1030,11 +1020,10 @@ struct scan_check_data {
1030 */ 1020 */
1031static int scan_check_cb(struct ubifs_info *c, 1021static int scan_check_cb(struct ubifs_info *c,
1032 const struct ubifs_lprops *lp, int in_tree, 1022 const struct ubifs_lprops *lp, int in_tree,
1033 struct scan_check_data *data) 1023 struct ubifs_lp_stats *lst)
1034{ 1024{
1035 struct ubifs_scan_leb *sleb; 1025 struct ubifs_scan_leb *sleb;
1036 struct ubifs_scan_node *snod; 1026 struct ubifs_scan_node *snod;
1037 struct ubifs_lp_stats *lst = &data->lst;
1038 int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret; 1027 int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret;
1039 void *buf = NULL; 1028 void *buf = NULL;
1040 1029
@@ -1044,7 +1033,7 @@ static int scan_check_cb(struct ubifs_info *c,
1044 if (cat != (lp->flags & LPROPS_CAT_MASK)) { 1033 if (cat != (lp->flags & LPROPS_CAT_MASK)) {
1045 ubifs_err("bad LEB category %d expected %d", 1034 ubifs_err("bad LEB category %d expected %d",
1046 (lp->flags & LPROPS_CAT_MASK), cat); 1035 (lp->flags & LPROPS_CAT_MASK), cat);
1047 goto out; 1036 return -EINVAL;
1048 } 1037 }
1049 } 1038 }
1050 1039
@@ -1078,7 +1067,7 @@ static int scan_check_cb(struct ubifs_info *c,
1078 } 1067 }
1079 if (!found) { 1068 if (!found) {
1080 ubifs_err("bad LPT list (category %d)", cat); 1069 ubifs_err("bad LPT list (category %d)", cat);
1081 goto out; 1070 return -EINVAL;
1082 } 1071 }
1083 } 1072 }
1084 } 1073 }
@@ -1090,45 +1079,40 @@ static int scan_check_cb(struct ubifs_info *c,
1090 if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) || 1079 if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) ||
1091 lp != heap->arr[lp->hpos]) { 1080 lp != heap->arr[lp->hpos]) {
1092 ubifs_err("bad LPT heap (category %d)", cat); 1081 ubifs_err("bad LPT heap (category %d)", cat);
1093 goto out; 1082 return -EINVAL;
1094 } 1083 }
1095 } 1084 }
1096 1085
1097 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); 1086 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
1098 if (!buf) { 1087 if (!buf)
1099 ubifs_err("cannot allocate memory to scan LEB %d", lnum); 1088 return -ENOMEM;
1100 goto out; 1089
1090 /*
1091 * After an unclean unmount, empty and freeable LEBs
1092 * may contain garbage - do not scan them.
1093 */
1094 if (lp->free == c->leb_size) {
1095 lst->empty_lebs += 1;
1096 lst->total_free += c->leb_size;
1097 lst->total_dark += ubifs_calc_dark(c, c->leb_size);
1098 return LPT_SCAN_CONTINUE;
1099 }
1100 if (lp->free + lp->dirty == c->leb_size &&
1101 !(lp->flags & LPROPS_INDEX)) {
1102 lst->total_free += lp->free;
1103 lst->total_dirty += lp->dirty;
1104 lst->total_dark += ubifs_calc_dark(c, c->leb_size);
1105 return LPT_SCAN_CONTINUE;
1101 } 1106 }
1102 1107
1103 sleb = ubifs_scan(c, lnum, 0, buf, 0); 1108 sleb = ubifs_scan(c, lnum, 0, buf, 0);
1104 if (IS_ERR(sleb)) { 1109 if (IS_ERR(sleb)) {
1105 /* 1110 ret = PTR_ERR(sleb);
1106 * After an unclean unmount, empty and freeable LEBs 1111 if (ret == -EUCLEAN) {
1107 * may contain garbage. 1112 dbg_dump_lprops(c);
1108 */ 1113 dbg_dump_budg(c, &c->bi);
1109 if (lp->free == c->leb_size) {
1110 ubifs_err("scan errors were in empty LEB "
1111 "- continuing checking");
1112 lst->empty_lebs += 1;
1113 lst->total_free += c->leb_size;
1114 lst->total_dark += ubifs_calc_dark(c, c->leb_size);
1115 ret = LPT_SCAN_CONTINUE;
1116 goto exit;
1117 }
1118
1119 if (lp->free + lp->dirty == c->leb_size &&
1120 !(lp->flags & LPROPS_INDEX)) {
1121 ubifs_err("scan errors were in freeable LEB "
1122 "- continuing checking");
1123 lst->total_free += lp->free;
1124 lst->total_dirty += lp->dirty;
1125 lst->total_dark += ubifs_calc_dark(c, c->leb_size);
1126 ret = LPT_SCAN_CONTINUE;
1127 goto exit;
1128 } 1114 }
1129 data->err = PTR_ERR(sleb); 1115 goto out;
1130 ret = LPT_SCAN_STOP;
1131 goto exit;
1132 } 1116 }
1133 1117
1134 is_idx = -1; 1118 is_idx = -1;
@@ -1246,10 +1230,8 @@ static int scan_check_cb(struct ubifs_info *c,
1246 } 1230 }
1247 1231
1248 ubifs_scan_destroy(sleb); 1232 ubifs_scan_destroy(sleb);
1249 ret = LPT_SCAN_CONTINUE;
1250exit:
1251 vfree(buf); 1233 vfree(buf);
1252 return ret; 1234 return LPT_SCAN_CONTINUE;
1253 1235
1254out_print: 1236out_print:
1255 ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, " 1237 ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, "
@@ -1258,10 +1240,10 @@ out_print:
1258 dbg_dump_leb(c, lnum); 1240 dbg_dump_leb(c, lnum);
1259out_destroy: 1241out_destroy:
1260 ubifs_scan_destroy(sleb); 1242 ubifs_scan_destroy(sleb);
1243 ret = -EINVAL;
1261out: 1244out:
1262 vfree(buf); 1245 vfree(buf);
1263 data->err = -EINVAL; 1246 return ret;
1264 return LPT_SCAN_STOP;
1265} 1247}
1266 1248
1267/** 1249/**
@@ -1278,8 +1260,7 @@ out:
1278int dbg_check_lprops(struct ubifs_info *c) 1260int dbg_check_lprops(struct ubifs_info *c)
1279{ 1261{
1280 int i, err; 1262 int i, err;
1281 struct scan_check_data data; 1263 struct ubifs_lp_stats lst;
1282 struct ubifs_lp_stats *lst = &data.lst;
1283 1264
1284 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 1265 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
1285 return 0; 1266 return 0;
@@ -1294,29 +1275,23 @@ int dbg_check_lprops(struct ubifs_info *c)
1294 return err; 1275 return err;
1295 } 1276 }
1296 1277
1297 memset(lst, 0, sizeof(struct ubifs_lp_stats)); 1278 memset(&lst, 0, sizeof(struct ubifs_lp_stats));
1298
1299 data.err = 0;
1300 err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, 1279 err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1,
1301 (ubifs_lpt_scan_callback)scan_check_cb, 1280 (ubifs_lpt_scan_callback)scan_check_cb,
1302 &data); 1281 &lst);
1303 if (err && err != -ENOSPC) 1282 if (err && err != -ENOSPC)
1304 goto out; 1283 goto out;
1305 if (data.err) {
1306 err = data.err;
1307 goto out;
1308 }
1309 1284
1310 if (lst->empty_lebs != c->lst.empty_lebs || 1285 if (lst.empty_lebs != c->lst.empty_lebs ||
1311 lst->idx_lebs != c->lst.idx_lebs || 1286 lst.idx_lebs != c->lst.idx_lebs ||
1312 lst->total_free != c->lst.total_free || 1287 lst.total_free != c->lst.total_free ||
1313 lst->total_dirty != c->lst.total_dirty || 1288 lst.total_dirty != c->lst.total_dirty ||
1314 lst->total_used != c->lst.total_used) { 1289 lst.total_used != c->lst.total_used) {
1315 ubifs_err("bad overall accounting"); 1290 ubifs_err("bad overall accounting");
1316 ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " 1291 ubifs_err("calculated: empty_lebs %d, idx_lebs %d, "
1317 "total_free %lld, total_dirty %lld, total_used %lld", 1292 "total_free %lld, total_dirty %lld, total_used %lld",
1318 lst->empty_lebs, lst->idx_lebs, lst->total_free, 1293 lst.empty_lebs, lst.idx_lebs, lst.total_free,
1319 lst->total_dirty, lst->total_used); 1294 lst.total_dirty, lst.total_used);
1320 ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, " 1295 ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, "
1321 "total_free %lld, total_dirty %lld, total_used %lld", 1296 "total_free %lld, total_dirty %lld, total_used %lld",
1322 c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, 1297 c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free,
@@ -1325,11 +1300,11 @@ int dbg_check_lprops(struct ubifs_info *c)
1325 goto out; 1300 goto out;
1326 } 1301 }
1327 1302
1328 if (lst->total_dead != c->lst.total_dead || 1303 if (lst.total_dead != c->lst.total_dead ||
1329 lst->total_dark != c->lst.total_dark) { 1304 lst.total_dark != c->lst.total_dark) {
1330 ubifs_err("bad dead/dark space accounting"); 1305 ubifs_err("bad dead/dark space accounting");
1331 ubifs_err("calculated: total_dead %lld, total_dark %lld", 1306 ubifs_err("calculated: total_dead %lld, total_dark %lld",
1332 lst->total_dead, lst->total_dark); 1307 lst.total_dead, lst.total_dark);
1333 ubifs_err("read from lprops: total_dead %lld, total_dark %lld", 1308 ubifs_err("read from lprops: total_dead %lld, total_dark %lld",
1334 c->lst.total_dead, c->lst.total_dark); 1309 c->lst.total_dead, c->lst.total_dark);
1335 err = -EINVAL; 1310 err = -EINVAL;
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 0c9c69bd983a..dfcb5748a7dc 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -29,6 +29,12 @@
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include "ubifs.h" 30#include "ubifs.h"
31 31
32#ifdef CONFIG_UBIFS_FS_DEBUG
33static int dbg_populate_lsave(struct ubifs_info *c);
34#else
35#define dbg_populate_lsave(c) 0
36#endif
37
32/** 38/**
33 * first_dirty_cnode - find first dirty cnode. 39 * first_dirty_cnode - find first dirty cnode.
34 * @c: UBIFS file-system description object 40 * @c: UBIFS file-system description object
@@ -586,7 +592,7 @@ static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c,
586 if (nnode->nbranch[iip].lnum) 592 if (nnode->nbranch[iip].lnum)
587 break; 593 break;
588 } 594 }
589 } while (iip >= UBIFS_LPT_FANOUT); 595 } while (iip >= UBIFS_LPT_FANOUT);
590 596
591 /* Go right */ 597 /* Go right */
592 nnode = ubifs_get_nnode(c, nnode, iip); 598 nnode = ubifs_get_nnode(c, nnode, iip);
@@ -815,6 +821,10 @@ static void populate_lsave(struct ubifs_info *c)
815 c->lpt_drty_flgs |= LSAVE_DIRTY; 821 c->lpt_drty_flgs |= LSAVE_DIRTY;
816 ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); 822 ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz);
817 } 823 }
824
825 if (dbg_populate_lsave(c))
826 return;
827
818 list_for_each_entry(lprops, &c->empty_list, list) { 828 list_for_each_entry(lprops, &c->empty_list, list) {
819 c->lsave[cnt++] = lprops->lnum; 829 c->lsave[cnt++] = lprops->lnum;
820 if (cnt >= c->lsave_cnt) 830 if (cnt >= c->lsave_cnt)
@@ -1994,4 +2004,47 @@ void dbg_dump_lpt_lebs(const struct ubifs_info *c)
1994 current->pid); 2004 current->pid);
1995} 2005}
1996 2006
2007/**
2008 * dbg_populate_lsave - debugging version of 'populate_lsave()'
2009 * @c: UBIFS file-system description object
2010 *
2011 * This is a debugging version for 'populate_lsave()' which populates lsave
2012 * with random LEBs instead of useful LEBs, which is good for test coverage.
2013 * Returns zero if lsave has not been populated (this debugging feature is
2014 * disabled) an non-zero if lsave has been populated.
2015 */
2016static int dbg_populate_lsave(struct ubifs_info *c)
2017{
2018 struct ubifs_lprops *lprops;
2019 struct ubifs_lpt_heap *heap;
2020 int i;
2021
2022 if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
2023 return 0;
2024 if (random32() & 3)
2025 return 0;
2026
2027 for (i = 0; i < c->lsave_cnt; i++)
2028 c->lsave[i] = c->main_first;
2029
2030 list_for_each_entry(lprops, &c->empty_list, list)
2031 c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
2032 list_for_each_entry(lprops, &c->freeable_list, list)
2033 c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
2034 list_for_each_entry(lprops, &c->frdi_idx_list, list)
2035 c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
2036
2037 heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
2038 for (i = 0; i < heap->cnt; i++)
2039 c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
2040 heap = &c->lpt_heap[LPROPS_DIRTY - 1];
2041 for (i = 0; i < heap->cnt; i++)
2042 c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
2043 heap = &c->lpt_heap[LPROPS_FREE - 1];
2044 for (i = 0; i < heap->cnt; i++)
2045 c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
2046
2047 return 1;
2048}
2049
1997#endif /* CONFIG_UBIFS_FS_DEBUG */ 2050#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index 21f47afdacff..278c2382e8c2 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -148,7 +148,7 @@ static int validate_master(const struct ubifs_info *c)
148 } 148 }
149 149
150 main_sz = (long long)c->main_lebs * c->leb_size; 150 main_sz = (long long)c->main_lebs * c->leb_size;
151 if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) { 151 if (c->bi.old_idx_sz & 7 || c->bi.old_idx_sz >= main_sz) {
152 err = 9; 152 err = 9;
153 goto out; 153 goto out;
154 } 154 }
@@ -218,7 +218,7 @@ static int validate_master(const struct ubifs_info *c)
218 } 218 }
219 219
220 if (c->lst.total_dead + c->lst.total_dark + 220 if (c->lst.total_dead + c->lst.total_dark +
221 c->lst.total_used + c->old_idx_sz > main_sz) { 221 c->lst.total_used + c->bi.old_idx_sz > main_sz) {
222 err = 21; 222 err = 21;
223 goto out; 223 goto out;
224 } 224 }
@@ -286,7 +286,7 @@ int ubifs_read_master(struct ubifs_info *c)
286 c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum); 286 c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum);
287 c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum); 287 c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum);
288 c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs); 288 c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs);
289 c->old_idx_sz = le64_to_cpu(c->mst_node->index_size); 289 c->bi.old_idx_sz = le64_to_cpu(c->mst_node->index_size);
290 c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum); 290 c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum);
291 c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs); 291 c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs);
292 c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum); 292 c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum);
@@ -305,7 +305,7 @@ int ubifs_read_master(struct ubifs_info *c)
305 c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead); 305 c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead);
306 c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark); 306 c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark);
307 307
308 c->calc_idx_sz = c->old_idx_sz; 308 c->calc_idx_sz = c->bi.old_idx_sz;
309 309
310 if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS)) 310 if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS))
311 c->no_orphs = 1; 311 c->no_orphs = 1;
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index c3de04dc952a..0b5296a9a4c5 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -340,4 +340,21 @@ static inline void ubifs_release_lprops(struct ubifs_info *c)
340 mutex_unlock(&c->lp_mutex); 340 mutex_unlock(&c->lp_mutex);
341} 341}
342 342
343/**
344 * ubifs_next_log_lnum - switch to the next log LEB.
345 * @c: UBIFS file-system description object
346 * @lnum: current log LEB
347 *
348 * This helper function returns the log LEB number which goes next after LEB
349 * 'lnum'.
350 */
351static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum)
352{
353 lnum += 1;
354 if (lnum > c->log_last)
355 lnum = UBIFS_LOG_LNUM;
356
357 return lnum;
358}
359
343#endif /* __UBIFS_MISC_H__ */ 360#endif /* __UBIFS_MISC_H__ */
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 09df318e368f..bd644bf587a8 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -673,7 +673,8 @@ static int kill_orphans(struct ubifs_info *c)
673 sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); 673 sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
674 if (IS_ERR(sleb)) { 674 if (IS_ERR(sleb)) {
675 if (PTR_ERR(sleb) == -EUCLEAN) 675 if (PTR_ERR(sleb) == -EUCLEAN)
676 sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); 676 sleb = ubifs_recover_leb(c, lnum, 0,
677 c->sbuf, 0);
677 if (IS_ERR(sleb)) { 678 if (IS_ERR(sleb)) {
678 err = PTR_ERR(sleb); 679 err = PTR_ERR(sleb);
679 break; 680 break;
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 3dbad6fbd1eb..731d9e2e7b50 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -564,13 +564,16 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
564} 564}
565 565
566/** 566/**
567 * drop_incomplete_group - drop nodes from an incomplete group. 567 * drop_last_node - drop the last node or group of nodes.
568 * @sleb: scanned LEB information 568 * @sleb: scanned LEB information
569 * @offs: offset of dropped nodes is returned here 569 * @offs: offset of dropped nodes is returned here
570 * @grouped: non-zero if whole group of nodes have to be dropped
570 * 571 *
571 * This function returns %1 if nodes are dropped and %0 otherwise. 572 * This is a helper function for 'ubifs_recover_leb()' which drops the last
573 * node of the scanned LEB or the last group of nodes if @grouped is not zero.
574 * This function returns %1 if a node was dropped and %0 otherwise.
572 */ 575 */
573static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) 576static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped)
574{ 577{
575 int dropped = 0; 578 int dropped = 0;
576 579
@@ -589,6 +592,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
589 kfree(snod); 592 kfree(snod);
590 sleb->nodes_cnt -= 1; 593 sleb->nodes_cnt -= 1;
591 dropped = 1; 594 dropped = 1;
595 if (!grouped)
596 break;
592 } 597 }
593 return dropped; 598 return dropped;
594} 599}
@@ -609,8 +614,7 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
609struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, 614struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
610 int offs, void *sbuf, int grouped) 615 int offs, void *sbuf, int grouped)
611{ 616{
612 int err, len = c->leb_size - offs, need_clean = 0, quiet = 1; 617 int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
613 int empty_chkd = 0, start = offs;
614 struct ubifs_scan_leb *sleb; 618 struct ubifs_scan_leb *sleb;
615 void *buf = sbuf + offs; 619 void *buf = sbuf + offs;
616 620
@@ -620,12 +624,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
620 if (IS_ERR(sleb)) 624 if (IS_ERR(sleb))
621 return sleb; 625 return sleb;
622 626
623 if (sleb->ecc) 627 ubifs_assert(len >= 8);
624 need_clean = 1;
625
626 while (len >= 8) { 628 while (len >= 8) {
627 int ret;
628
629 dbg_scan("look at LEB %d:%d (%d bytes left)", 629 dbg_scan("look at LEB %d:%d (%d bytes left)",
630 lnum, offs, len); 630 lnum, offs, len);
631 631
@@ -635,8 +635,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
635 * Scan quietly until there is an error from which we cannot 635 * Scan quietly until there is an error from which we cannot
636 * recover 636 * recover
637 */ 637 */
638 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); 638 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
639
640 if (ret == SCANNED_A_NODE) { 639 if (ret == SCANNED_A_NODE) {
641 /* A valid node, and not a padding node */ 640 /* A valid node, and not a padding node */
642 struct ubifs_ch *ch = buf; 641 struct ubifs_ch *ch = buf;
@@ -649,70 +648,32 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
649 offs += node_len; 648 offs += node_len;
650 buf += node_len; 649 buf += node_len;
651 len -= node_len; 650 len -= node_len;
652 continue; 651 } else if (ret > 0) {
653 }
654
655 if (ret > 0) {
656 /* Padding bytes or a valid padding node */ 652 /* Padding bytes or a valid padding node */
657 offs += ret; 653 offs += ret;
658 buf += ret; 654 buf += ret;
659 len -= ret; 655 len -= ret;
660 continue; 656 } else if (ret == SCANNED_EMPTY_SPACE ||
661 } 657 ret == SCANNED_GARBAGE ||
662 658 ret == SCANNED_A_BAD_PAD_NODE ||
663 if (ret == SCANNED_EMPTY_SPACE) { 659 ret == SCANNED_A_CORRUPT_NODE) {
664 if (!is_empty(buf, len)) { 660 dbg_rcvry("found corruption - %d", ret);
665 if (!is_last_write(c, buf, offs))
666 break;
667 clean_buf(c, &buf, lnum, &offs, &len);
668 need_clean = 1;
669 }
670 empty_chkd = 1;
671 break; 661 break;
672 } 662 } else {
673 663 dbg_err("unexpected return value %d", ret);
674 if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
675 if (is_last_write(c, buf, offs)) {
676 clean_buf(c, &buf, lnum, &offs, &len);
677 need_clean = 1;
678 empty_chkd = 1;
679 break;
680 }
681
682 if (ret == SCANNED_A_CORRUPT_NODE)
683 if (no_more_nodes(c, buf, len, lnum, offs)) {
684 clean_buf(c, &buf, lnum, &offs, &len);
685 need_clean = 1;
686 empty_chkd = 1;
687 break;
688 }
689
690 if (quiet) {
691 /* Redo the last scan but noisily */
692 quiet = 0;
693 continue;
694 }
695
696 switch (ret) {
697 case SCANNED_GARBAGE:
698 dbg_err("garbage");
699 goto corrupted;
700 case SCANNED_A_CORRUPT_NODE:
701 case SCANNED_A_BAD_PAD_NODE:
702 dbg_err("bad node");
703 goto corrupted;
704 default:
705 dbg_err("unknown");
706 err = -EINVAL; 664 err = -EINVAL;
707 goto error; 665 goto error;
708 } 666 }
709 } 667 }
710 668
711 if (!empty_chkd && !is_empty(buf, len)) { 669 if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) {
712 if (is_last_write(c, buf, offs)) { 670 if (!is_last_write(c, buf, offs))
713 clean_buf(c, &buf, lnum, &offs, &len); 671 goto corrupted_rescan;
714 need_clean = 1; 672 } else if (ret == SCANNED_A_CORRUPT_NODE) {
715 } else { 673 if (!no_more_nodes(c, buf, len, lnum, offs))
674 goto corrupted_rescan;
675 } else if (!is_empty(buf, len)) {
676 if (!is_last_write(c, buf, offs)) {
716 int corruption = first_non_ff(buf, len); 677 int corruption = first_non_ff(buf, len);
717 678
718 /* 679 /*
@@ -728,29 +689,82 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
728 } 689 }
729 } 690 }
730 691
731 /* Drop nodes from incomplete group */ 692 min_io_unit = round_down(offs, c->min_io_size);
732 if (grouped && drop_incomplete_group(sleb, &offs)) { 693 if (grouped)
733 buf = sbuf + offs; 694 /*
734 len = c->leb_size - offs; 695 * If nodes are grouped, always drop the incomplete group at
735 clean_buf(c, &buf, lnum, &offs, &len); 696 * the end.
736 need_clean = 1; 697 */
737 } 698 drop_last_node(sleb, &offs, 1);
738 699
739 if (offs % c->min_io_size) { 700 /*
740 clean_buf(c, &buf, lnum, &offs, &len); 701 * While we are in the middle of the same min. I/O unit keep dropping
741 need_clean = 1; 702 * nodes. So basically, what we want is to make sure that the last min.
742 } 703 * I/O unit where we saw the corruption is dropped completely with all
704 * the uncorrupted node which may possibly sit there.
705 *
706 * In other words, let's name the min. I/O unit where the corruption
707 * starts B, and the previous min. I/O unit A. The below code tries to
708 * deal with a situation when half of B contains valid nodes or the end
709 * of a valid node, and the second half of B contains corrupted data or
710 * garbage. This means that UBIFS had been writing to B just before the
711 * power cut happened. I do not know how realistic is this scenario
712 * that half of the min. I/O unit had been written successfully and the
713 * other half not, but this is possible in our 'failure mode emulation'
714 * infrastructure at least.
715 *
716 * So what is the problem, why we need to drop those nodes? Whey can't
717 * we just clean-up the second half of B by putting a padding node
718 * there? We can, and this works fine with one exception which was
719 * reproduced with power cut emulation testing and happens extremely
720 * rarely. The description follows, but it is worth noting that that is
721 * only about the GC head, so we could do this trick only if the bud
722 * belongs to the GC head, but it does not seem to be worth an
723 * additional "if" statement.
724 *
725 * So, imagine the file-system is full, we run GC which is moving valid
726 * nodes from LEB X to LEB Y (obviously, LEB Y is the current GC head
727 * LEB). The @c->gc_lnum is -1, which means that GC will retain LEB X
728 * and will try to continue. Imagine that LEB X is currently the
729 * dirtiest LEB, and the amount of used space in LEB Y is exactly the
730 * same as amount of free space in LEB X.
731 *
732 * And a power cut happens when nodes are moved from LEB X to LEB Y. We
733 * are here trying to recover LEB Y which is the GC head LEB. We find
734 * the min. I/O unit B as described above. Then we clean-up LEB Y by
735 * padding min. I/O unit. And later 'ubifs_rcvry_gc_commit()' function
736 * fails, because it cannot find a dirty LEB which could be GC'd into
737 * LEB Y! Even LEB X does not match because the amount of valid nodes
738 * there does not fit the free space in LEB Y any more! And this is
739 * because of the padding node which we added to LEB Y. The
740 * user-visible effect of this which I once observed and analysed is
741 * that we cannot mount the file-system with -ENOSPC error.
742 *
743 * So obviously, to make sure that situation does not happen we should
744 * free min. I/O unit B in LEB Y completely and the last used min. I/O
745 * unit in LEB Y should be A. This is basically what the below code
746 * tries to do.
747 */
748 while (min_io_unit == round_down(offs, c->min_io_size) &&
749 min_io_unit != offs &&
750 drop_last_node(sleb, &offs, grouped));
751
752 buf = sbuf + offs;
753 len = c->leb_size - offs;
743 754
755 clean_buf(c, &buf, lnum, &offs, &len);
744 ubifs_end_scan(c, sleb, lnum, offs); 756 ubifs_end_scan(c, sleb, lnum, offs);
745 757
746 if (need_clean) { 758 err = fix_unclean_leb(c, sleb, start);
747 err = fix_unclean_leb(c, sleb, start); 759 if (err)
748 if (err) 760 goto error;
749 goto error;
750 }
751 761
752 return sleb; 762 return sleb;
753 763
764corrupted_rescan:
765 /* Re-scan the corrupted data with verbose messages */
766 dbg_err("corruptio %d", ret);
767 ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
754corrupted: 768corrupted:
755 ubifs_scanned_corruption(c, lnum, offs, buf); 769 ubifs_scanned_corruption(c, lnum, offs, buf);
756 err = -EUCLEAN; 770 err = -EUCLEAN;
@@ -1070,6 +1084,53 @@ int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf)
1070} 1084}
1071 1085
1072/** 1086/**
1087 * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit.
1088 * @c: UBIFS file-system description object
1089 *
1090 * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty
1091 * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns
1092 * zero in case of success and a negative error code in case of failure.
1093 */
1094static int grab_empty_leb(struct ubifs_info *c)
1095{
1096 int lnum, err;
1097
1098 /*
1099 * Note, it is very important to first search for an empty LEB and then
1100 * run the commit, not vice-versa. The reason is that there might be
1101 * only one empty LEB at the moment, the one which has been the
1102 * @c->gc_lnum just before the power cut happened. During the regular
1103 * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no
1104 * one but GC can grab it. But at this moment this single empty LEB is
1105 * not marked as taken, so if we run commit - what happens? Right, the
1106 * commit will grab it and write the index there. Remember that the
1107 * index always expands as long as there is free space, and it only
1108 * starts consolidating when we run out of space.
1109 *
1110 * IOW, if we run commit now, we might not be able to find a free LEB
1111 * after this.
1112 */
1113 lnum = ubifs_find_free_leb_for_idx(c);
1114 if (lnum < 0) {
1115 dbg_err("could not find an empty LEB");
1116 dbg_dump_lprops(c);
1117 dbg_dump_budg(c, &c->bi);
1118 return lnum;
1119 }
1120
1121 /* Reset the index flag */
1122 err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
1123 LPROPS_INDEX, 0);
1124 if (err)
1125 return err;
1126
1127 c->gc_lnum = lnum;
1128 dbg_rcvry("found empty LEB %d, run commit", lnum);
1129
1130 return ubifs_run_commit(c);
1131}
1132
1133/**
1073 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit. 1134 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
1074 * @c: UBIFS file-system description object 1135 * @c: UBIFS file-system description object
1075 * 1136 *
@@ -1091,71 +1152,26 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1091{ 1152{
1092 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; 1153 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
1093 struct ubifs_lprops lp; 1154 struct ubifs_lprops lp;
1094 int lnum, err; 1155 int err;
1156
1157 dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs);
1095 1158
1096 c->gc_lnum = -1; 1159 c->gc_lnum = -1;
1097 if (wbuf->lnum == -1) { 1160 if (wbuf->lnum == -1 || wbuf->offs == c->leb_size)
1098 dbg_rcvry("no GC head LEB"); 1161 return grab_empty_leb(c);
1099 goto find_free; 1162
1100 }
1101 /*
1102 * See whether the used space in the dirtiest LEB fits in the GC head
1103 * LEB.
1104 */
1105 if (wbuf->offs == c->leb_size) {
1106 dbg_rcvry("no room in GC head LEB");
1107 goto find_free;
1108 }
1109 err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); 1163 err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
1110 if (err) { 1164 if (err) {
1111 /* 1165 if (err != -ENOSPC)
1112 * There are no dirty or empty LEBs subject to here being
1113 * enough for the index. Try to use
1114 * 'ubifs_find_free_leb_for_idx()', which will return any empty
1115 * LEBs (ignoring index requirements). If the index then
1116 * doesn't have enough LEBs the recovery commit will fail -
1117 * which is the same result anyway i.e. recovery fails. So
1118 * there is no problem ignoring index requirements and just
1119 * grabbing a free LEB since we have already established there
1120 * is not a dirty LEB we could have used instead.
1121 */
1122 if (err == -ENOSPC) {
1123 dbg_rcvry("could not find a dirty LEB");
1124 goto find_free;
1125 }
1126 return err;
1127 }
1128 ubifs_assert(!(lp.flags & LPROPS_INDEX));
1129 lnum = lp.lnum;
1130 if (lp.free + lp.dirty == c->leb_size) {
1131 /* An empty LEB was returned */
1132 if (lp.free != c->leb_size) {
1133 err = ubifs_change_one_lp(c, lnum, c->leb_size,
1134 0, 0, 0, 0);
1135 if (err)
1136 return err;
1137 }
1138 err = ubifs_leb_unmap(c, lnum);
1139 if (err)
1140 return err; 1166 return err;
1141 c->gc_lnum = lnum; 1167
1142 dbg_rcvry("allocated LEB %d for GC", lnum); 1168 dbg_rcvry("could not find a dirty LEB");
1143 /* Run the commit */ 1169 return grab_empty_leb(c);
1144 dbg_rcvry("committing");
1145 return ubifs_run_commit(c);
1146 }
1147 /*
1148 * There was no empty LEB so the used space in the dirtiest LEB must fit
1149 * in the GC head LEB.
1150 */
1151 if (lp.free + lp.dirty < wbuf->offs) {
1152 dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d",
1153 lnum, wbuf->lnum, wbuf->offs);
1154 err = ubifs_return_leb(c, lnum);
1155 if (err)
1156 return err;
1157 goto find_free;
1158 } 1170 }
1171
1172 ubifs_assert(!(lp.flags & LPROPS_INDEX));
1173 ubifs_assert(lp.free + lp.dirty >= wbuf->offs);
1174
1159 /* 1175 /*
1160 * We run the commit before garbage collection otherwise subsequent 1176 * We run the commit before garbage collection otherwise subsequent
1161 * mounts will see the GC and orphan deletion in a different order. 1177 * mounts will see the GC and orphan deletion in a different order.
@@ -1164,11 +1180,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1164 err = ubifs_run_commit(c); 1180 err = ubifs_run_commit(c);
1165 if (err) 1181 if (err)
1166 return err; 1182 return err;
1167 /* 1183
1168 * The data in the dirtiest LEB fits in the GC head LEB, so do the GC 1184 dbg_rcvry("GC'ing LEB %d", lp.lnum);
1169 * - use locking to keep 'ubifs_assert()' happy.
1170 */
1171 dbg_rcvry("GC'ing LEB %d", lnum);
1172 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); 1185 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
1173 err = ubifs_garbage_collect_leb(c, &lp); 1186 err = ubifs_garbage_collect_leb(c, &lp);
1174 if (err >= 0) { 1187 if (err >= 0) {
@@ -1184,37 +1197,17 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1184 err = -EINVAL; 1197 err = -EINVAL;
1185 return err; 1198 return err;
1186 } 1199 }
1187 if (err != LEB_RETAINED) { 1200
1188 dbg_err("GC returned %d", err); 1201 ubifs_assert(err == LEB_RETAINED);
1202 if (err != LEB_RETAINED)
1189 return -EINVAL; 1203 return -EINVAL;
1190 } 1204
1191 err = ubifs_leb_unmap(c, c->gc_lnum); 1205 err = ubifs_leb_unmap(c, c->gc_lnum);
1192 if (err) 1206 if (err)
1193 return err; 1207 return err;
1194 dbg_rcvry("allocated LEB %d for GC", lnum);
1195 return 0;
1196 1208
1197find_free: 1209 dbg_rcvry("allocated LEB %d for GC", lp.lnum);
1198 /* 1210 return 0;
1199 * There is no GC head LEB or the free space in the GC head LEB is too
1200 * small, or there are not dirty LEBs. Allocate gc_lnum by calling
1201 * 'ubifs_find_free_leb_for_idx()' so GC is not run.
1202 */
1203 lnum = ubifs_find_free_leb_for_idx(c);
1204 if (lnum < 0) {
1205 dbg_err("could not find an empty LEB");
1206 return lnum;
1207 }
1208 /* And reset the index flag */
1209 err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
1210 LPROPS_INDEX, 0);
1211 if (err)
1212 return err;
1213 c->gc_lnum = lnum;
1214 dbg_rcvry("allocated LEB %d for GC", lnum);
1215 /* Run the commit */
1216 dbg_rcvry("committing");
1217 return ubifs_run_commit(c);
1218} 1211}
1219 1212
1220/** 1213/**
@@ -1456,7 +1449,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
1456 err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); 1449 err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
1457 if (err) 1450 if (err)
1458 goto out; 1451 goto out;
1459 dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", 1452 dbg_rcvry("inode %lu at %d:%d size %lld -> %lld",
1460 (unsigned long)e->inum, lnum, offs, i_size, e->d_size); 1453 (unsigned long)e->inum, lnum, offs, i_size, e->d_size);
1461 return 0; 1454 return 0;
1462 1455
@@ -1505,20 +1498,27 @@ int ubifs_recover_size(struct ubifs_info *c)
1505 e->i_size = le64_to_cpu(ino->size); 1498 e->i_size = le64_to_cpu(ino->size);
1506 } 1499 }
1507 } 1500 }
1501
1508 if (e->exists && e->i_size < e->d_size) { 1502 if (e->exists && e->i_size < e->d_size) {
1509 if (!e->inode && c->ro_mount) { 1503 if (c->ro_mount) {
1510 /* Fix the inode size and pin it in memory */ 1504 /* Fix the inode size and pin it in memory */
1511 struct inode *inode; 1505 struct inode *inode;
1506 struct ubifs_inode *ui;
1507
1508 ubifs_assert(!e->inode);
1512 1509
1513 inode = ubifs_iget(c->vfs_sb, e->inum); 1510 inode = ubifs_iget(c->vfs_sb, e->inum);
1514 if (IS_ERR(inode)) 1511 if (IS_ERR(inode))
1515 return PTR_ERR(inode); 1512 return PTR_ERR(inode);
1513
1514 ui = ubifs_inode(inode);
1516 if (inode->i_size < e->d_size) { 1515 if (inode->i_size < e->d_size) {
1517 dbg_rcvry("ino %lu size %lld -> %lld", 1516 dbg_rcvry("ino %lu size %lld -> %lld",
1518 (unsigned long)e->inum, 1517 (unsigned long)e->inum,
1519 e->d_size, inode->i_size); 1518 inode->i_size, e->d_size);
1520 inode->i_size = e->d_size; 1519 inode->i_size = e->d_size;
1521 ubifs_inode(inode)->ui_size = e->d_size; 1520 ui->ui_size = e->d_size;
1521 ui->synced_i_size = e->d_size;
1522 e->inode = inode; 1522 e->inode = inode;
1523 this = rb_next(this); 1523 this = rb_next(this);
1524 continue; 1524 continue;
@@ -1533,9 +1533,11 @@ int ubifs_recover_size(struct ubifs_info *c)
1533 iput(e->inode); 1533 iput(e->inode);
1534 } 1534 }
1535 } 1535 }
1536
1536 this = rb_next(this); 1537 this = rb_next(this);
1537 rb_erase(&e->rb, &c->size_tree); 1538 rb_erase(&e->rb, &c->size_tree);
1538 kfree(e); 1539 kfree(e);
1539 } 1540 }
1541
1540 return 0; 1542 return 0;
1541} 1543}
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index d3d6d365bfc1..6617280d1679 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -33,44 +33,32 @@
33 */ 33 */
34 34
35#include "ubifs.h" 35#include "ubifs.h"
36 36#include <linux/list_sort.h>
37/*
38 * Replay flags.
39 *
40 * REPLAY_DELETION: node was deleted
41 * REPLAY_REF: node is a reference node
42 */
43enum {
44 REPLAY_DELETION = 1,
45 REPLAY_REF = 2,
46};
47 37
48/** 38/**
49 * struct replay_entry - replay tree entry. 39 * struct replay_entry - replay list entry.
50 * @lnum: logical eraseblock number of the node 40 * @lnum: logical eraseblock number of the node
51 * @offs: node offset 41 * @offs: node offset
52 * @len: node length 42 * @len: node length
43 * @deletion: non-zero if this entry corresponds to a node deletion
53 * @sqnum: node sequence number 44 * @sqnum: node sequence number
54 * @flags: replay flags 45 * @list: links the replay list
55 * @rb: links the replay tree
56 * @key: node key 46 * @key: node key
57 * @nm: directory entry name 47 * @nm: directory entry name
58 * @old_size: truncation old size 48 * @old_size: truncation old size
59 * @new_size: truncation new size 49 * @new_size: truncation new size
60 * @free: amount of free space in a bud
61 * @dirty: amount of dirty space in a bud from padding and deletion nodes
62 * @jhead: journal head number of the bud
63 * 50 *
64 * UBIFS journal replay must compare node sequence numbers, which means it must 51 * The replay process first scans all buds and builds the replay list, then
65 * build a tree of node information to insert into the TNC. 52 * sorts the replay list in nodes sequence number order, and then inserts all
53 * the replay entries to the TNC.
66 */ 54 */
67struct replay_entry { 55struct replay_entry {
68 int lnum; 56 int lnum;
69 int offs; 57 int offs;
70 int len; 58 int len;
59 unsigned int deletion:1;
71 unsigned long long sqnum; 60 unsigned long long sqnum;
72 int flags; 61 struct list_head list;
73 struct rb_node rb;
74 union ubifs_key key; 62 union ubifs_key key;
75 union { 63 union {
76 struct qstr nm; 64 struct qstr nm;
@@ -78,11 +66,6 @@ struct replay_entry {
78 loff_t old_size; 66 loff_t old_size;
79 loff_t new_size; 67 loff_t new_size;
80 }; 68 };
81 struct {
82 int free;
83 int dirty;
84 int jhead;
85 };
86 }; 69 };
87}; 70};
88 71
@@ -90,57 +73,64 @@ struct replay_entry {
90 * struct bud_entry - entry in the list of buds to replay. 73 * struct bud_entry - entry in the list of buds to replay.
91 * @list: next bud in the list 74 * @list: next bud in the list
92 * @bud: bud description object 75 * @bud: bud description object
93 * @free: free bytes in the bud
94 * @sqnum: reference node sequence number 76 * @sqnum: reference node sequence number
77 * @free: free bytes in the bud
78 * @dirty: dirty bytes in the bud
95 */ 79 */
96struct bud_entry { 80struct bud_entry {
97 struct list_head list; 81 struct list_head list;
98 struct ubifs_bud *bud; 82 struct ubifs_bud *bud;
99 int free;
100 unsigned long long sqnum; 83 unsigned long long sqnum;
84 int free;
85 int dirty;
101}; 86};
102 87
103/** 88/**
104 * set_bud_lprops - set free and dirty space used by a bud. 89 * set_bud_lprops - set free and dirty space used by a bud.
105 * @c: UBIFS file-system description object 90 * @c: UBIFS file-system description object
106 * @r: replay entry of bud 91 * @b: bud entry which describes the bud
92 *
93 * This function makes sure the LEB properties of bud @b are set correctly
94 * after the replay. Returns zero in case of success and a negative error code
95 * in case of failure.
107 */ 96 */
108static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) 97static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b)
109{ 98{
110 const struct ubifs_lprops *lp; 99 const struct ubifs_lprops *lp;
111 int err = 0, dirty; 100 int err = 0, dirty;
112 101
113 ubifs_get_lprops(c); 102 ubifs_get_lprops(c);
114 103
115 lp = ubifs_lpt_lookup_dirty(c, r->lnum); 104 lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum);
116 if (IS_ERR(lp)) { 105 if (IS_ERR(lp)) {
117 err = PTR_ERR(lp); 106 err = PTR_ERR(lp);
118 goto out; 107 goto out;
119 } 108 }
120 109
121 dirty = lp->dirty; 110 dirty = lp->dirty;
122 if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { 111 if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
123 /* 112 /*
124 * The LEB was added to the journal with a starting offset of 113 * The LEB was added to the journal with a starting offset of
125 * zero which means the LEB must have been empty. The LEB 114 * zero which means the LEB must have been empty. The LEB
126 * property values should be lp->free == c->leb_size and 115 * property values should be @lp->free == @c->leb_size and
127 * lp->dirty == 0, but that is not the case. The reason is that 116 * @lp->dirty == 0, but that is not the case. The reason is that
128 * the LEB was garbage collected. The garbage collector resets 117 * the LEB had been garbage collected before it became the bud,
129 * the free and dirty space without recording it anywhere except 118 * and there was not commit inbetween. The garbage collector
130 * lprops, so if there is not a commit then lprops does not have 119 * resets the free and dirty space without recording it
131 * that information next time the file system is mounted. 120 * anywhere except lprops, so if there was no commit then
121 * lprops does not have that information.
132 * 122 *
133 * We do not need to adjust free space because the scan has told 123 * We do not need to adjust free space because the scan has told
134 * us the exact value which is recorded in the replay entry as 124 * us the exact value which is recorded in the replay entry as
135 * r->free. 125 * @b->free.
136 * 126 *
137 * However we do need to subtract from the dirty space the 127 * However we do need to subtract from the dirty space the
138 * amount of space that the garbage collector reclaimed, which 128 * amount of space that the garbage collector reclaimed, which
139 * is the whole LEB minus the amount of space that was free. 129 * is the whole LEB minus the amount of space that was free.
140 */ 130 */
141 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, 131 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
142 lp->free, lp->dirty); 132 lp->free, lp->dirty);
143 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, 133 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
144 lp->free, lp->dirty); 134 lp->free, lp->dirty);
145 dirty -= c->leb_size - lp->free; 135 dirty -= c->leb_size - lp->free;
146 /* 136 /*
@@ -152,10 +142,10 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
152 */ 142 */
153 if (dirty != 0) 143 if (dirty != 0)
154 dbg_msg("LEB %d lp: %d free %d dirty " 144 dbg_msg("LEB %d lp: %d free %d dirty "
155 "replay: %d free %d dirty", r->lnum, lp->free, 145 "replay: %d free %d dirty", b->bud->lnum,
156 lp->dirty, r->free, r->dirty); 146 lp->free, lp->dirty, b->free, b->dirty);
157 } 147 }
158 lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, 148 lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty,
159 lp->flags | LPROPS_TAKEN, 0); 149 lp->flags | LPROPS_TAKEN, 0);
160 if (IS_ERR(lp)) { 150 if (IS_ERR(lp)) {
161 err = PTR_ERR(lp); 151 err = PTR_ERR(lp);
@@ -163,8 +153,9 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
163 } 153 }
164 154
165 /* Make sure the journal head points to the latest bud */ 155 /* Make sure the journal head points to the latest bud */
166 err = ubifs_wbuf_seek_nolock(&c->jheads[r->jhead].wbuf, r->lnum, 156 err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf,
167 c->leb_size - r->free, UBI_SHORTTERM); 157 b->bud->lnum, c->leb_size - b->free,
158 UBI_SHORTTERM);
168 159
169out: 160out:
170 ubifs_release_lprops(c); 161 ubifs_release_lprops(c);
@@ -172,6 +163,27 @@ out:
172} 163}
173 164
174/** 165/**
166 * set_buds_lprops - set free and dirty space for all replayed buds.
167 * @c: UBIFS file-system description object
168 *
169 * This function sets LEB properties for all replayed buds. Returns zero in
170 * case of success and a negative error code in case of failure.
171 */
172static int set_buds_lprops(struct ubifs_info *c)
173{
174 struct bud_entry *b;
175 int err;
176
177 list_for_each_entry(b, &c->replay_buds, list) {
178 err = set_bud_lprops(c, b);
179 if (err)
180 return err;
181 }
182
183 return 0;
184}
185
186/**
175 * trun_remove_range - apply a replay entry for a truncation to the TNC. 187 * trun_remove_range - apply a replay entry for a truncation to the TNC.
176 * @c: UBIFS file-system description object 188 * @c: UBIFS file-system description object
177 * @r: replay entry of truncation 189 * @r: replay entry of truncation
@@ -207,24 +219,22 @@ static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
207 */ 219 */
208static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) 220static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
209{ 221{
210 int err, deletion = ((r->flags & REPLAY_DELETION) != 0); 222 int err;
211 223
212 dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, 224 dbg_mnt("LEB %d:%d len %d deletion %d sqnum %llu %s", r->lnum,
213 r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); 225 r->offs, r->len, r->deletion, r->sqnum, DBGKEY(&r->key));
214 226
215 /* Set c->replay_sqnum to help deal with dangling branches. */ 227 /* Set c->replay_sqnum to help deal with dangling branches. */
216 c->replay_sqnum = r->sqnum; 228 c->replay_sqnum = r->sqnum;
217 229
218 if (r->flags & REPLAY_REF) 230 if (is_hash_key(c, &r->key)) {
219 err = set_bud_lprops(c, r); 231 if (r->deletion)
220 else if (is_hash_key(c, &r->key)) {
221 if (deletion)
222 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); 232 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
223 else 233 else
224 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, 234 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
225 r->len, &r->nm); 235 r->len, &r->nm);
226 } else { 236 } else {
227 if (deletion) 237 if (r->deletion)
228 switch (key_type(c, &r->key)) { 238 switch (key_type(c, &r->key)) {
229 case UBIFS_INO_KEY: 239 case UBIFS_INO_KEY:
230 { 240 {
@@ -247,7 +257,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
247 return err; 257 return err;
248 258
249 if (c->need_recovery) 259 if (c->need_recovery)
250 err = ubifs_recover_size_accum(c, &r->key, deletion, 260 err = ubifs_recover_size_accum(c, &r->key, r->deletion,
251 r->new_size); 261 r->new_size);
252 } 262 }
253 263
@@ -255,68 +265,77 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
255} 265}
256 266
257/** 267/**
258 * destroy_replay_tree - destroy the replay. 268 * replay_entries_cmp - compare 2 replay entries.
259 * @c: UBIFS file-system description object 269 * @priv: UBIFS file-system description object
270 * @a: first replay entry
271 * @a: second replay entry
260 * 272 *
261 * Destroy the replay tree. 273 * This is a comparios function for 'list_sort()' which compares 2 replay
274 * entries @a and @b by comparing their sequence numer. Returns %1 if @a has
275 * greater sequence number and %-1 otherwise.
262 */ 276 */
263static void destroy_replay_tree(struct ubifs_info *c) 277static int replay_entries_cmp(void *priv, struct list_head *a,
278 struct list_head *b)
264{ 279{
265 struct rb_node *this = c->replay_tree.rb_node; 280 struct replay_entry *ra, *rb;
266 struct replay_entry *r; 281
267 282 cond_resched();
268 while (this) { 283 if (a == b)
269 if (this->rb_left) { 284 return 0;
270 this = this->rb_left; 285
271 continue; 286 ra = list_entry(a, struct replay_entry, list);
272 } else if (this->rb_right) { 287 rb = list_entry(b, struct replay_entry, list);
273 this = this->rb_right; 288 ubifs_assert(ra->sqnum != rb->sqnum);
274 continue; 289 if (ra->sqnum > rb->sqnum)
275 } 290 return 1;
276 r = rb_entry(this, struct replay_entry, rb); 291 return -1;
277 this = rb_parent(this);
278 if (this) {
279 if (this->rb_left == &r->rb)
280 this->rb_left = NULL;
281 else
282 this->rb_right = NULL;
283 }
284 if (is_hash_key(c, &r->key))
285 kfree(r->nm.name);
286 kfree(r);
287 }
288 c->replay_tree = RB_ROOT;
289} 292}
290 293
291/** 294/**
292 * apply_replay_tree - apply the replay tree to the TNC. 295 * apply_replay_list - apply the replay list to the TNC.
293 * @c: UBIFS file-system description object 296 * @c: UBIFS file-system description object
294 * 297 *
295 * Apply the replay tree. 298 * Apply all entries in the replay list to the TNC. Returns zero in case of
296 * Returns zero in case of success and a negative error code in case of 299 * success and a negative error code in case of failure.
297 * failure.
298 */ 300 */
299static int apply_replay_tree(struct ubifs_info *c) 301static int apply_replay_list(struct ubifs_info *c)
300{ 302{
301 struct rb_node *this = rb_first(&c->replay_tree); 303 struct replay_entry *r;
304 int err;
302 305
303 while (this) { 306 list_sort(c, &c->replay_list, &replay_entries_cmp);
304 struct replay_entry *r;
305 int err;
306 307
308 list_for_each_entry(r, &c->replay_list, list) {
307 cond_resched(); 309 cond_resched();
308 310
309 r = rb_entry(this, struct replay_entry, rb);
310 err = apply_replay_entry(c, r); 311 err = apply_replay_entry(c, r);
311 if (err) 312 if (err)
312 return err; 313 return err;
313 this = rb_next(this);
314 } 314 }
315
315 return 0; 316 return 0;
316} 317}
317 318
318/** 319/**
319 * insert_node - insert a node to the replay tree. 320 * destroy_replay_list - destroy the replay.
321 * @c: UBIFS file-system description object
322 *
323 * Destroy the replay list.
324 */
325static void destroy_replay_list(struct ubifs_info *c)
326{
327 struct replay_entry *r, *tmp;
328
329 list_for_each_entry_safe(r, tmp, &c->replay_list, list) {
330 if (is_hash_key(c, &r->key))
331 kfree(r->nm.name);
332 list_del(&r->list);
333 kfree(r);
334 }
335}
336
337/**
338 * insert_node - insert a node to the replay list
320 * @c: UBIFS file-system description object 339 * @c: UBIFS file-system description object
321 * @lnum: node logical eraseblock number 340 * @lnum: node logical eraseblock number
322 * @offs: node offset 341 * @offs: node offset
@@ -328,39 +347,25 @@ static int apply_replay_tree(struct ubifs_info *c)
328 * @old_size: truncation old size 347 * @old_size: truncation old size
329 * @new_size: truncation new size 348 * @new_size: truncation new size
330 * 349 *
331 * This function inserts a scanned non-direntry node to the replay tree. The 350 * This function inserts a scanned non-direntry node to the replay list. The
332 * replay tree is an RB-tree containing @struct replay_entry elements which are 351 * replay list contains @struct replay_entry elements, and we sort this list in
333 * indexed by the sequence number. The replay tree is applied at the very end 352 * sequence number order before applying it. The replay list is applied at the
334 * of the replay process. Since the tree is sorted in sequence number order, 353 * very end of the replay process. Since the list is sorted in sequence number
335 * the older modifications are applied first. This function returns zero in 354 * order, the older modifications are applied first. This function returns zero
336 * case of success and a negative error code in case of failure. 355 * in case of success and a negative error code in case of failure.
337 */ 356 */
338static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, 357static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
339 union ubifs_key *key, unsigned long long sqnum, 358 union ubifs_key *key, unsigned long long sqnum,
340 int deletion, int *used, loff_t old_size, 359 int deletion, int *used, loff_t old_size,
341 loff_t new_size) 360 loff_t new_size)
342{ 361{
343 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
344 struct replay_entry *r; 362 struct replay_entry *r;
345 363
364 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
365
346 if (key_inum(c, key) >= c->highest_inum) 366 if (key_inum(c, key) >= c->highest_inum)
347 c->highest_inum = key_inum(c, key); 367 c->highest_inum = key_inum(c, key);
348 368
349 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
350 while (*p) {
351 parent = *p;
352 r = rb_entry(parent, struct replay_entry, rb);
353 if (sqnum < r->sqnum) {
354 p = &(*p)->rb_left;
355 continue;
356 } else if (sqnum > r->sqnum) {
357 p = &(*p)->rb_right;
358 continue;
359 }
360 ubifs_err("duplicate sqnum in replay");
361 return -EINVAL;
362 }
363
364 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 369 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
365 if (!r) 370 if (!r)
366 return -ENOMEM; 371 return -ENOMEM;
@@ -370,19 +375,18 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
370 r->lnum = lnum; 375 r->lnum = lnum;
371 r->offs = offs; 376 r->offs = offs;
372 r->len = len; 377 r->len = len;
378 r->deletion = !!deletion;
373 r->sqnum = sqnum; 379 r->sqnum = sqnum;
374 r->flags = (deletion ? REPLAY_DELETION : 0); 380 key_copy(c, key, &r->key);
375 r->old_size = old_size; 381 r->old_size = old_size;
376 r->new_size = new_size; 382 r->new_size = new_size;
377 key_copy(c, key, &r->key);
378 383
379 rb_link_node(&r->rb, parent, p); 384 list_add_tail(&r->list, &c->replay_list);
380 rb_insert_color(&r->rb, &c->replay_tree);
381 return 0; 385 return 0;
382} 386}
383 387
384/** 388/**
385 * insert_dent - insert a directory entry node into the replay tree. 389 * insert_dent - insert a directory entry node into the replay list.
386 * @c: UBIFS file-system description object 390 * @c: UBIFS file-system description object
387 * @lnum: node logical eraseblock number 391 * @lnum: node logical eraseblock number
388 * @offs: node offset 392 * @offs: node offset
@@ -394,43 +398,25 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
394 * @deletion: non-zero if this is a deletion 398 * @deletion: non-zero if this is a deletion
395 * @used: number of bytes in use in a LEB 399 * @used: number of bytes in use in a LEB
396 * 400 *
397 * This function inserts a scanned directory entry node to the replay tree. 401 * This function inserts a scanned directory entry node or an extended
398 * Returns zero in case of success and a negative error code in case of 402 * attribute entry to the replay list. Returns zero in case of success and a
399 * failure. 403 * negative error code in case of failure.
400 *
401 * This function is also used for extended attribute entries because they are
402 * implemented as directory entry nodes.
403 */ 404 */
404static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, 405static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
405 union ubifs_key *key, const char *name, int nlen, 406 union ubifs_key *key, const char *name, int nlen,
406 unsigned long long sqnum, int deletion, int *used) 407 unsigned long long sqnum, int deletion, int *used)
407{ 408{
408 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
409 struct replay_entry *r; 409 struct replay_entry *r;
410 char *nbuf; 410 char *nbuf;
411 411
412 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
412 if (key_inum(c, key) >= c->highest_inum) 413 if (key_inum(c, key) >= c->highest_inum)
413 c->highest_inum = key_inum(c, key); 414 c->highest_inum = key_inum(c, key);
414 415
415 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
416 while (*p) {
417 parent = *p;
418 r = rb_entry(parent, struct replay_entry, rb);
419 if (sqnum < r->sqnum) {
420 p = &(*p)->rb_left;
421 continue;
422 }
423 if (sqnum > r->sqnum) {
424 p = &(*p)->rb_right;
425 continue;
426 }
427 ubifs_err("duplicate sqnum in replay");
428 return -EINVAL;
429 }
430
431 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 416 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
432 if (!r) 417 if (!r)
433 return -ENOMEM; 418 return -ENOMEM;
419
434 nbuf = kmalloc(nlen + 1, GFP_KERNEL); 420 nbuf = kmalloc(nlen + 1, GFP_KERNEL);
435 if (!nbuf) { 421 if (!nbuf) {
436 kfree(r); 422 kfree(r);
@@ -442,17 +428,15 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
442 r->lnum = lnum; 428 r->lnum = lnum;
443 r->offs = offs; 429 r->offs = offs;
444 r->len = len; 430 r->len = len;
431 r->deletion = !!deletion;
445 r->sqnum = sqnum; 432 r->sqnum = sqnum;
433 key_copy(c, key, &r->key);
446 r->nm.len = nlen; 434 r->nm.len = nlen;
447 memcpy(nbuf, name, nlen); 435 memcpy(nbuf, name, nlen);
448 nbuf[nlen] = '\0'; 436 nbuf[nlen] = '\0';
449 r->nm.name = nbuf; 437 r->nm.name = nbuf;
450 r->flags = (deletion ? REPLAY_DELETION : 0);
451 key_copy(c, key, &r->key);
452 438
453 ubifs_assert(!*p); 439 list_add_tail(&r->list, &c->replay_list);
454 rb_link_node(&r->rb, parent, p);
455 rb_insert_color(&r->rb, &c->replay_tree);
456 return 0; 440 return 0;
457} 441}
458 442
@@ -489,29 +473,92 @@ int ubifs_validate_entry(struct ubifs_info *c,
489} 473}
490 474
491/** 475/**
476 * is_last_bud - check if the bud is the last in the journal head.
477 * @c: UBIFS file-system description object
478 * @bud: bud description object
479 *
480 * This function checks if bud @bud is the last bud in its journal head. This
481 * information is then used by 'replay_bud()' to decide whether the bud can
482 * have corruptions or not. Indeed, only last buds can be corrupted by power
483 * cuts. Returns %1 if this is the last bud, and %0 if not.
484 */
485static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
486{
487 struct ubifs_jhead *jh = &c->jheads[bud->jhead];
488 struct ubifs_bud *next;
489 uint32_t data;
490 int err;
491
492 if (list_is_last(&bud->list, &jh->buds_list))
493 return 1;
494
495 /*
496 * The following is a quirk to make sure we work correctly with UBIFS
497 * images used with older UBIFS.
498 *
499 * Normally, the last bud will be the last in the journal head's list
500 * of bud. However, there is one exception if the UBIFS image belongs
501 * to older UBIFS. This is fairly unlikely: one would need to use old
502 * UBIFS, then have a power cut exactly at the right point, and then
503 * try to mount this image with new UBIFS.
504 *
505 * The exception is: it is possible to have 2 buds A and B, A goes
506 * before B, and B is the last, bud B is contains no data, and bud A is
507 * corrupted at the end. The reason is that in older versions when the
508 * journal code switched the next bud (from A to B), it first added a
509 * log reference node for the new bud (B), and only after this it
510 * synchronized the write-buffer of current bud (A). But later this was
511 * changed and UBIFS started to always synchronize the write-buffer of
512 * the bud (A) before writing the log reference for the new bud (B).
513 *
514 * But because older UBIFS always synchronized A's write-buffer before
515 * writing to B, we can recognize this exceptional situation but
516 * checking the contents of bud B - if it is empty, then A can be
517 * treated as the last and we can recover it.
518 *
519 * TODO: remove this piece of code in a couple of years (today it is
520 * 16.05.2011).
521 */
522 next = list_entry(bud->list.next, struct ubifs_bud, list);
523 if (!list_is_last(&next->list, &jh->buds_list))
524 return 0;
525
526 err = ubi_read(c->ubi, next->lnum, (char *)&data,
527 next->start, 4);
528 if (err)
529 return 0;
530
531 return data == 0xFFFFFFFF;
532}
533
534/**
492 * replay_bud - replay a bud logical eraseblock. 535 * replay_bud - replay a bud logical eraseblock.
493 * @c: UBIFS file-system description object 536 * @c: UBIFS file-system description object
494 * @lnum: bud logical eraseblock number to replay 537 * @b: bud entry which describes the bud
495 * @offs: bud start offset
496 * @jhead: journal head to which this bud belongs
497 * @free: amount of free space in the bud is returned here
498 * @dirty: amount of dirty space from padding and deletion nodes is returned
499 * here
500 * 538 *
501 * This function returns zero in case of success and a negative error code in 539 * This function replays bud @bud, recovers it if needed, and adds all nodes
502 * case of failure. 540 * from this bud to the replay list. Returns zero in case of success and a
541 * negative error code in case of failure.
503 */ 542 */
504static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 543static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
505 int *free, int *dirty)
506{ 544{
507 int err = 0, used = 0; 545 int is_last = is_last_bud(c, b->bud);
546 int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start;
508 struct ubifs_scan_leb *sleb; 547 struct ubifs_scan_leb *sleb;
509 struct ubifs_scan_node *snod; 548 struct ubifs_scan_node *snod;
510 struct ubifs_bud *bud;
511 549
512 dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); 550 dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d",
513 if (c->need_recovery) 551 lnum, b->bud->jhead, offs, is_last);
514 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); 552
553 if (c->need_recovery && is_last)
554 /*
555 * Recover only last LEBs in the journal heads, because power
556 * cuts may cause corruptions only in these LEBs, because only
557 * these LEBs could possibly be written to at the power cut
558 * time.
559 */
560 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf,
561 b->bud->jhead != GCHD);
515 else 562 else
516 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); 563 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
517 if (IS_ERR(sleb)) 564 if (IS_ERR(sleb))
@@ -627,15 +674,13 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
627 goto out; 674 goto out;
628 } 675 }
629 676
630 bud = ubifs_search_bud(c, lnum); 677 ubifs_assert(ubifs_search_bud(c, lnum));
631 if (!bud)
632 BUG();
633
634 ubifs_assert(sleb->endpt - offs >= used); 678 ubifs_assert(sleb->endpt - offs >= used);
635 ubifs_assert(sleb->endpt % c->min_io_size == 0); 679 ubifs_assert(sleb->endpt % c->min_io_size == 0);
636 680
637 *dirty = sleb->endpt - offs - used; 681 b->dirty = sleb->endpt - offs - used;
638 *free = c->leb_size - sleb->endpt; 682 b->free = c->leb_size - sleb->endpt;
683 dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, b->dirty, b->free);
639 684
640out: 685out:
641 ubifs_scan_destroy(sleb); 686 ubifs_scan_destroy(sleb);
@@ -649,58 +694,6 @@ out_dump:
649} 694}
650 695
651/** 696/**
652 * insert_ref_node - insert a reference node to the replay tree.
653 * @c: UBIFS file-system description object
654 * @lnum: node logical eraseblock number
655 * @offs: node offset
656 * @sqnum: sequence number
657 * @free: amount of free space in bud
658 * @dirty: amount of dirty space from padding and deletion nodes
659 * @jhead: journal head number for the bud
660 *
661 * This function inserts a reference node to the replay tree and returns zero
662 * in case of success or a negative error code in case of failure.
663 */
664static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
665 unsigned long long sqnum, int free, int dirty,
666 int jhead)
667{
668 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
669 struct replay_entry *r;
670
671 dbg_mnt("add ref LEB %d:%d", lnum, offs);
672 while (*p) {
673 parent = *p;
674 r = rb_entry(parent, struct replay_entry, rb);
675 if (sqnum < r->sqnum) {
676 p = &(*p)->rb_left;
677 continue;
678 } else if (sqnum > r->sqnum) {
679 p = &(*p)->rb_right;
680 continue;
681 }
682 ubifs_err("duplicate sqnum in replay tree");
683 return -EINVAL;
684 }
685
686 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
687 if (!r)
688 return -ENOMEM;
689
690 r->lnum = lnum;
691 r->offs = offs;
692 r->sqnum = sqnum;
693 r->flags = REPLAY_REF;
694 r->free = free;
695 r->dirty = dirty;
696 r->jhead = jhead;
697
698 rb_link_node(&r->rb, parent, p);
699 rb_insert_color(&r->rb, &c->replay_tree);
700 return 0;
701}
702
703/**
704 * replay_buds - replay all buds. 697 * replay_buds - replay all buds.
705 * @c: UBIFS file-system description object 698 * @c: UBIFS file-system description object
706 * 699 *
@@ -710,17 +703,16 @@ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
710static int replay_buds(struct ubifs_info *c) 703static int replay_buds(struct ubifs_info *c)
711{ 704{
712 struct bud_entry *b; 705 struct bud_entry *b;
713 int err, uninitialized_var(free), uninitialized_var(dirty); 706 int err;
707 unsigned long long prev_sqnum = 0;
714 708
715 list_for_each_entry(b, &c->replay_buds, list) { 709 list_for_each_entry(b, &c->replay_buds, list) {
716 err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, 710 err = replay_bud(c, b);
717 &free, &dirty);
718 if (err)
719 return err;
720 err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum,
721 free, dirty, b->bud->jhead);
722 if (err) 711 if (err)
723 return err; 712 return err;
713
714 ubifs_assert(b->sqnum > prev_sqnum);
715 prev_sqnum = b->sqnum;
724 } 716 }
725 717
726 return 0; 718 return 0;
@@ -1060,25 +1052,29 @@ int ubifs_replay_journal(struct ubifs_info *c)
1060 if (err) 1052 if (err)
1061 goto out; 1053 goto out;
1062 1054
1063 err = apply_replay_tree(c); 1055 err = apply_replay_list(c);
1056 if (err)
1057 goto out;
1058
1059 err = set_buds_lprops(c);
1064 if (err) 1060 if (err)
1065 goto out; 1061 goto out;
1066 1062
1067 /* 1063 /*
1068 * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable 1064 * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable
1069 * to roughly estimate index growth. Things like @c->min_idx_lebs 1065 * to roughly estimate index growth. Things like @c->bi.min_idx_lebs
1070 * depend on it. This means we have to initialize it to make sure 1066 * depend on it. This means we have to initialize it to make sure
1071 * budgeting works properly. 1067 * budgeting works properly.
1072 */ 1068 */
1073 c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); 1069 c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
1074 c->budg_uncommitted_idx *= c->max_idx_node_sz; 1070 c->bi.uncommitted_idx *= c->max_idx_node_sz;
1075 1071
1076 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1072 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
1077 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " 1073 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
1078 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1074 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
1079 (unsigned long)c->highest_inum); 1075 (unsigned long)c->highest_inum);
1080out: 1076out:
1081 destroy_replay_tree(c); 1077 destroy_replay_list(c);
1082 destroy_bud_list(c); 1078 destroy_bud_list(c);
1083 c->replaying = 0; 1079 c->replaying = 0;
1084 return err; 1080 return err;
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index bf31b4729e51..c606f010e8df 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -475,7 +475,8 @@ failed:
475 * @c: UBIFS file-system description object 475 * @c: UBIFS file-system description object
476 * 476 *
477 * This function returns a pointer to the superblock node or a negative error 477 * This function returns a pointer to the superblock node or a negative error
478 * code. 478 * code. Note, the user of this function is responsible of kfree()'ing the
479 * returned superblock buffer.
479 */ 480 */
480struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c) 481struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
481{ 482{
@@ -616,6 +617,7 @@ int ubifs_read_superblock(struct ubifs_info *c)
616 c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); 617 c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran);
617 memcpy(&c->uuid, &sup->uuid, 16); 618 memcpy(&c->uuid, &sup->uuid, 16);
618 c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); 619 c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT);
620 c->space_fixup = !!(sup_flags & UBIFS_FLG_SPACE_FIXUP);
619 621
620 /* Automatically increase file system size to the maximum size */ 622 /* Automatically increase file system size to the maximum size */
621 c->old_leb_cnt = c->leb_cnt; 623 c->old_leb_cnt = c->leb_cnt;
@@ -650,3 +652,152 @@ out:
650 kfree(sup); 652 kfree(sup);
651 return err; 653 return err;
652} 654}
655
656/**
657 * fixup_leb - fixup/unmap an LEB containing free space.
658 * @c: UBIFS file-system description object
659 * @lnum: the LEB number to fix up
660 * @len: number of used bytes in LEB (starting at offset 0)
661 *
662 * This function reads the contents of the given LEB number @lnum, then fixes
663 * it up, so that empty min. I/O units in the end of LEB are actually erased on
664 * flash (rather than being just all-0xff real data). If the LEB is completely
665 * empty, it is simply unmapped.
666 */
667static int fixup_leb(struct ubifs_info *c, int lnum, int len)
668{
669 int err;
670
671 ubifs_assert(len >= 0);
672 ubifs_assert(len % c->min_io_size == 0);
673 ubifs_assert(len < c->leb_size);
674
675 if (len == 0) {
676 dbg_mnt("unmap empty LEB %d", lnum);
677 return ubi_leb_unmap(c->ubi, lnum);
678 }
679
680 dbg_mnt("fixup LEB %d, data len %d", lnum, len);
681 err = ubi_read(c->ubi, lnum, c->sbuf, 0, len);
682 if (err)
683 return err;
684
685 return ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
686}
687
688/**
689 * fixup_free_space - find & remap all LEBs containing free space.
690 * @c: UBIFS file-system description object
691 *
692 * This function walks through all LEBs in the filesystem and fiexes up those
693 * containing free/empty space.
694 */
695static int fixup_free_space(struct ubifs_info *c)
696{
697 int lnum, err = 0;
698 struct ubifs_lprops *lprops;
699
700 ubifs_get_lprops(c);
701
702 /* Fixup LEBs in the master area */
703 for (lnum = UBIFS_MST_LNUM; lnum < UBIFS_LOG_LNUM; lnum++) {
704 err = fixup_leb(c, lnum, c->mst_offs + c->mst_node_alsz);
705 if (err)
706 goto out;
707 }
708
709 /* Unmap unused log LEBs */
710 lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
711 while (lnum != c->ltail_lnum) {
712 err = fixup_leb(c, lnum, 0);
713 if (err)
714 goto out;
715 lnum = ubifs_next_log_lnum(c, lnum);
716 }
717
718 /* Fixup the current log head */
719 err = fixup_leb(c, c->lhead_lnum, c->lhead_offs);
720 if (err)
721 goto out;
722
723 /* Fixup LEBs in the LPT area */
724 for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) {
725 int free = c->ltab[lnum - c->lpt_first].free;
726
727 if (free > 0) {
728 err = fixup_leb(c, lnum, c->leb_size - free);
729 if (err)
730 goto out;
731 }
732 }
733
734 /* Unmap LEBs in the orphans area */
735 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
736 err = fixup_leb(c, lnum, 0);
737 if (err)
738 goto out;
739 }
740
741 /* Fixup LEBs in the main area */
742 for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) {
743 lprops = ubifs_lpt_lookup(c, lnum);
744 if (IS_ERR(lprops)) {
745 err = PTR_ERR(lprops);
746 goto out;
747 }
748
749 if (lprops->free > 0) {
750 err = fixup_leb(c, lnum, c->leb_size - lprops->free);
751 if (err)
752 goto out;
753 }
754 }
755
756out:
757 ubifs_release_lprops(c);
758 return err;
759}
760
761/**
762 * ubifs_fixup_free_space - find & fix all LEBs with free space.
763 * @c: UBIFS file-system description object
764 *
765 * This function fixes up LEBs containing free space on first mount, if the
766 * appropriate flag was set when the FS was created. Each LEB with one or more
767 * empty min. I/O unit (i.e. free-space-count > 0) is re-written, to make sure
768 * the free space is actually erased. E.g., this is necessary for some NAND
769 * chips, since the free space may have been programmed like real "0xff" data
770 * (generating a non-0xff ECC), causing future writes to the not-really-erased
771 * NAND pages to behave badly. After the space is fixed up, the superblock flag
772 * is cleared, so that this is skipped for all future mounts.
773 */
774int ubifs_fixup_free_space(struct ubifs_info *c)
775{
776 int err;
777 struct ubifs_sb_node *sup;
778
779 ubifs_assert(c->space_fixup);
780 ubifs_assert(!c->ro_mount);
781
782 ubifs_msg("start fixing up free space");
783
784 err = fixup_free_space(c);
785 if (err)
786 return err;
787
788 sup = ubifs_read_sb_node(c);
789 if (IS_ERR(sup))
790 return PTR_ERR(sup);
791
792 /* Free-space fixup is no longer required */
793 c->space_fixup = 0;
794 sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP);
795
796 err = ubifs_write_sb_node(c, sup);
797 kfree(sup);
798 if (err)
799 return err;
800
801 ubifs_msg("free space fixup complete");
802 return err;
803}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 04ad07f4fcc3..6db0bdaa9f74 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -375,7 +375,7 @@ out:
375 ubifs_release_dirty_inode_budget(c, ui); 375 ubifs_release_dirty_inode_budget(c, ui);
376 else { 376 else {
377 /* We've deleted something - clean the "no space" flags */ 377 /* We've deleted something - clean the "no space" flags */
378 c->nospace = c->nospace_rp = 0; 378 c->bi.nospace = c->bi.nospace_rp = 0;
379 smp_wmb(); 379 smp_wmb();
380 } 380 }
381done: 381done:
@@ -694,11 +694,11 @@ static int init_constants_sb(struct ubifs_info *c)
694 * be compressed and direntries are of the maximum size. 694 * be compressed and direntries are of the maximum size.
695 * 695 *
696 * Note, data, which may be stored in inodes is budgeted separately, so 696 * Note, data, which may be stored in inodes is budgeted separately, so
697 * it is not included into 'c->inode_budget'. 697 * it is not included into 'c->bi.inode_budget'.
698 */ 698 */
699 c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; 699 c->bi.page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE;
700 c->inode_budget = UBIFS_INO_NODE_SZ; 700 c->bi.inode_budget = UBIFS_INO_NODE_SZ;
701 c->dent_budget = UBIFS_MAX_DENT_NODE_SZ; 701 c->bi.dent_budget = UBIFS_MAX_DENT_NODE_SZ;
702 702
703 /* 703 /*
704 * When the amount of flash space used by buds becomes 704 * When the amount of flash space used by buds becomes
@@ -742,7 +742,7 @@ static void init_constants_master(struct ubifs_info *c)
742{ 742{
743 long long tmp64; 743 long long tmp64;
744 744
745 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 745 c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
746 c->report_rp_size = ubifs_reported_space(c, c->rp_size); 746 c->report_rp_size = ubifs_reported_space(c, c->rp_size);
747 747
748 /* 748 /*
@@ -1144,8 +1144,8 @@ static int check_free_space(struct ubifs_info *c)
1144{ 1144{
1145 ubifs_assert(c->dark_wm > 0); 1145 ubifs_assert(c->dark_wm > 0);
1146 if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) { 1146 if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) {
1147 ubifs_err("insufficient free space to mount in read/write mode"); 1147 ubifs_err("insufficient free space to mount in R/W mode");
1148 dbg_dump_budg(c); 1148 dbg_dump_budg(c, &c->bi);
1149 dbg_dump_lprops(c); 1149 dbg_dump_lprops(c);
1150 return -ENOSPC; 1150 return -ENOSPC;
1151 } 1151 }
@@ -1304,7 +1304,7 @@ static int mount_ubifs(struct ubifs_info *c)
1304 if (err) 1304 if (err)
1305 goto out_lpt; 1305 goto out_lpt;
1306 1306
1307 err = dbg_check_idx_size(c, c->old_idx_sz); 1307 err = dbg_check_idx_size(c, c->bi.old_idx_sz);
1308 if (err) 1308 if (err)
1309 goto out_lpt; 1309 goto out_lpt;
1310 1310
@@ -1313,7 +1313,7 @@ static int mount_ubifs(struct ubifs_info *c)
1313 goto out_journal; 1313 goto out_journal;
1314 1314
1315 /* Calculate 'min_idx_lebs' after journal replay */ 1315 /* Calculate 'min_idx_lebs' after journal replay */
1316 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 1316 c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
1317 1317
1318 err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount); 1318 err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount);
1319 if (err) 1319 if (err)
@@ -1396,6 +1396,12 @@ static int mount_ubifs(struct ubifs_info *c)
1396 } else 1396 } else
1397 ubifs_assert(c->lst.taken_empty_lebs > 0); 1397 ubifs_assert(c->lst.taken_empty_lebs > 0);
1398 1398
1399 if (!c->ro_mount && c->space_fixup) {
1400 err = ubifs_fixup_free_space(c);
1401 if (err)
1402 goto out_infos;
1403 }
1404
1399 err = dbg_check_filesystem(c); 1405 err = dbg_check_filesystem(c);
1400 if (err) 1406 if (err)
1401 goto out_infos; 1407 goto out_infos;
@@ -1442,7 +1448,8 @@ static int mount_ubifs(struct ubifs_info *c)
1442 c->main_lebs, c->main_first, c->leb_cnt - 1); 1448 c->main_lebs, c->main_first, c->leb_cnt - 1);
1443 dbg_msg("index LEBs: %d", c->lst.idx_lebs); 1449 dbg_msg("index LEBs: %d", c->lst.idx_lebs);
1444 dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", 1450 dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)",
1445 c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20); 1451 c->bi.old_idx_sz, c->bi.old_idx_sz >> 10,
1452 c->bi.old_idx_sz >> 20);
1446 dbg_msg("key hash type: %d", c->key_hash_type); 1453 dbg_msg("key hash type: %d", c->key_hash_type);
1447 dbg_msg("tree fanout: %d", c->fanout); 1454 dbg_msg("tree fanout: %d", c->fanout);
1448 dbg_msg("reserved GC LEB: %d", c->gc_lnum); 1455 dbg_msg("reserved GC LEB: %d", c->gc_lnum);
@@ -1456,7 +1463,7 @@ static int mount_ubifs(struct ubifs_info *c)
1456 dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", 1463 dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu",
1457 UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); 1464 UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ);
1458 dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d", 1465 dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d",
1459 UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, 1466 UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
1460 UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout)); 1467 UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout));
1461 dbg_msg("dead watermark: %d", c->dead_wm); 1468 dbg_msg("dead watermark: %d", c->dead_wm);
1462 dbg_msg("dark watermark: %d", c->dark_wm); 1469 dbg_msg("dark watermark: %d", c->dark_wm);
@@ -1584,6 +1591,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1584 } 1591 }
1585 sup->leb_cnt = cpu_to_le32(c->leb_cnt); 1592 sup->leb_cnt = cpu_to_le32(c->leb_cnt);
1586 err = ubifs_write_sb_node(c, sup); 1593 err = ubifs_write_sb_node(c, sup);
1594 kfree(sup);
1587 if (err) 1595 if (err)
1588 goto out; 1596 goto out;
1589 } 1597 }
@@ -1684,6 +1692,13 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1684 */ 1692 */
1685 err = dbg_check_space_info(c); 1693 err = dbg_check_space_info(c);
1686 } 1694 }
1695
1696 if (c->space_fixup) {
1697 err = ubifs_fixup_free_space(c);
1698 if (err)
1699 goto out;
1700 }
1701
1687 mutex_unlock(&c->umount_mutex); 1702 mutex_unlock(&c->umount_mutex);
1688 return err; 1703 return err;
1689 1704
@@ -1766,10 +1781,9 @@ static void ubifs_put_super(struct super_block *sb)
1766 * to write them back because of I/O errors. 1781 * to write them back because of I/O errors.
1767 */ 1782 */
1768 if (!c->ro_error) { 1783 if (!c->ro_error) {
1769 ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); 1784 ubifs_assert(c->bi.idx_growth == 0);
1770 ubifs_assert(c->budg_idx_growth == 0); 1785 ubifs_assert(c->bi.dd_growth == 0);
1771 ubifs_assert(c->budg_dd_growth == 0); 1786 ubifs_assert(c->bi.data_growth == 0);
1772 ubifs_assert(c->budg_data_growth == 0);
1773 } 1787 }
1774 1788
1775 /* 1789 /*
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index de485979ca39..8119b1fd8d94 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -2557,11 +2557,11 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
2557 if (err) { 2557 if (err) {
2558 /* Ensure the znode is dirtied */ 2558 /* Ensure the znode is dirtied */
2559 if (znode->cnext || !ubifs_zn_dirty(znode)) { 2559 if (znode->cnext || !ubifs_zn_dirty(znode)) {
2560 znode = dirty_cow_bottom_up(c, znode); 2560 znode = dirty_cow_bottom_up(c, znode);
2561 if (IS_ERR(znode)) { 2561 if (IS_ERR(znode)) {
2562 err = PTR_ERR(znode); 2562 err = PTR_ERR(znode);
2563 goto out_unlock; 2563 goto out_unlock;
2564 } 2564 }
2565 } 2565 }
2566 err = tnc_delete(c, znode, n); 2566 err = tnc_delete(c, znode, n);
2567 } 2567 }
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 53288e5d604e..41920f357bbf 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -377,15 +377,13 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
377 c->gap_lebs = NULL; 377 c->gap_lebs = NULL;
378 return err; 378 return err;
379 } 379 }
380 if (!dbg_force_in_the_gaps_enabled) { 380 if (dbg_force_in_the_gaps_enabled()) {
381 /* 381 /*
382 * Do not print scary warnings if the debugging 382 * Do not print scary warnings if the debugging
383 * option which forces in-the-gaps is enabled. 383 * option which forces in-the-gaps is enabled.
384 */ 384 */
385 ubifs_err("out of space"); 385 ubifs_warn("out of space");
386 spin_lock(&c->space_lock); 386 dbg_dump_budg(c, &c->bi);
387 dbg_dump_budg(c);
388 spin_unlock(&c->space_lock);
389 dbg_dump_lprops(c); 387 dbg_dump_lprops(c);
390 } 388 }
391 /* Try to commit anyway */ 389 /* Try to commit anyway */
@@ -796,16 +794,16 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot)
796 spin_lock(&c->space_lock); 794 spin_lock(&c->space_lock);
797 /* 795 /*
798 * Although we have not finished committing yet, update size of the 796 * Although we have not finished committing yet, update size of the
799 * committed index ('c->old_idx_sz') and zero out the index growth 797 * committed index ('c->bi.old_idx_sz') and zero out the index growth
800 * budget. It is OK to do this now, because we've reserved all the 798 * budget. It is OK to do this now, because we've reserved all the
801 * space which is needed to commit the index, and it is save for the 799 * space which is needed to commit the index, and it is save for the
802 * budgeting subsystem to assume the index is already committed, 800 * budgeting subsystem to assume the index is already committed,
803 * even though it is not. 801 * even though it is not.
804 */ 802 */
805 ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); 803 ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
806 c->old_idx_sz = c->calc_idx_sz; 804 c->bi.old_idx_sz = c->calc_idx_sz;
807 c->budg_uncommitted_idx = 0; 805 c->bi.uncommitted_idx = 0;
808 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 806 c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
809 spin_unlock(&c->space_lock); 807 spin_unlock(&c->space_lock);
810 mutex_unlock(&c->tnc_mutex); 808 mutex_unlock(&c->tnc_mutex);
811 809
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index 191ca7863fe7..e24380cf46ed 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -408,9 +408,11 @@ enum {
408 * Superblock flags. 408 * Superblock flags.
409 * 409 *
410 * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set 410 * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set
411 * UBIFS_FLG_SPACE_FIXUP: first-mount "fixup" of free space within LEBs needed
411 */ 412 */
412enum { 413enum {
413 UBIFS_FLG_BIGLPT = 0x02, 414 UBIFS_FLG_BIGLPT = 0x02,
415 UBIFS_FLG_SPACE_FIXUP = 0x04,
414}; 416};
415 417
416/** 418/**
@@ -434,7 +436,7 @@ struct ubifs_ch {
434 __u8 node_type; 436 __u8 node_type;
435 __u8 group_type; 437 __u8 group_type;
436 __u8 padding[2]; 438 __u8 padding[2];
437} __attribute__ ((packed)); 439} __packed;
438 440
439/** 441/**
440 * union ubifs_dev_desc - device node descriptor. 442 * union ubifs_dev_desc - device node descriptor.
@@ -448,7 +450,7 @@ struct ubifs_ch {
448union ubifs_dev_desc { 450union ubifs_dev_desc {
449 __le32 new; 451 __le32 new;
450 __le64 huge; 452 __le64 huge;
451} __attribute__ ((packed)); 453} __packed;
452 454
453/** 455/**
454 * struct ubifs_ino_node - inode node. 456 * struct ubifs_ino_node - inode node.
@@ -509,7 +511,7 @@ struct ubifs_ino_node {
509 __le16 compr_type; 511 __le16 compr_type;
510 __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */ 512 __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */
511 __u8 data[]; 513 __u8 data[];
512} __attribute__ ((packed)); 514} __packed;
513 515
514/** 516/**
515 * struct ubifs_dent_node - directory entry node. 517 * struct ubifs_dent_node - directory entry node.
@@ -534,7 +536,7 @@ struct ubifs_dent_node {
534 __le16 nlen; 536 __le16 nlen;
535 __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ 537 __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */
536 __u8 name[]; 538 __u8 name[];
537} __attribute__ ((packed)); 539} __packed;
538 540
539/** 541/**
540 * struct ubifs_data_node - data node. 542 * struct ubifs_data_node - data node.
@@ -555,7 +557,7 @@ struct ubifs_data_node {
555 __le16 compr_type; 557 __le16 compr_type;
556 __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */ 558 __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */
557 __u8 data[]; 559 __u8 data[];
558} __attribute__ ((packed)); 560} __packed;
559 561
560/** 562/**
561 * struct ubifs_trun_node - truncation node. 563 * struct ubifs_trun_node - truncation node.
@@ -575,7 +577,7 @@ struct ubifs_trun_node {
575 __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */ 577 __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */
576 __le64 old_size; 578 __le64 old_size;
577 __le64 new_size; 579 __le64 new_size;
578} __attribute__ ((packed)); 580} __packed;
579 581
580/** 582/**
581 * struct ubifs_pad_node - padding node. 583 * struct ubifs_pad_node - padding node.
@@ -586,7 +588,7 @@ struct ubifs_trun_node {
586struct ubifs_pad_node { 588struct ubifs_pad_node {
587 struct ubifs_ch ch; 589 struct ubifs_ch ch;
588 __le32 pad_len; 590 __le32 pad_len;
589} __attribute__ ((packed)); 591} __packed;
590 592
591/** 593/**
592 * struct ubifs_sb_node - superblock node. 594 * struct ubifs_sb_node - superblock node.
@@ -644,7 +646,7 @@ struct ubifs_sb_node {
644 __u8 uuid[16]; 646 __u8 uuid[16];
645 __le32 ro_compat_version; 647 __le32 ro_compat_version;
646 __u8 padding2[3968]; 648 __u8 padding2[3968];
647} __attribute__ ((packed)); 649} __packed;
648 650
649/** 651/**
650 * struct ubifs_mst_node - master node. 652 * struct ubifs_mst_node - master node.
@@ -711,7 +713,7 @@ struct ubifs_mst_node {
711 __le32 idx_lebs; 713 __le32 idx_lebs;
712 __le32 leb_cnt; 714 __le32 leb_cnt;
713 __u8 padding[344]; 715 __u8 padding[344];
714} __attribute__ ((packed)); 716} __packed;
715 717
716/** 718/**
717 * struct ubifs_ref_node - logical eraseblock reference node. 719 * struct ubifs_ref_node - logical eraseblock reference node.
@@ -727,7 +729,7 @@ struct ubifs_ref_node {
727 __le32 offs; 729 __le32 offs;
728 __le32 jhead; 730 __le32 jhead;
729 __u8 padding[28]; 731 __u8 padding[28];
730} __attribute__ ((packed)); 732} __packed;
731 733
732/** 734/**
733 * struct ubifs_branch - key/reference/length branch 735 * struct ubifs_branch - key/reference/length branch
@@ -741,7 +743,7 @@ struct ubifs_branch {
741 __le32 offs; 743 __le32 offs;
742 __le32 len; 744 __le32 len;
743 __u8 key[]; 745 __u8 key[];
744} __attribute__ ((packed)); 746} __packed;
745 747
746/** 748/**
747 * struct ubifs_idx_node - indexing node. 749 * struct ubifs_idx_node - indexing node.
@@ -755,7 +757,7 @@ struct ubifs_idx_node {
755 __le16 child_cnt; 757 __le16 child_cnt;
756 __le16 level; 758 __le16 level;
757 __u8 branches[]; 759 __u8 branches[];
758} __attribute__ ((packed)); 760} __packed;
759 761
760/** 762/**
761 * struct ubifs_cs_node - commit start node. 763 * struct ubifs_cs_node - commit start node.
@@ -765,7 +767,7 @@ struct ubifs_idx_node {
765struct ubifs_cs_node { 767struct ubifs_cs_node {
766 struct ubifs_ch ch; 768 struct ubifs_ch ch;
767 __le64 cmt_no; 769 __le64 cmt_no;
768} __attribute__ ((packed)); 770} __packed;
769 771
770/** 772/**
771 * struct ubifs_orph_node - orphan node. 773 * struct ubifs_orph_node - orphan node.
@@ -777,6 +779,6 @@ struct ubifs_orph_node {
777 struct ubifs_ch ch; 779 struct ubifs_ch ch;
778 __le64 cmt_no; 780 __le64 cmt_no;
779 __le64 inos[]; 781 __le64 inos[];
780} __attribute__ ((packed)); 782} __packed;
781 783
782#endif /* __UBIFS_MEDIA_H__ */ 784#endif /* __UBIFS_MEDIA_H__ */
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 8c40ad3c6721..93d1412a06f0 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -389,9 +389,9 @@ struct ubifs_gced_idx_leb {
389 * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses 389 * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses
390 * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot 390 * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot
391 * make sure @inode->i_size is always changed under @ui_mutex, because it 391 * make sure @inode->i_size is always changed under @ui_mutex, because it
392 * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock 392 * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would
393 * with 'ubifs_writepage()' (see file.c). All the other inode fields are 393 * deadlock with 'ubifs_writepage()' (see file.c). All the other inode fields
394 * changed under @ui_mutex, so they do not need "shadow" fields. Note, one 394 * are changed under @ui_mutex, so they do not need "shadow" fields. Note, one
395 * could consider to rework locking and base it on "shadow" fields. 395 * could consider to rework locking and base it on "shadow" fields.
396 */ 396 */
397struct ubifs_inode { 397struct ubifs_inode {
@@ -937,6 +937,40 @@ struct ubifs_mount_opts {
937 unsigned int compr_type:2; 937 unsigned int compr_type:2;
938}; 938};
939 939
940/**
941 * struct ubifs_budg_info - UBIFS budgeting information.
942 * @idx_growth: amount of bytes budgeted for index growth
943 * @data_growth: amount of bytes budgeted for cached data
944 * @dd_growth: amount of bytes budgeted for cached data that will make
945 * other data dirty
946 * @uncommitted_idx: amount of bytes were budgeted for growth of the index, but
947 * which still have to be taken into account because the index
948 * has not been committed so far
949 * @old_idx_sz: size of index on flash
950 * @min_idx_lebs: minimum number of LEBs required for the index
951 * @nospace: non-zero if the file-system does not have flash space (used as
952 * optimization)
953 * @nospace_rp: the same as @nospace, but additionally means that even reserved
954 * pool is full
955 * @page_budget: budget for a page (constant, nenver changed after mount)
956 * @inode_budget: budget for an inode (constant, nenver changed after mount)
957 * @dent_budget: budget for a directory entry (constant, nenver changed after
958 * mount)
959 */
960struct ubifs_budg_info {
961 long long idx_growth;
962 long long data_growth;
963 long long dd_growth;
964 long long uncommitted_idx;
965 unsigned long long old_idx_sz;
966 int min_idx_lebs;
967 unsigned int nospace:1;
968 unsigned int nospace_rp:1;
969 int page_budget;
970 int inode_budget;
971 int dent_budget;
972};
973
940struct ubifs_debug_info; 974struct ubifs_debug_info;
941 975
942/** 976/**
@@ -980,6 +1014,7 @@ struct ubifs_debug_info;
980 * @cmt_wq: wait queue to sleep on if the log is full and a commit is running 1014 * @cmt_wq: wait queue to sleep on if the log is full and a commit is running
981 * 1015 *
982 * @big_lpt: flag that LPT is too big to write whole during commit 1016 * @big_lpt: flag that LPT is too big to write whole during commit
1017 * @space_fixup: flag indicating that free space in LEBs needs to be cleaned up
983 * @no_chk_data_crc: do not check CRCs when reading data nodes (except during 1018 * @no_chk_data_crc: do not check CRCs when reading data nodes (except during
984 * recovery) 1019 * recovery)
985 * @bulk_read: enable bulk-reads 1020 * @bulk_read: enable bulk-reads
@@ -1057,32 +1092,14 @@ struct ubifs_debug_info;
1057 * @dirty_zn_cnt: number of dirty znodes 1092 * @dirty_zn_cnt: number of dirty znodes
1058 * @clean_zn_cnt: number of clean znodes 1093 * @clean_zn_cnt: number of clean znodes
1059 * 1094 *
1060 * @budg_idx_growth: amount of bytes budgeted for index growth 1095 * @space_lock: protects @bi and @lst
1061 * @budg_data_growth: amount of bytes budgeted for cached data 1096 * @lst: lprops statistics
1062 * @budg_dd_growth: amount of bytes budgeted for cached data that will make 1097 * @bi: budgeting information
1063 * other data dirty
1064 * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index,
1065 * but which still have to be taken into account because
1066 * the index has not been committed so far
1067 * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth,
1068 * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst,
1069 * @nospace, and @nospace_rp;
1070 * @min_idx_lebs: minimum number of LEBs required for the index
1071 * @old_idx_sz: size of index on flash
1072 * @calc_idx_sz: temporary variable which is used to calculate new index size 1098 * @calc_idx_sz: temporary variable which is used to calculate new index size
1073 * (contains accurate new index size at end of TNC commit start) 1099 * (contains accurate new index size at end of TNC commit start)
1074 * @lst: lprops statistics
1075 * @nospace: non-zero if the file-system does not have flash space (used as
1076 * optimization)
1077 * @nospace_rp: the same as @nospace, but additionally means that even reserved
1078 * pool is full
1079 *
1080 * @page_budget: budget for a page
1081 * @inode_budget: budget for an inode
1082 * @dent_budget: budget for a directory entry
1083 * 1100 *
1084 * @ref_node_alsz: size of the LEB reference node aligned to the min. flash 1101 * @ref_node_alsz: size of the LEB reference node aligned to the min. flash
1085 * I/O unit 1102 * I/O unit
1086 * @mst_node_alsz: master node aligned size 1103 * @mst_node_alsz: master node aligned size
1087 * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary 1104 * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary
1088 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary 1105 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary
@@ -1189,7 +1206,6 @@ struct ubifs_debug_info;
1189 * @replaying: %1 during journal replay 1206 * @replaying: %1 during journal replay
1190 * @mounting: %1 while mounting 1207 * @mounting: %1 while mounting
1191 * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode 1208 * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode
1192 * @replay_tree: temporary tree used during journal replay
1193 * @replay_list: temporary list used during journal replay 1209 * @replay_list: temporary list used during journal replay
1194 * @replay_buds: list of buds to replay 1210 * @replay_buds: list of buds to replay
1195 * @cs_sqnum: sequence number of first node in the log (commit start node) 1211 * @cs_sqnum: sequence number of first node in the log (commit start node)
@@ -1238,6 +1254,7 @@ struct ubifs_info {
1238 wait_queue_head_t cmt_wq; 1254 wait_queue_head_t cmt_wq;
1239 1255
1240 unsigned int big_lpt:1; 1256 unsigned int big_lpt:1;
1257 unsigned int space_fixup:1;
1241 unsigned int no_chk_data_crc:1; 1258 unsigned int no_chk_data_crc:1;
1242 unsigned int bulk_read:1; 1259 unsigned int bulk_read:1;
1243 unsigned int default_compr:2; 1260 unsigned int default_compr:2;
@@ -1308,21 +1325,10 @@ struct ubifs_info {
1308 atomic_long_t dirty_zn_cnt; 1325 atomic_long_t dirty_zn_cnt;
1309 atomic_long_t clean_zn_cnt; 1326 atomic_long_t clean_zn_cnt;
1310 1327
1311 long long budg_idx_growth;
1312 long long budg_data_growth;
1313 long long budg_dd_growth;
1314 long long budg_uncommitted_idx;
1315 spinlock_t space_lock; 1328 spinlock_t space_lock;
1316 int min_idx_lebs;
1317 unsigned long long old_idx_sz;
1318 unsigned long long calc_idx_sz;
1319 struct ubifs_lp_stats lst; 1329 struct ubifs_lp_stats lst;
1320 unsigned int nospace:1; 1330 struct ubifs_budg_info bi;
1321 unsigned int nospace_rp:1; 1331 unsigned long long calc_idx_sz;
1322
1323 int page_budget;
1324 int inode_budget;
1325 int dent_budget;
1326 1332
1327 int ref_node_alsz; 1333 int ref_node_alsz;
1328 int mst_node_alsz; 1334 int mst_node_alsz;
@@ -1430,7 +1436,6 @@ struct ubifs_info {
1430 unsigned int replaying:1; 1436 unsigned int replaying:1;
1431 unsigned int mounting:1; 1437 unsigned int mounting:1;
1432 unsigned int remounting_rw:1; 1438 unsigned int remounting_rw:1;
1433 struct rb_root replay_tree;
1434 struct list_head replay_list; 1439 struct list_head replay_list;
1435 struct list_head replay_buds; 1440 struct list_head replay_buds;
1436 unsigned long long cs_sqnum; 1441 unsigned long long cs_sqnum;
@@ -1628,6 +1633,7 @@ int ubifs_write_master(struct ubifs_info *c);
1628int ubifs_read_superblock(struct ubifs_info *c); 1633int ubifs_read_superblock(struct ubifs_info *c);
1629struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); 1634struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c);
1630int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); 1635int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup);
1636int ubifs_fixup_free_space(struct ubifs_info *c);
1631 1637
1632/* replay.c */ 1638/* replay.c */
1633int ubifs_validate_entry(struct ubifs_info *c, 1639int ubifs_validate_entry(struct ubifs_info *c,
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 3299f469e712..16f19f55e63f 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -80,8 +80,8 @@ enum {
80 SECURITY_XATTR, 80 SECURITY_XATTR,
81}; 81};
82 82
83static const struct inode_operations none_inode_operations; 83static const struct inode_operations empty_iops;
84static const struct file_operations none_file_operations; 84static const struct file_operations empty_fops;
85 85
86/** 86/**
87 * create_xattr - create an extended attribute. 87 * create_xattr - create an extended attribute.
@@ -131,8 +131,8 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
131 131
132 /* Re-define all operations to be "nothing" */ 132 /* Re-define all operations to be "nothing" */
133 inode->i_mapping->a_ops = &empty_aops; 133 inode->i_mapping->a_ops = &empty_aops;
134 inode->i_op = &none_inode_operations; 134 inode->i_op = &empty_iops;
135 inode->i_fop = &none_file_operations; 135 inode->i_fop = &empty_fops;
136 136
137 inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA; 137 inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA;
138 ui = ubifs_inode(inode); 138 ui = ubifs_inode(inode);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index e765743cf9f3..b4d791a83207 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -409,7 +409,7 @@ out:
409} 409}
410 410
411/** 411/**
412 * ufs_getfrag_bloc() - `get_block_t' function, interface between UFS and 412 * ufs_getfrag_block() - `get_block_t' function, interface between UFS and
413 * readpage, writepage and so on 413 * readpage, writepage and so on
414 */ 414 */
415 415
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 9ef9ed2cfe2e..5e68099db2a5 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -33,7 +33,6 @@
33#include <linux/migrate.h> 33#include <linux/migrate.h>
34#include <linux/backing-dev.h> 34#include <linux/backing-dev.h>
35#include <linux/freezer.h> 35#include <linux/freezer.h>
36#include <linux/list_sort.h>
37 36
38#include "xfs_sb.h" 37#include "xfs_sb.h"
39#include "xfs_inum.h" 38#include "xfs_inum.h"
@@ -709,6 +708,27 @@ xfs_buf_get_empty(
709 return bp; 708 return bp;
710} 709}
711 710
711/*
712 * Return a buffer allocated as an empty buffer and associated to external
713 * memory via xfs_buf_associate_memory() back to it's empty state.
714 */
715void
716xfs_buf_set_empty(
717 struct xfs_buf *bp,
718 size_t len)
719{
720 if (bp->b_pages)
721 _xfs_buf_free_pages(bp);
722
723 bp->b_pages = NULL;
724 bp->b_page_count = 0;
725 bp->b_addr = NULL;
726 bp->b_file_offset = 0;
727 bp->b_buffer_length = bp->b_count_desired = len;
728 bp->b_bn = XFS_BUF_DADDR_NULL;
729 bp->b_flags &= ~XBF_MAPPED;
730}
731
712static inline struct page * 732static inline struct page *
713mem_to_page( 733mem_to_page(
714 void *addr) 734 void *addr)
@@ -1402,12 +1422,12 @@ restart:
1402int 1422int
1403xfs_buftarg_shrink( 1423xfs_buftarg_shrink(
1404 struct shrinker *shrink, 1424 struct shrinker *shrink,
1405 int nr_to_scan, 1425 struct shrink_control *sc)
1406 gfp_t mask)
1407{ 1426{
1408 struct xfs_buftarg *btp = container_of(shrink, 1427 struct xfs_buftarg *btp = container_of(shrink,
1409 struct xfs_buftarg, bt_shrinker); 1428 struct xfs_buftarg, bt_shrinker);
1410 struct xfs_buf *bp; 1429 struct xfs_buf *bp;
1430 int nr_to_scan = sc->nr_to_scan;
1411 LIST_HEAD(dispose); 1431 LIST_HEAD(dispose);
1412 1432
1413 if (!nr_to_scan) 1433 if (!nr_to_scan)
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index a9a1c4512645..50a7d5fb3b73 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -178,6 +178,7 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
178 xfs_buf_flags_t); 178 xfs_buf_flags_t);
179 179
180extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); 180extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
181extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len);
181extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); 182extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
182extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); 183extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
183extern void xfs_buf_hold(xfs_buf_t *); 184extern void xfs_buf_hold(xfs_buf_t *);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index b3486dfa5520..54e623bfbb85 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -586,7 +586,8 @@ xfs_file_compat_ioctl(
586 case XFS_IOC_RESVSP_32: 586 case XFS_IOC_RESVSP_32:
587 case XFS_IOC_UNRESVSP_32: 587 case XFS_IOC_UNRESVSP_32:
588 case XFS_IOC_RESVSP64_32: 588 case XFS_IOC_RESVSP64_32:
589 case XFS_IOC_UNRESVSP64_32: { 589 case XFS_IOC_UNRESVSP64_32:
590 case XFS_IOC_ZERO_RANGE_32: {
590 struct xfs_flock64 bf; 591 struct xfs_flock64 bf;
591 592
592 if (xfs_compat_flock64_copyin(&bf, arg)) 593 if (xfs_compat_flock64_copyin(&bf, arg))
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
index 08b605792a99..80f4060e8970 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -184,6 +184,7 @@ typedef struct compat_xfs_flock64 {
184#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) 184#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64)
185#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) 185#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64)
186#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64) 186#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64)
187#define XFS_IOC_ZERO_RANGE_32 _IOW('X', 57, struct compat_xfs_flock64)
187 188
188typedef struct compat_xfs_fsop_geom_v1 { 189typedef struct compat_xfs_fsop_geom_v1 {
189 __u32 blocksize; /* filesystem (data) block size */ 190 __u32 blocksize; /* filesystem (data) block size */
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 244be9cbfe78..8633521b3b2e 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -70,6 +70,7 @@
70#include <linux/ctype.h> 70#include <linux/ctype.h>
71#include <linux/writeback.h> 71#include <linux/writeback.h>
72#include <linux/capability.h> 72#include <linux/capability.h>
73#include <linux/list_sort.h>
73 74
74#include <asm/page.h> 75#include <asm/page.h>
75#include <asm/div64.h> 76#include <asm/div64.h>
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c
index 9f76cceb678d..bd672def95ac 100644
--- a/fs/xfs/linux-2.6/xfs_message.c
+++ b/fs/xfs/linux-2.6/xfs_message.c
@@ -41,23 +41,6 @@ __xfs_printk(
41 printk("%sXFS: %pV\n", level, vaf); 41 printk("%sXFS: %pV\n", level, vaf);
42} 42}
43 43
44void xfs_printk(
45 const char *level,
46 const struct xfs_mount *mp,
47 const char *fmt, ...)
48{
49 struct va_format vaf;
50 va_list args;
51
52 va_start(args, fmt);
53
54 vaf.fmt = fmt;
55 vaf.va = &args;
56
57 __xfs_printk(level, mp, &vaf);
58 va_end(args);
59}
60
61#define define_xfs_printk_level(func, kern_level) \ 44#define define_xfs_printk_level(func, kern_level) \
62void func(const struct xfs_mount *mp, const char *fmt, ...) \ 45void func(const struct xfs_mount *mp, const char *fmt, ...) \
63{ \ 46{ \
@@ -95,8 +78,7 @@ xfs_alert_tag(
95 int do_panic = 0; 78 int do_panic = 0;
96 79
97 if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { 80 if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
98 xfs_printk(KERN_ALERT, mp, 81 xfs_alert(mp, "Transforming an alert into a BUG.");
99 "XFS: Transforming an alert into a BUG.");
100 do_panic = 1; 82 do_panic = 1;
101 } 83 }
102 84
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h
index f1b3fc1b6c4e..7fb7ea007672 100644
--- a/fs/xfs/linux-2.6/xfs_message.h
+++ b/fs/xfs/linux-2.6/xfs_message.h
@@ -3,9 +3,6 @@
3 3
4struct xfs_mount; 4struct xfs_mount;
5 5
6extern void xfs_printk(const char *level, const struct xfs_mount *mp,
7 const char *fmt, ...)
8 __attribute__ ((format (printf, 3, 4)));
9extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) 6extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
10 __attribute__ ((format (printf, 2, 3))); 7 __attribute__ ((format (printf, 2, 3)));
11extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) 8extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
@@ -28,7 +25,9 @@ extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
28extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) 25extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
29 __attribute__ ((format (printf, 2, 3))); 26 __attribute__ ((format (printf, 2, 3)));
30#else 27#else
31static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) 28static inline void
29__attribute__ ((format (printf, 2, 3)))
30xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
32{ 31{
33} 32}
34#endif 33#endif
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index b38e58d02299..b0aa59e51fd0 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1787,10 +1787,6 @@ init_xfs_fs(void)
1787 if (error) 1787 if (error)
1788 goto out_cleanup_procfs; 1788 goto out_cleanup_procfs;
1789 1789
1790 error = xfs_init_workqueues();
1791 if (error)
1792 goto out_sysctl_unregister;
1793
1794 vfs_initquota(); 1790 vfs_initquota();
1795 1791
1796 error = register_filesystem(&xfs_fs_type); 1792 error = register_filesystem(&xfs_fs_type);
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 3e898a48122d..8ecad5ff9f9b 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -267,6 +267,16 @@ xfs_sync_inode_attr(
267 267
268 error = xfs_iflush(ip, flags); 268 error = xfs_iflush(ip, flags);
269 269
270 /*
271 * We don't want to try again on non-blocking flushes that can't run
272 * again immediately. If an inode really must be written, then that's
273 * what the SYNC_WAIT flag is for.
274 */
275 if (error == EAGAIN) {
276 ASSERT(!(flags & SYNC_WAIT));
277 error = 0;
278 }
279
270 out_unlock: 280 out_unlock:
271 xfs_iunlock(ip, XFS_ILOCK_SHARED); 281 xfs_iunlock(ip, XFS_ILOCK_SHARED);
272 return error; 282 return error;
@@ -1022,13 +1032,14 @@ xfs_reclaim_inodes(
1022static int 1032static int
1023xfs_reclaim_inode_shrink( 1033xfs_reclaim_inode_shrink(
1024 struct shrinker *shrink, 1034 struct shrinker *shrink,
1025 int nr_to_scan, 1035 struct shrink_control *sc)
1026 gfp_t gfp_mask)
1027{ 1036{
1028 struct xfs_mount *mp; 1037 struct xfs_mount *mp;
1029 struct xfs_perag *pag; 1038 struct xfs_perag *pag;
1030 xfs_agnumber_t ag; 1039 xfs_agnumber_t ag;
1031 int reclaimable; 1040 int reclaimable;
1041 int nr_to_scan = sc->nr_to_scan;
1042 gfp_t gfp_mask = sc->gfp_mask;
1032 1043
1033 mp = container_of(shrink, struct xfs_mount, m_inode_shrink); 1044 mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
1034 if (nr_to_scan) { 1045 if (nr_to_scan) {
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 2d0bcb479075..d48b7a579ae1 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -1151,44 +1151,7 @@ TRACE_EVENT(xfs_bunmap,
1151 1151
1152); 1152);
1153 1153
1154#define XFS_BUSY_SYNC \ 1154DECLARE_EVENT_CLASS(xfs_busy_class,
1155 { 0, "async" }, \
1156 { 1, "sync" }
1157
1158TRACE_EVENT(xfs_alloc_busy,
1159 TP_PROTO(struct xfs_trans *trans, xfs_agnumber_t agno,
1160 xfs_agblock_t agbno, xfs_extlen_t len, int sync),
1161 TP_ARGS(trans, agno, agbno, len, sync),
1162 TP_STRUCT__entry(
1163 __field(dev_t, dev)
1164 __field(struct xfs_trans *, tp)
1165 __field(int, tid)
1166 __field(xfs_agnumber_t, agno)
1167 __field(xfs_agblock_t, agbno)
1168 __field(xfs_extlen_t, len)
1169 __field(int, sync)
1170 ),
1171 TP_fast_assign(
1172 __entry->dev = trans->t_mountp->m_super->s_dev;
1173 __entry->tp = trans;
1174 __entry->tid = trans->t_ticket->t_tid;
1175 __entry->agno = agno;
1176 __entry->agbno = agbno;
1177 __entry->len = len;
1178 __entry->sync = sync;
1179 ),
1180 TP_printk("dev %d:%d trans 0x%p tid 0x%x agno %u agbno %u len %u %s",
1181 MAJOR(__entry->dev), MINOR(__entry->dev),
1182 __entry->tp,
1183 __entry->tid,
1184 __entry->agno,
1185 __entry->agbno,
1186 __entry->len,
1187 __print_symbolic(__entry->sync, XFS_BUSY_SYNC))
1188
1189);
1190
1191TRACE_EVENT(xfs_alloc_unbusy,
1192 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 1155 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
1193 xfs_agblock_t agbno, xfs_extlen_t len), 1156 xfs_agblock_t agbno, xfs_extlen_t len),
1194 TP_ARGS(mp, agno, agbno, len), 1157 TP_ARGS(mp, agno, agbno, len),
@@ -1210,35 +1173,45 @@ TRACE_EVENT(xfs_alloc_unbusy,
1210 __entry->agbno, 1173 __entry->agbno,
1211 __entry->len) 1174 __entry->len)
1212); 1175);
1176#define DEFINE_BUSY_EVENT(name) \
1177DEFINE_EVENT(xfs_busy_class, name, \
1178 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
1179 xfs_agblock_t agbno, xfs_extlen_t len), \
1180 TP_ARGS(mp, agno, agbno, len))
1181DEFINE_BUSY_EVENT(xfs_alloc_busy);
1182DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem);
1183DEFINE_BUSY_EVENT(xfs_alloc_busy_force);
1184DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse);
1185DEFINE_BUSY_EVENT(xfs_alloc_busy_clear);
1213 1186
1214#define XFS_BUSY_STATES \ 1187TRACE_EVENT(xfs_alloc_busy_trim,
1215 { 0, "missing" }, \
1216 { 1, "found" }
1217
1218TRACE_EVENT(xfs_alloc_busysearch,
1219 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 1188 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
1220 xfs_agblock_t agbno, xfs_extlen_t len, int found), 1189 xfs_agblock_t agbno, xfs_extlen_t len,
1221 TP_ARGS(mp, agno, agbno, len, found), 1190 xfs_agblock_t tbno, xfs_extlen_t tlen),
1191 TP_ARGS(mp, agno, agbno, len, tbno, tlen),
1222 TP_STRUCT__entry( 1192 TP_STRUCT__entry(
1223 __field(dev_t, dev) 1193 __field(dev_t, dev)
1224 __field(xfs_agnumber_t, agno) 1194 __field(xfs_agnumber_t, agno)
1225 __field(xfs_agblock_t, agbno) 1195 __field(xfs_agblock_t, agbno)
1226 __field(xfs_extlen_t, len) 1196 __field(xfs_extlen_t, len)
1227 __field(int, found) 1197 __field(xfs_agblock_t, tbno)
1198 __field(xfs_extlen_t, tlen)
1228 ), 1199 ),
1229 TP_fast_assign( 1200 TP_fast_assign(
1230 __entry->dev = mp->m_super->s_dev; 1201 __entry->dev = mp->m_super->s_dev;
1231 __entry->agno = agno; 1202 __entry->agno = agno;
1232 __entry->agbno = agbno; 1203 __entry->agbno = agbno;
1233 __entry->len = len; 1204 __entry->len = len;
1234 __entry->found = found; 1205 __entry->tbno = tbno;
1206 __entry->tlen = tlen;
1235 ), 1207 ),
1236 TP_printk("dev %d:%d agno %u agbno %u len %u %s", 1208 TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u",
1237 MAJOR(__entry->dev), MINOR(__entry->dev), 1209 MAJOR(__entry->dev), MINOR(__entry->dev),
1238 __entry->agno, 1210 __entry->agno,
1239 __entry->agbno, 1211 __entry->agbno,
1240 __entry->len, 1212 __entry->len,
1241 __print_symbolic(__entry->found, XFS_BUSY_STATES)) 1213 __entry->tbno,
1214 __entry->tlen)
1242); 1215);
1243 1216
1244TRACE_EVENT(xfs_trans_commit_lsn, 1217TRACE_EVENT(xfs_trans_commit_lsn,
@@ -1418,7 +1391,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
1418 __entry->wasfromfl, 1391 __entry->wasfromfl,
1419 __entry->isfl, 1392 __entry->isfl,
1420 __entry->userdata, 1393 __entry->userdata,
1421 __entry->firstblock) 1394 (unsigned long long)__entry->firstblock)
1422) 1395)
1423 1396
1424#define DEFINE_ALLOC_EVENT(name) \ 1397#define DEFINE_ALLOC_EVENT(name) \
@@ -1433,11 +1406,14 @@ DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
1433DEFINE_ALLOC_EVENT(xfs_alloc_near_greater); 1406DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
1434DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser); 1407DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
1435DEFINE_ALLOC_EVENT(xfs_alloc_near_error); 1408DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
1409DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry);
1410DEFINE_ALLOC_EVENT(xfs_alloc_near_busy);
1436DEFINE_ALLOC_EVENT(xfs_alloc_size_neither); 1411DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
1437DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry); 1412DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
1438DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft); 1413DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
1439DEFINE_ALLOC_EVENT(xfs_alloc_size_done); 1414DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
1440DEFINE_ALLOC_EVENT(xfs_alloc_size_error); 1415DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
1416DEFINE_ALLOC_EVENT(xfs_alloc_size_busy);
1441DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist); 1417DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
1442DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough); 1418DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
1443DEFINE_ALLOC_EVENT(xfs_alloc_small_done); 1419DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 69228aa8605a..b94dace4e785 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -60,7 +60,7 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
60 60
61STATIC int xfs_qm_init_quotainos(xfs_mount_t *); 61STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
62STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); 62STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
63STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t); 63STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *);
64 64
65static struct shrinker xfs_qm_shaker = { 65static struct shrinker xfs_qm_shaker = {
66 .shrink = xfs_qm_shake, 66 .shrink = xfs_qm_shake,
@@ -2009,10 +2009,10 @@ xfs_qm_shake_freelist(
2009STATIC int 2009STATIC int
2010xfs_qm_shake( 2010xfs_qm_shake(
2011 struct shrinker *shrink, 2011 struct shrinker *shrink,
2012 int nr_to_scan, 2012 struct shrink_control *sc)
2013 gfp_t gfp_mask)
2014{ 2013{
2015 int ndqused, nfree, n; 2014 int ndqused, nfree, n;
2015 gfp_t gfp_mask = sc->gfp_mask;
2016 2016
2017 if (!kmem_shake_allow(gfp_mask)) 2017 if (!kmem_shake_allow(gfp_mask))
2018 return 0; 2018 return 0;
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 58632cc17f2d..da0a561ffba2 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -187,7 +187,6 @@ struct xfs_busy_extent {
187 xfs_agnumber_t agno; 187 xfs_agnumber_t agno;
188 xfs_agblock_t bno; 188 xfs_agblock_t bno;
189 xfs_extlen_t length; 189 xfs_extlen_t length;
190 xlog_tid_t tid; /* transaction that created this */
191}; 190};
192 191
193/* 192/*
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 27d64d752eab..acdced86413c 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -41,19 +41,13 @@
41#define XFSA_FIXUP_BNO_OK 1 41#define XFSA_FIXUP_BNO_OK 1
42#define XFSA_FIXUP_CNT_OK 2 42#define XFSA_FIXUP_CNT_OK 2
43 43
44/*
45 * Prototypes for per-ag allocation routines
46 */
47
48STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); 44STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
49STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); 45STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
50STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); 46STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
51STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, 47STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
52 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); 48 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
53 49STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *,
54/* 50 xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *);
55 * Internal functions.
56 */
57 51
58/* 52/*
59 * Lookup the record equal to [bno, len] in the btree given by cur. 53 * Lookup the record equal to [bno, len] in the btree given by cur.
@@ -154,19 +148,21 @@ xfs_alloc_compute_aligned(
154 xfs_extlen_t *reslen) /* result length */ 148 xfs_extlen_t *reslen) /* result length */
155{ 149{
156 xfs_agblock_t bno; 150 xfs_agblock_t bno;
157 xfs_extlen_t diff;
158 xfs_extlen_t len; 151 xfs_extlen_t len;
159 152
160 if (args->alignment > 1 && foundlen >= args->minlen) { 153 /* Trim busy sections out of found extent */
161 bno = roundup(foundbno, args->alignment); 154 xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len);
162 diff = bno - foundbno; 155
163 len = diff >= foundlen ? 0 : foundlen - diff; 156 if (args->alignment > 1 && len >= args->minlen) {
157 xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
158 xfs_extlen_t diff = aligned_bno - bno;
159
160 *resbno = aligned_bno;
161 *reslen = diff >= len ? 0 : len - diff;
164 } else { 162 } else {
165 bno = foundbno; 163 *resbno = bno;
166 len = foundlen; 164 *reslen = len;
167 } 165 }
168 *resbno = bno;
169 *reslen = len;
170} 166}
171 167
172/* 168/*
@@ -280,7 +276,6 @@ xfs_alloc_fix_minleft(
280 return 1; 276 return 1;
281 agf = XFS_BUF_TO_AGF(args->agbp); 277 agf = XFS_BUF_TO_AGF(args->agbp);
282 diff = be32_to_cpu(agf->agf_freeblks) 278 diff = be32_to_cpu(agf->agf_freeblks)
283 + be32_to_cpu(agf->agf_flcount)
284 - args->len - args->minleft; 279 - args->len - args->minleft;
285 if (diff >= 0) 280 if (diff >= 0)
286 return 1; 281 return 1;
@@ -541,16 +536,8 @@ xfs_alloc_ag_vextent(
541 if (error) 536 if (error)
542 return error; 537 return error;
543 538
544 /* 539 ASSERT(!xfs_alloc_busy_search(args->mp, args->agno,
545 * Search the busylist for these blocks and mark the 540 args->agbno, args->len));
546 * transaction as synchronous if blocks are found. This
547 * avoids the need to block due to a synchronous log
548 * force to ensure correct ordering as the synchronous
549 * transaction will guarantee that for us.
550 */
551 if (xfs_alloc_busy_search(args->mp, args->agno,
552 args->agbno, args->len))
553 xfs_trans_set_sync(args->tp);
554 } 541 }
555 542
556 if (!args->isfl) { 543 if (!args->isfl) {
@@ -577,14 +564,14 @@ xfs_alloc_ag_vextent_exact(
577{ 564{
578 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ 565 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
579 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ 566 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
580 xfs_agblock_t end; /* end of allocated extent */
581 int error; 567 int error;
582 xfs_agblock_t fbno; /* start block of found extent */ 568 xfs_agblock_t fbno; /* start block of found extent */
583 xfs_agblock_t fend; /* end block of found extent */
584 xfs_extlen_t flen; /* length of found extent */ 569 xfs_extlen_t flen; /* length of found extent */
570 xfs_agblock_t tbno; /* start block of trimmed extent */
571 xfs_extlen_t tlen; /* length of trimmed extent */
572 xfs_agblock_t tend; /* end block of trimmed extent */
573 xfs_agblock_t end; /* end of allocated extent */
585 int i; /* success/failure of operation */ 574 int i; /* success/failure of operation */
586 xfs_agblock_t maxend; /* end of maximal extent */
587 xfs_agblock_t minend; /* end of minimal extent */
588 xfs_extlen_t rlen; /* length of returned extent */ 575 xfs_extlen_t rlen; /* length of returned extent */
589 576
590 ASSERT(args->alignment == 1); 577 ASSERT(args->alignment == 1);
@@ -614,14 +601,22 @@ xfs_alloc_ag_vextent_exact(
614 goto error0; 601 goto error0;
615 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 602 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
616 ASSERT(fbno <= args->agbno); 603 ASSERT(fbno <= args->agbno);
617 minend = args->agbno + args->minlen;
618 maxend = args->agbno + args->maxlen;
619 fend = fbno + flen;
620 604
621 /* 605 /*
622 * Give up if the freespace isn't long enough for the minimum request. 606 * Check for overlapping busy extents.
607 */
608 xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen);
609
610 /*
611 * Give up if the start of the extent is busy, or the freespace isn't
612 * long enough for the minimum request.
623 */ 613 */
624 if (fend < minend) 614 if (tbno > args->agbno)
615 goto not_found;
616 if (tlen < args->minlen)
617 goto not_found;
618 tend = tbno + tlen;
619 if (tend < args->agbno + args->minlen)
625 goto not_found; 620 goto not_found;
626 621
627 /* 622 /*
@@ -630,14 +625,14 @@ xfs_alloc_ag_vextent_exact(
630 * 625 *
631 * Fix the length according to mod and prod if given. 626 * Fix the length according to mod and prod if given.
632 */ 627 */
633 end = XFS_AGBLOCK_MIN(fend, maxend); 628 end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen);
634 args->len = end - args->agbno; 629 args->len = end - args->agbno;
635 xfs_alloc_fix_len(args); 630 xfs_alloc_fix_len(args);
636 if (!xfs_alloc_fix_minleft(args)) 631 if (!xfs_alloc_fix_minleft(args))
637 goto not_found; 632 goto not_found;
638 633
639 rlen = args->len; 634 rlen = args->len;
640 ASSERT(args->agbno + rlen <= fend); 635 ASSERT(args->agbno + rlen <= tend);
641 end = args->agbno + rlen; 636 end = args->agbno + rlen;
642 637
643 /* 638 /*
@@ -686,11 +681,11 @@ xfs_alloc_find_best_extent(
686 struct xfs_btree_cur **scur, /* searching cursor */ 681 struct xfs_btree_cur **scur, /* searching cursor */
687 xfs_agblock_t gdiff, /* difference for search comparison */ 682 xfs_agblock_t gdiff, /* difference for search comparison */
688 xfs_agblock_t *sbno, /* extent found by search */ 683 xfs_agblock_t *sbno, /* extent found by search */
689 xfs_extlen_t *slen, 684 xfs_extlen_t *slen, /* extent length */
690 xfs_extlen_t *slena, /* aligned length */ 685 xfs_agblock_t *sbnoa, /* aligned extent found by search */
686 xfs_extlen_t *slena, /* aligned extent length */
691 int dir) /* 0 = search right, 1 = search left */ 687 int dir) /* 0 = search right, 1 = search left */
692{ 688{
693 xfs_agblock_t bno;
694 xfs_agblock_t new; 689 xfs_agblock_t new;
695 xfs_agblock_t sdiff; 690 xfs_agblock_t sdiff;
696 int error; 691 int error;
@@ -708,16 +703,16 @@ xfs_alloc_find_best_extent(
708 if (error) 703 if (error)
709 goto error0; 704 goto error0;
710 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 705 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
711 xfs_alloc_compute_aligned(args, *sbno, *slen, &bno, slena); 706 xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
712 707
713 /* 708 /*
714 * The good extent is closer than this one. 709 * The good extent is closer than this one.
715 */ 710 */
716 if (!dir) { 711 if (!dir) {
717 if (bno >= args->agbno + gdiff) 712 if (*sbnoa >= args->agbno + gdiff)
718 goto out_use_good; 713 goto out_use_good;
719 } else { 714 } else {
720 if (bno <= args->agbno - gdiff) 715 if (*sbnoa <= args->agbno - gdiff)
721 goto out_use_good; 716 goto out_use_good;
722 } 717 }
723 718
@@ -729,8 +724,8 @@ xfs_alloc_find_best_extent(
729 xfs_alloc_fix_len(args); 724 xfs_alloc_fix_len(args);
730 725
731 sdiff = xfs_alloc_compute_diff(args->agbno, args->len, 726 sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
732 args->alignment, *sbno, 727 args->alignment, *sbnoa,
733 *slen, &new); 728 *slena, &new);
734 729
735 /* 730 /*
736 * Choose closer size and invalidate other cursor. 731 * Choose closer size and invalidate other cursor.
@@ -780,7 +775,7 @@ xfs_alloc_ag_vextent_near(
780 xfs_agblock_t gtbnoa; /* aligned ... */ 775 xfs_agblock_t gtbnoa; /* aligned ... */
781 xfs_extlen_t gtdiff; /* difference to right side entry */ 776 xfs_extlen_t gtdiff; /* difference to right side entry */
782 xfs_extlen_t gtlen; /* length of right side entry */ 777 xfs_extlen_t gtlen; /* length of right side entry */
783 xfs_extlen_t gtlena = 0; /* aligned ... */ 778 xfs_extlen_t gtlena; /* aligned ... */
784 xfs_agblock_t gtnew; /* useful start bno of right side */ 779 xfs_agblock_t gtnew; /* useful start bno of right side */
785 int error; /* error code */ 780 int error; /* error code */
786 int i; /* result code, temporary */ 781 int i; /* result code, temporary */
@@ -789,9 +784,10 @@ xfs_alloc_ag_vextent_near(
789 xfs_agblock_t ltbnoa; /* aligned ... */ 784 xfs_agblock_t ltbnoa; /* aligned ... */
790 xfs_extlen_t ltdiff; /* difference to left side entry */ 785 xfs_extlen_t ltdiff; /* difference to left side entry */
791 xfs_extlen_t ltlen; /* length of left side entry */ 786 xfs_extlen_t ltlen; /* length of left side entry */
792 xfs_extlen_t ltlena = 0; /* aligned ... */ 787 xfs_extlen_t ltlena; /* aligned ... */
793 xfs_agblock_t ltnew; /* useful start bno of left side */ 788 xfs_agblock_t ltnew; /* useful start bno of left side */
794 xfs_extlen_t rlen; /* length of returned extent */ 789 xfs_extlen_t rlen; /* length of returned extent */
790 int forced = 0;
795#if defined(DEBUG) && defined(__KERNEL__) 791#if defined(DEBUG) && defined(__KERNEL__)
796 /* 792 /*
797 * Randomly don't execute the first algorithm. 793 * Randomly don't execute the first algorithm.
@@ -800,13 +796,20 @@ xfs_alloc_ag_vextent_near(
800 796
801 dofirst = random32() & 1; 797 dofirst = random32() & 1;
802#endif 798#endif
799
800restart:
801 bno_cur_lt = NULL;
802 bno_cur_gt = NULL;
803 ltlen = 0;
804 gtlena = 0;
805 ltlena = 0;
806
803 /* 807 /*
804 * Get a cursor for the by-size btree. 808 * Get a cursor for the by-size btree.
805 */ 809 */
806 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 810 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
807 args->agno, XFS_BTNUM_CNT); 811 args->agno, XFS_BTNUM_CNT);
808 ltlen = 0; 812
809 bno_cur_lt = bno_cur_gt = NULL;
810 /* 813 /*
811 * See if there are any free extents as big as maxlen. 814 * See if there are any free extents as big as maxlen.
812 */ 815 */
@@ -822,11 +825,13 @@ xfs_alloc_ag_vextent_near(
822 goto error0; 825 goto error0;
823 if (i == 0 || ltlen == 0) { 826 if (i == 0 || ltlen == 0) {
824 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 827 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
828 trace_xfs_alloc_near_noentry(args);
825 return 0; 829 return 0;
826 } 830 }
827 ASSERT(i == 1); 831 ASSERT(i == 1);
828 } 832 }
829 args->wasfromfl = 0; 833 args->wasfromfl = 0;
834
830 /* 835 /*
831 * First algorithm. 836 * First algorithm.
832 * If the requested extent is large wrt the freespaces available 837 * If the requested extent is large wrt the freespaces available
@@ -890,7 +895,7 @@ xfs_alloc_ag_vextent_near(
890 if (args->len < blen) 895 if (args->len < blen)
891 continue; 896 continue;
892 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, 897 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
893 args->alignment, ltbno, ltlen, &ltnew); 898 args->alignment, ltbnoa, ltlena, &ltnew);
894 if (ltnew != NULLAGBLOCK && 899 if (ltnew != NULLAGBLOCK &&
895 (args->len > blen || ltdiff < bdiff)) { 900 (args->len > blen || ltdiff < bdiff)) {
896 bdiff = ltdiff; 901 bdiff = ltdiff;
@@ -1042,11 +1047,12 @@ xfs_alloc_ag_vextent_near(
1042 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1047 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1043 xfs_alloc_fix_len(args); 1048 xfs_alloc_fix_len(args);
1044 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, 1049 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1045 args->alignment, ltbno, ltlen, &ltnew); 1050 args->alignment, ltbnoa, ltlena, &ltnew);
1046 1051
1047 error = xfs_alloc_find_best_extent(args, 1052 error = xfs_alloc_find_best_extent(args,
1048 &bno_cur_lt, &bno_cur_gt, 1053 &bno_cur_lt, &bno_cur_gt,
1049 ltdiff, &gtbno, &gtlen, &gtlena, 1054 ltdiff, &gtbno, &gtlen,
1055 &gtbnoa, &gtlena,
1050 0 /* search right */); 1056 0 /* search right */);
1051 } else { 1057 } else {
1052 ASSERT(gtlena >= args->minlen); 1058 ASSERT(gtlena >= args->minlen);
@@ -1057,11 +1063,12 @@ xfs_alloc_ag_vextent_near(
1057 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); 1063 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1058 xfs_alloc_fix_len(args); 1064 xfs_alloc_fix_len(args);
1059 gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, 1065 gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1060 args->alignment, gtbno, gtlen, &gtnew); 1066 args->alignment, gtbnoa, gtlena, &gtnew);
1061 1067
1062 error = xfs_alloc_find_best_extent(args, 1068 error = xfs_alloc_find_best_extent(args,
1063 &bno_cur_gt, &bno_cur_lt, 1069 &bno_cur_gt, &bno_cur_lt,
1064 gtdiff, &ltbno, &ltlen, &ltlena, 1070 gtdiff, &ltbno, &ltlen,
1071 &ltbnoa, &ltlena,
1065 1 /* search left */); 1072 1 /* search left */);
1066 } 1073 }
1067 1074
@@ -1073,6 +1080,12 @@ xfs_alloc_ag_vextent_near(
1073 * If we couldn't get anything, give up. 1080 * If we couldn't get anything, give up.
1074 */ 1081 */
1075 if (bno_cur_lt == NULL && bno_cur_gt == NULL) { 1082 if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1083 if (!forced++) {
1084 trace_xfs_alloc_near_busy(args);
1085 xfs_log_force(args->mp, XFS_LOG_SYNC);
1086 goto restart;
1087 }
1088
1076 trace_xfs_alloc_size_neither(args); 1089 trace_xfs_alloc_size_neither(args);
1077 args->agbno = NULLAGBLOCK; 1090 args->agbno = NULLAGBLOCK;
1078 return 0; 1091 return 0;
@@ -1107,12 +1120,13 @@ xfs_alloc_ag_vextent_near(
1107 return 0; 1120 return 0;
1108 } 1121 }
1109 rlen = args->len; 1122 rlen = args->len;
1110 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, 1123 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
1111 ltlen, &ltnew); 1124 ltbnoa, ltlena, &ltnew);
1112 ASSERT(ltnew >= ltbno); 1125 ASSERT(ltnew >= ltbno);
1113 ASSERT(ltnew + rlen <= ltbno + ltlen); 1126 ASSERT(ltnew + rlen <= ltbnoa + ltlena);
1114 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 1127 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1115 args->agbno = ltnew; 1128 args->agbno = ltnew;
1129
1116 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, 1130 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1117 ltnew, rlen, XFSA_FIXUP_BNO_OK))) 1131 ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1118 goto error0; 1132 goto error0;
@@ -1155,26 +1169,35 @@ xfs_alloc_ag_vextent_size(
1155 int i; /* temp status variable */ 1169 int i; /* temp status variable */
1156 xfs_agblock_t rbno; /* returned block number */ 1170 xfs_agblock_t rbno; /* returned block number */
1157 xfs_extlen_t rlen; /* length of returned extent */ 1171 xfs_extlen_t rlen; /* length of returned extent */
1172 int forced = 0;
1158 1173
1174restart:
1159 /* 1175 /*
1160 * Allocate and initialize a cursor for the by-size btree. 1176 * Allocate and initialize a cursor for the by-size btree.
1161 */ 1177 */
1162 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1178 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1163 args->agno, XFS_BTNUM_CNT); 1179 args->agno, XFS_BTNUM_CNT);
1164 bno_cur = NULL; 1180 bno_cur = NULL;
1181
1165 /* 1182 /*
1166 * Look for an entry >= maxlen+alignment-1 blocks. 1183 * Look for an entry >= maxlen+alignment-1 blocks.
1167 */ 1184 */
1168 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, 1185 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1169 args->maxlen + args->alignment - 1, &i))) 1186 args->maxlen + args->alignment - 1, &i)))
1170 goto error0; 1187 goto error0;
1188
1171 /* 1189 /*
1172 * If none, then pick up the last entry in the tree unless the 1190 * If none or we have busy extents that we cannot allocate from, then
1173 * tree is empty. 1191 * we have to settle for a smaller extent. In the case that there are
1192 * no large extents, this will return the last entry in the tree unless
1193 * the tree is empty. In the case that there are only busy large
1194 * extents, this will return the largest small extent unless there
1195 * are no smaller extents available.
1174 */ 1196 */
1175 if (!i) { 1197 if (!i || forced > 1) {
1176 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno, 1198 error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1177 &flen, &i))) 1199 &fbno, &flen, &i);
1200 if (error)
1178 goto error0; 1201 goto error0;
1179 if (i == 0 || flen == 0) { 1202 if (i == 0 || flen == 0) {
1180 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1203 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
@@ -1182,22 +1205,56 @@ xfs_alloc_ag_vextent_size(
1182 return 0; 1205 return 0;
1183 } 1206 }
1184 ASSERT(i == 1); 1207 ASSERT(i == 1);
1208 xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1209 } else {
1210 /*
1211 * Search for a non-busy extent that is large enough.
1212 * If we are at low space, don't check, or if we fall of
1213 * the end of the btree, turn off the busy check and
1214 * restart.
1215 */
1216 for (;;) {
1217 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1218 if (error)
1219 goto error0;
1220 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1221
1222 xfs_alloc_compute_aligned(args, fbno, flen,
1223 &rbno, &rlen);
1224
1225 if (rlen >= args->maxlen)
1226 break;
1227
1228 error = xfs_btree_increment(cnt_cur, 0, &i);
1229 if (error)
1230 goto error0;
1231 if (i == 0) {
1232 /*
1233 * Our only valid extents must have been busy.
1234 * Make it unbusy by forcing the log out and
1235 * retrying. If we've been here before, forcing
1236 * the log isn't making the extents available,
1237 * which means they have probably been freed in
1238 * this transaction. In that case, we have to
1239 * give up on them and we'll attempt a minlen
1240 * allocation the next time around.
1241 */
1242 xfs_btree_del_cursor(cnt_cur,
1243 XFS_BTREE_NOERROR);
1244 trace_xfs_alloc_size_busy(args);
1245 if (!forced++)
1246 xfs_log_force(args->mp, XFS_LOG_SYNC);
1247 goto restart;
1248 }
1249 }
1185 } 1250 }
1186 /* 1251
1187 * There's a freespace as big as maxlen+alignment-1, get it.
1188 */
1189 else {
1190 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i)))
1191 goto error0;
1192 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1193 }
1194 /* 1252 /*
1195 * In the first case above, we got the last entry in the 1253 * In the first case above, we got the last entry in the
1196 * by-size btree. Now we check to see if the space hits maxlen 1254 * by-size btree. Now we check to see if the space hits maxlen
1197 * once aligned; if not, we search left for something better. 1255 * once aligned; if not, we search left for something better.
1198 * This can't happen in the second case above. 1256 * This can't happen in the second case above.
1199 */ 1257 */
1200 xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1201 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1258 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1202 XFS_WANT_CORRUPTED_GOTO(rlen == 0 || 1259 XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1203 (rlen <= flen && rbno + rlen <= fbno + flen), error0); 1260 (rlen <= flen && rbno + rlen <= fbno + flen), error0);
@@ -1251,13 +1308,19 @@ xfs_alloc_ag_vextent_size(
1251 * Fix up the length. 1308 * Fix up the length.
1252 */ 1309 */
1253 args->len = rlen; 1310 args->len = rlen;
1254 xfs_alloc_fix_len(args); 1311 if (rlen < args->minlen) {
1255 if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) { 1312 if (!forced++) {
1256 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1313 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1257 trace_xfs_alloc_size_nominleft(args); 1314 trace_xfs_alloc_size_busy(args);
1258 args->agbno = NULLAGBLOCK; 1315 xfs_log_force(args->mp, XFS_LOG_SYNC);
1259 return 0; 1316 goto restart;
1317 }
1318 goto out_nominleft;
1260 } 1319 }
1320 xfs_alloc_fix_len(args);
1321
1322 if (!xfs_alloc_fix_minleft(args))
1323 goto out_nominleft;
1261 rlen = args->len; 1324 rlen = args->len;
1262 XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); 1325 XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
1263 /* 1326 /*
@@ -1287,6 +1350,12 @@ error0:
1287 if (bno_cur) 1350 if (bno_cur)
1288 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 1351 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1289 return error; 1352 return error;
1353
1354out_nominleft:
1355 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1356 trace_xfs_alloc_size_nominleft(args);
1357 args->agbno = NULLAGBLOCK;
1358 return 0;
1290} 1359}
1291 1360
1292/* 1361/*
@@ -1326,6 +1395,9 @@ xfs_alloc_ag_vextent_small(
1326 if (error) 1395 if (error)
1327 goto error0; 1396 goto error0;
1328 if (fbno != NULLAGBLOCK) { 1397 if (fbno != NULLAGBLOCK) {
1398 xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1,
1399 args->userdata);
1400
1329 if (args->userdata) { 1401 if (args->userdata) {
1330 xfs_buf_t *bp; 1402 xfs_buf_t *bp;
1331 1403
@@ -1617,18 +1689,6 @@ xfs_free_ag_extent(
1617 1689
1618 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); 1690 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
1619 1691
1620 /*
1621 * Since blocks move to the free list without the coordination
1622 * used in xfs_bmap_finish, we can't allow block to be available
1623 * for reallocation and non-transaction writing (user data)
1624 * until we know that the transaction that moved it to the free
1625 * list is permanently on disk. We track the blocks by declaring
1626 * these blocks as "busy"; the busy list is maintained on a per-ag
1627 * basis and each transaction records which entries should be removed
1628 * when the iclog commits to disk. If a busy block is allocated,
1629 * the iclog is pushed up to the LSN that freed the block.
1630 */
1631 xfs_alloc_busy_insert(tp, agno, bno, len);
1632 return 0; 1692 return 0;
1633 1693
1634 error0: 1694 error0:
@@ -1923,21 +1983,6 @@ xfs_alloc_get_freelist(
1923 xfs_alloc_log_agf(tp, agbp, logflags); 1983 xfs_alloc_log_agf(tp, agbp, logflags);
1924 *bnop = bno; 1984 *bnop = bno;
1925 1985
1926 /*
1927 * As blocks are freed, they are added to the per-ag busy list and
1928 * remain there until the freeing transaction is committed to disk.
1929 * Now that we have allocated blocks, this list must be searched to see
1930 * if a block is being reused. If one is, then the freeing transaction
1931 * must be pushed to disk before this transaction.
1932 *
1933 * We do this by setting the current transaction to a sync transaction
1934 * which guarantees that the freeing transaction is on disk before this
1935 * transaction. This is done instead of a synchronous log force here so
1936 * that we don't sit and wait with the AGF locked in the transaction
1937 * during the log force.
1938 */
1939 if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1))
1940 xfs_trans_set_sync(tp);
1941 return 0; 1986 return 0;
1942} 1987}
1943 1988
@@ -2423,105 +2468,13 @@ xfs_free_extent(
2423 } 2468 }
2424 2469
2425 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); 2470 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
2471 if (!error)
2472 xfs_alloc_busy_insert(tp, args.agno, args.agbno, len);
2426error0: 2473error0:
2427 xfs_perag_put(args.pag); 2474 xfs_perag_put(args.pag);
2428 return error; 2475 return error;
2429} 2476}
2430 2477
2431
2432/*
2433 * AG Busy list management
2434 * The busy list contains block ranges that have been freed but whose
2435 * transactions have not yet hit disk. If any block listed in a busy
2436 * list is reused, the transaction that freed it must be forced to disk
2437 * before continuing to use the block.
2438 *
2439 * xfs_alloc_busy_insert - add to the per-ag busy list
2440 * xfs_alloc_busy_clear - remove an item from the per-ag busy list
2441 * xfs_alloc_busy_search - search for a busy extent
2442 */
2443
2444/*
2445 * Insert a new extent into the busy tree.
2446 *
2447 * The busy extent tree is indexed by the start block of the busy extent.
2448 * there can be multiple overlapping ranges in the busy extent tree but only
2449 * ever one entry at a given start block. The reason for this is that
2450 * multi-block extents can be freed, then smaller chunks of that extent
2451 * allocated and freed again before the first transaction commit is on disk.
2452 * If the exact same start block is freed a second time, we have to wait for
2453 * that busy extent to pass out of the tree before the new extent is inserted.
2454 * There are two main cases we have to handle here.
2455 *
2456 * The first case is a transaction that triggers a "free - allocate - free"
2457 * cycle. This can occur during btree manipulations as a btree block is freed
2458 * to the freelist, then allocated from the free list, then freed again. In
2459 * this case, the second extxpnet free is what triggers the duplicate and as
2460 * such the transaction IDs should match. Because the extent was allocated in
2461 * this transaction, the transaction must be marked as synchronous. This is
2462 * true for all cases where the free/alloc/free occurs in the one transaction,
2463 * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case.
2464 * This serves to catch violations of the second case quite effectively.
2465 *
2466 * The second case is where the free/alloc/free occur in different
2467 * transactions. In this case, the thread freeing the extent the second time
2468 * can't mark the extent busy immediately because it is already tracked in a
2469 * transaction that may be committing. When the log commit for the existing
2470 * busy extent completes, the busy extent will be removed from the tree. If we
2471 * allow the second busy insert to continue using that busy extent structure,
2472 * it can be freed before this transaction is safely in the log. Hence our
2473 * only option in this case is to force the log to remove the existing busy
2474 * extent from the list before we insert the new one with the current
2475 * transaction ID.
2476 *
2477 * The problem we are trying to avoid in the free-alloc-free in separate
2478 * transactions is most easily described with a timeline:
2479 *
2480 * Thread 1 Thread 2 Thread 3 xfslogd
2481 * xact alloc
2482 * free X
2483 * mark busy
2484 * commit xact
2485 * free xact
2486 * xact alloc
2487 * alloc X
2488 * busy search
2489 * mark xact sync
2490 * commit xact
2491 * free xact
2492 * force log
2493 * checkpoint starts
2494 * ....
2495 * xact alloc
2496 * free X
2497 * mark busy
2498 * finds match
2499 * *** KABOOM! ***
2500 * ....
2501 * log IO completes
2502 * unbusy X
2503 * checkpoint completes
2504 *
2505 * By issuing a log force in thread 3 @ "KABOOM", the thread will block until
2506 * the checkpoint completes, and the busy extent it matched will have been
2507 * removed from the tree when it is woken. Hence it can then continue safely.
2508 *
2509 * However, to ensure this matching process is robust, we need to use the
2510 * transaction ID for identifying transaction, as delayed logging results in
2511 * the busy extent and transaction lifecycles being different. i.e. the busy
2512 * extent is active for a lot longer than the transaction. Hence the
2513 * transaction structure can be freed and reallocated, then mark the same
2514 * extent busy again in the new transaction. In this case the new transaction
2515 * will have a different tid but can have the same address, and hence we need
2516 * to check against the tid.
2517 *
2518 * Future: for delayed logging, we could avoid the log force if the extent was
2519 * first freed in the current checkpoint sequence. This, however, requires the
2520 * ability to pin the current checkpoint in memory until this transaction
2521 * commits to ensure that both the original free and the current one combine
2522 * logically into the one checkpoint. If the checkpoint sequences are
2523 * different, however, we still need to wait on a log force.
2524 */
2525void 2478void
2526xfs_alloc_busy_insert( 2479xfs_alloc_busy_insert(
2527 struct xfs_trans *tp, 2480 struct xfs_trans *tp,
@@ -2533,9 +2486,7 @@ xfs_alloc_busy_insert(
2533 struct xfs_busy_extent *busyp; 2486 struct xfs_busy_extent *busyp;
2534 struct xfs_perag *pag; 2487 struct xfs_perag *pag;
2535 struct rb_node **rbp; 2488 struct rb_node **rbp;
2536 struct rb_node *parent; 2489 struct rb_node *parent = NULL;
2537 int match;
2538
2539 2490
2540 new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL); 2491 new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
2541 if (!new) { 2492 if (!new) {
@@ -2544,7 +2495,7 @@ xfs_alloc_busy_insert(
2544 * block, make this a synchronous transaction to insure that 2495 * block, make this a synchronous transaction to insure that
2545 * the block is not reused before this transaction commits. 2496 * the block is not reused before this transaction commits.
2546 */ 2497 */
2547 trace_xfs_alloc_busy(tp, agno, bno, len, 1); 2498 trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len);
2548 xfs_trans_set_sync(tp); 2499 xfs_trans_set_sync(tp);
2549 return; 2500 return;
2550 } 2501 }
@@ -2552,66 +2503,28 @@ xfs_alloc_busy_insert(
2552 new->agno = agno; 2503 new->agno = agno;
2553 new->bno = bno; 2504 new->bno = bno;
2554 new->length = len; 2505 new->length = len;
2555 new->tid = xfs_log_get_trans_ident(tp);
2556
2557 INIT_LIST_HEAD(&new->list); 2506 INIT_LIST_HEAD(&new->list);
2558 2507
2559 /* trace before insert to be able to see failed inserts */ 2508 /* trace before insert to be able to see failed inserts */
2560 trace_xfs_alloc_busy(tp, agno, bno, len, 0); 2509 trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len);
2561 2510
2562 pag = xfs_perag_get(tp->t_mountp, new->agno); 2511 pag = xfs_perag_get(tp->t_mountp, new->agno);
2563restart:
2564 spin_lock(&pag->pagb_lock); 2512 spin_lock(&pag->pagb_lock);
2565 rbp = &pag->pagb_tree.rb_node; 2513 rbp = &pag->pagb_tree.rb_node;
2566 parent = NULL; 2514 while (*rbp) {
2567 busyp = NULL;
2568 match = 0;
2569 while (*rbp && match >= 0) {
2570 parent = *rbp; 2515 parent = *rbp;
2571 busyp = rb_entry(parent, struct xfs_busy_extent, rb_node); 2516 busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);
2572 2517
2573 if (new->bno < busyp->bno) { 2518 if (new->bno < busyp->bno) {
2574 /* may overlap, but exact start block is lower */
2575 rbp = &(*rbp)->rb_left; 2519 rbp = &(*rbp)->rb_left;
2576 if (new->bno + new->length > busyp->bno) 2520 ASSERT(new->bno + new->length <= busyp->bno);
2577 match = busyp->tid == new->tid ? 1 : -1;
2578 } else if (new->bno > busyp->bno) { 2521 } else if (new->bno > busyp->bno) {
2579 /* may overlap, but exact start block is higher */
2580 rbp = &(*rbp)->rb_right; 2522 rbp = &(*rbp)->rb_right;
2581 if (bno < busyp->bno + busyp->length) 2523 ASSERT(bno >= busyp->bno + busyp->length);
2582 match = busyp->tid == new->tid ? 1 : -1;
2583 } else { 2524 } else {
2584 match = busyp->tid == new->tid ? 1 : -1; 2525 ASSERT(0);
2585 break;
2586 } 2526 }
2587 } 2527 }
2588 if (match < 0) {
2589 /* overlap marked busy in different transaction */
2590 spin_unlock(&pag->pagb_lock);
2591 xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
2592 goto restart;
2593 }
2594 if (match > 0) {
2595 /*
2596 * overlap marked busy in same transaction. Update if exact
2597 * start block match, otherwise combine the busy extents into
2598 * a single range.
2599 */
2600 if (busyp->bno == new->bno) {
2601 busyp->length = max(busyp->length, new->length);
2602 spin_unlock(&pag->pagb_lock);
2603 ASSERT(tp->t_flags & XFS_TRANS_SYNC);
2604 xfs_perag_put(pag);
2605 kmem_free(new);
2606 return;
2607 }
2608 rb_erase(&busyp->rb_node, &pag->pagb_tree);
2609 new->length = max(busyp->bno + busyp->length,
2610 new->bno + new->length) -
2611 min(busyp->bno, new->bno);
2612 new->bno = min(busyp->bno, new->bno);
2613 } else
2614 busyp = NULL;
2615 2528
2616 rb_link_node(&new->rb_node, parent, rbp); 2529 rb_link_node(&new->rb_node, parent, rbp);
2617 rb_insert_color(&new->rb_node, &pag->pagb_tree); 2530 rb_insert_color(&new->rb_node, &pag->pagb_tree);
@@ -2619,7 +2532,6 @@ restart:
2619 list_add(&new->list, &tp->t_busy); 2532 list_add(&new->list, &tp->t_busy);
2620 spin_unlock(&pag->pagb_lock); 2533 spin_unlock(&pag->pagb_lock);
2621 xfs_perag_put(pag); 2534 xfs_perag_put(pag);
2622 kmem_free(busyp);
2623} 2535}
2624 2536
2625/* 2537/*
@@ -2668,31 +2580,443 @@ xfs_alloc_busy_search(
2668 } 2580 }
2669 } 2581 }
2670 spin_unlock(&pag->pagb_lock); 2582 spin_unlock(&pag->pagb_lock);
2671 trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match);
2672 xfs_perag_put(pag); 2583 xfs_perag_put(pag);
2673 return match; 2584 return match;
2674} 2585}
2675 2586
2587/*
2588 * The found free extent [fbno, fend] overlaps part or all of the given busy
2589 * extent. If the overlap covers the beginning, the end, or all of the busy
2590 * extent, the overlapping portion can be made unbusy and used for the
2591 * allocation. We can't split a busy extent because we can't modify a
2592 * transaction/CIL context busy list, but we can update an entries block
2593 * number or length.
2594 *
2595 * Returns true if the extent can safely be reused, or false if the search
2596 * needs to be restarted.
2597 */
2598STATIC bool
2599xfs_alloc_busy_update_extent(
2600 struct xfs_mount *mp,
2601 struct xfs_perag *pag,
2602 struct xfs_busy_extent *busyp,
2603 xfs_agblock_t fbno,
2604 xfs_extlen_t flen,
2605 bool userdata)
2606{
2607 xfs_agblock_t fend = fbno + flen;
2608 xfs_agblock_t bbno = busyp->bno;
2609 xfs_agblock_t bend = bbno + busyp->length;
2610
2611 /*
2612 * If there is a busy extent overlapping a user allocation, we have
2613 * no choice but to force the log and retry the search.
2614 *
2615 * Fortunately this does not happen during normal operation, but
2616 * only if the filesystem is very low on space and has to dip into
2617 * the AGFL for normal allocations.
2618 */
2619 if (userdata)
2620 goto out_force_log;
2621
2622 if (bbno < fbno && bend > fend) {
2623 /*
2624 * Case 1:
2625 * bbno bend
2626 * +BBBBBBBBBBBBBBBBB+
2627 * +---------+
2628 * fbno fend
2629 */
2630
2631 /*
2632 * We would have to split the busy extent to be able to track
2633 * it correct, which we cannot do because we would have to
2634 * modify the list of busy extents attached to the transaction
2635 * or CIL context, which is immutable.
2636 *
2637 * Force out the log to clear the busy extent and retry the
2638 * search.
2639 */
2640 goto out_force_log;
2641 } else if (bbno >= fbno && bend <= fend) {
2642 /*
2643 * Case 2:
2644 * bbno bend
2645 * +BBBBBBBBBBBBBBBBB+
2646 * +-----------------+
2647 * fbno fend
2648 *
2649 * Case 3:
2650 * bbno bend
2651 * +BBBBBBBBBBBBBBBBB+
2652 * +--------------------------+
2653 * fbno fend
2654 *
2655 * Case 4:
2656 * bbno bend
2657 * +BBBBBBBBBBBBBBBBB+
2658 * +--------------------------+
2659 * fbno fend
2660 *
2661 * Case 5:
2662 * bbno bend
2663 * +BBBBBBBBBBBBBBBBB+
2664 * +-----------------------------------+
2665 * fbno fend
2666 *
2667 */
2668
2669 /*
2670 * The busy extent is fully covered by the extent we are
2671 * allocating, and can simply be removed from the rbtree.
2672 * However we cannot remove it from the immutable list
2673 * tracking busy extents in the transaction or CIL context,
2674 * so set the length to zero to mark it invalid.
2675 *
2676 * We also need to restart the busy extent search from the
2677 * tree root, because erasing the node can rearrange the
2678 * tree topology.
2679 */
2680 rb_erase(&busyp->rb_node, &pag->pagb_tree);
2681 busyp->length = 0;
2682 return false;
2683 } else if (fend < bend) {
2684 /*
2685 * Case 6:
2686 * bbno bend
2687 * +BBBBBBBBBBBBBBBBB+
2688 * +---------+
2689 * fbno fend
2690 *
2691 * Case 7:
2692 * bbno bend
2693 * +BBBBBBBBBBBBBBBBB+
2694 * +------------------+
2695 * fbno fend
2696 *
2697 */
2698 busyp->bno = fend;
2699 } else if (bbno < fbno) {
2700 /*
2701 * Case 8:
2702 * bbno bend
2703 * +BBBBBBBBBBBBBBBBB+
2704 * +-------------+
2705 * fbno fend
2706 *
2707 * Case 9:
2708 * bbno bend
2709 * +BBBBBBBBBBBBBBBBB+
2710 * +----------------------+
2711 * fbno fend
2712 */
2713 busyp->length = fbno - busyp->bno;
2714 } else {
2715 ASSERT(0);
2716 }
2717
2718 trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen);
2719 return true;
2720
2721out_force_log:
2722 spin_unlock(&pag->pagb_lock);
2723 xfs_log_force(mp, XFS_LOG_SYNC);
2724 trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen);
2725 spin_lock(&pag->pagb_lock);
2726 return false;
2727}
2728
2729
2730/*
2731 * For a given extent [fbno, flen], make sure we can reuse it safely.
2732 */
2676void 2733void
2677xfs_alloc_busy_clear( 2734xfs_alloc_busy_reuse(
2678 struct xfs_mount *mp, 2735 struct xfs_mount *mp,
2679 struct xfs_busy_extent *busyp) 2736 xfs_agnumber_t agno,
2737 xfs_agblock_t fbno,
2738 xfs_extlen_t flen,
2739 bool userdata)
2680{ 2740{
2681 struct xfs_perag *pag; 2741 struct xfs_perag *pag;
2742 struct rb_node *rbp;
2682 2743
2683 trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno, 2744 ASSERT(flen > 0);
2684 busyp->length);
2685 2745
2686 ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno, 2746 pag = xfs_perag_get(mp, agno);
2687 busyp->length) == 1); 2747 spin_lock(&pag->pagb_lock);
2748restart:
2749 rbp = pag->pagb_tree.rb_node;
2750 while (rbp) {
2751 struct xfs_busy_extent *busyp =
2752 rb_entry(rbp, struct xfs_busy_extent, rb_node);
2753 xfs_agblock_t bbno = busyp->bno;
2754 xfs_agblock_t bend = bbno + busyp->length;
2688 2755
2689 list_del_init(&busyp->list); 2756 if (fbno + flen <= bbno) {
2757 rbp = rbp->rb_left;
2758 continue;
2759 } else if (fbno >= bend) {
2760 rbp = rbp->rb_right;
2761 continue;
2762 }
2690 2763
2691 pag = xfs_perag_get(mp, busyp->agno); 2764 if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen,
2692 spin_lock(&pag->pagb_lock); 2765 userdata))
2693 rb_erase(&busyp->rb_node, &pag->pagb_tree); 2766 goto restart;
2767 }
2694 spin_unlock(&pag->pagb_lock); 2768 spin_unlock(&pag->pagb_lock);
2695 xfs_perag_put(pag); 2769 xfs_perag_put(pag);
2770}
2771
2772/*
2773 * For a given extent [fbno, flen], search the busy extent list to find a
2774 * subset of the extent that is not busy. If *rlen is smaller than
2775 * args->minlen no suitable extent could be found, and the higher level
2776 * code needs to force out the log and retry the allocation.
2777 */
2778STATIC void
2779xfs_alloc_busy_trim(
2780 struct xfs_alloc_arg *args,
2781 xfs_agblock_t bno,
2782 xfs_extlen_t len,
2783 xfs_agblock_t *rbno,
2784 xfs_extlen_t *rlen)
2785{
2786 xfs_agblock_t fbno;
2787 xfs_extlen_t flen;
2788 struct rb_node *rbp;
2789
2790 ASSERT(len > 0);
2696 2791
2792 spin_lock(&args->pag->pagb_lock);
2793restart:
2794 fbno = bno;
2795 flen = len;
2796 rbp = args->pag->pagb_tree.rb_node;
2797 while (rbp && flen >= args->minlen) {
2798 struct xfs_busy_extent *busyp =
2799 rb_entry(rbp, struct xfs_busy_extent, rb_node);
2800 xfs_agblock_t fend = fbno + flen;
2801 xfs_agblock_t bbno = busyp->bno;
2802 xfs_agblock_t bend = bbno + busyp->length;
2803
2804 if (fend <= bbno) {
2805 rbp = rbp->rb_left;
2806 continue;
2807 } else if (fbno >= bend) {
2808 rbp = rbp->rb_right;
2809 continue;
2810 }
2811
2812 /*
2813 * If this is a metadata allocation, try to reuse the busy
2814 * extent instead of trimming the allocation.
2815 */
2816 if (!args->userdata) {
2817 if (!xfs_alloc_busy_update_extent(args->mp, args->pag,
2818 busyp, fbno, flen,
2819 false))
2820 goto restart;
2821 continue;
2822 }
2823
2824 if (bbno <= fbno) {
2825 /* start overlap */
2826
2827 /*
2828 * Case 1:
2829 * bbno bend
2830 * +BBBBBBBBBBBBBBBBB+
2831 * +---------+
2832 * fbno fend
2833 *
2834 * Case 2:
2835 * bbno bend
2836 * +BBBBBBBBBBBBBBBBB+
2837 * +-------------+
2838 * fbno fend
2839 *
2840 * Case 3:
2841 * bbno bend
2842 * +BBBBBBBBBBBBBBBBB+
2843 * +-------------+
2844 * fbno fend
2845 *
2846 * Case 4:
2847 * bbno bend
2848 * +BBBBBBBBBBBBBBBBB+
2849 * +-----------------+
2850 * fbno fend
2851 *
2852 * No unbusy region in extent, return failure.
2853 */
2854 if (fend <= bend)
2855 goto fail;
2856
2857 /*
2858 * Case 5:
2859 * bbno bend
2860 * +BBBBBBBBBBBBBBBBB+
2861 * +----------------------+
2862 * fbno fend
2863 *
2864 * Case 6:
2865 * bbno bend
2866 * +BBBBBBBBBBBBBBBBB+
2867 * +--------------------------+
2868 * fbno fend
2869 *
2870 * Needs to be trimmed to:
2871 * +-------+
2872 * fbno fend
2873 */
2874 fbno = bend;
2875 } else if (bend >= fend) {
2876 /* end overlap */
2877
2878 /*
2879 * Case 7:
2880 * bbno bend
2881 * +BBBBBBBBBBBBBBBBB+
2882 * +------------------+
2883 * fbno fend
2884 *
2885 * Case 8:
2886 * bbno bend
2887 * +BBBBBBBBBBBBBBBBB+
2888 * +--------------------------+
2889 * fbno fend
2890 *
2891 * Needs to be trimmed to:
2892 * +-------+
2893 * fbno fend
2894 */
2895 fend = bbno;
2896 } else {
2897 /* middle overlap */
2898
2899 /*
2900 * Case 9:
2901 * bbno bend
2902 * +BBBBBBBBBBBBBBBBB+
2903 * +-----------------------------------+
2904 * fbno fend
2905 *
2906 * Can be trimmed to:
2907 * +-------+ OR +-------+
2908 * fbno fend fbno fend
2909 *
2910 * Backward allocation leads to significant
2911 * fragmentation of directories, which degrades
2912 * directory performance, therefore we always want to
2913 * choose the option that produces forward allocation
2914 * patterns.
2915 * Preferring the lower bno extent will make the next
2916 * request use "fend" as the start of the next
2917 * allocation; if the segment is no longer busy at
2918 * that point, we'll get a contiguous allocation, but
2919 * even if it is still busy, we will get a forward
2920 * allocation.
2921 * We try to avoid choosing the segment at "bend",
2922 * because that can lead to the next allocation
2923 * taking the segment at "fbno", which would be a
2924 * backward allocation. We only use the segment at
2925 * "fbno" if it is much larger than the current
2926 * requested size, because in that case there's a
2927 * good chance subsequent allocations will be
2928 * contiguous.
2929 */
2930 if (bbno - fbno >= args->maxlen) {
2931 /* left candidate fits perfect */
2932 fend = bbno;
2933 } else if (fend - bend >= args->maxlen * 4) {
2934 /* right candidate has enough free space */
2935 fbno = bend;
2936 } else if (bbno - fbno >= args->minlen) {
2937 /* left candidate fits minimum requirement */
2938 fend = bbno;
2939 } else {
2940 goto fail;
2941 }
2942 }
2943
2944 flen = fend - fbno;
2945 }
2946 spin_unlock(&args->pag->pagb_lock);
2947
2948 if (fbno != bno || flen != len) {
2949 trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len,
2950 fbno, flen);
2951 }
2952 *rbno = fbno;
2953 *rlen = flen;
2954 return;
2955fail:
2956 /*
2957 * Return a zero extent length as failure indications. All callers
2958 * re-check if the trimmed extent satisfies the minlen requirement.
2959 */
2960 spin_unlock(&args->pag->pagb_lock);
2961 trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
2962 *rbno = fbno;
2963 *rlen = 0;
2964}
2965
2966static void
2967xfs_alloc_busy_clear_one(
2968 struct xfs_mount *mp,
2969 struct xfs_perag *pag,
2970 struct xfs_busy_extent *busyp)
2971{
2972 if (busyp->length) {
2973 trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno,
2974 busyp->length);
2975 rb_erase(&busyp->rb_node, &pag->pagb_tree);
2976 }
2977
2978 list_del_init(&busyp->list);
2697 kmem_free(busyp); 2979 kmem_free(busyp);
2698} 2980}
2981
2982void
2983xfs_alloc_busy_clear(
2984 struct xfs_mount *mp,
2985 struct list_head *list)
2986{
2987 struct xfs_busy_extent *busyp, *n;
2988 struct xfs_perag *pag = NULL;
2989 xfs_agnumber_t agno = NULLAGNUMBER;
2990
2991 list_for_each_entry_safe(busyp, n, list, list) {
2992 if (busyp->agno != agno) {
2993 if (pag) {
2994 spin_unlock(&pag->pagb_lock);
2995 xfs_perag_put(pag);
2996 }
2997 pag = xfs_perag_get(mp, busyp->agno);
2998 spin_lock(&pag->pagb_lock);
2999 agno = busyp->agno;
3000 }
3001
3002 xfs_alloc_busy_clear_one(mp, pag, busyp);
3003 }
3004
3005 if (pag) {
3006 spin_unlock(&pag->pagb_lock);
3007 xfs_perag_put(pag);
3008 }
3009}
3010
3011/*
3012 * Callback for list_sort to sort busy extents by the AG they reside in.
3013 */
3014int
3015xfs_busy_extent_ag_cmp(
3016 void *priv,
3017 struct list_head *a,
3018 struct list_head *b)
3019{
3020 return container_of(a, struct xfs_busy_extent, list)->agno -
3021 container_of(b, struct xfs_busy_extent, list)->agno;
3022}
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index d0b3bc72005b..240ad288f2f9 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -140,11 +140,24 @@ xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno,
140 xfs_agblock_t bno, xfs_extlen_t len); 140 xfs_agblock_t bno, xfs_extlen_t len);
141 141
142void 142void
143xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp); 143xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list);
144 144
145int 145int
146xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, 146xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
147 xfs_agblock_t bno, xfs_extlen_t len); 147 xfs_agblock_t bno, xfs_extlen_t len);
148
149void
150xfs_alloc_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno,
151 xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata);
152
153int
154xfs_busy_extent_ag_cmp(void *priv, struct list_head *a, struct list_head *b);
155
156static inline void xfs_alloc_busy_sort(struct list_head *list)
157{
158 list_sort(NULL, list, xfs_busy_extent_ag_cmp);
159}
160
148#endif /* __KERNEL__ */ 161#endif /* __KERNEL__ */
149 162
150/* 163/*
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 3916925e2584..8b469d53599f 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -95,6 +95,8 @@ xfs_allocbt_alloc_block(
95 return 0; 95 return 0;
96 } 96 }
97 97
98 xfs_alloc_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
99
98 xfs_trans_agbtree_delta(cur->bc_tp, 1); 100 xfs_trans_agbtree_delta(cur->bc_tp, 1);
99 new->s = cpu_to_be32(bno); 101 new->s = cpu_to_be32(bno);
100 102
@@ -118,17 +120,6 @@ xfs_allocbt_free_block(
118 if (error) 120 if (error)
119 return error; 121 return error;
120 122
121 /*
122 * Since blocks move to the free list without the coordination used in
123 * xfs_bmap_finish, we can't allow block to be available for
124 * reallocation and non-transaction writing (user data) until we know
125 * that the transaction that moved it to the free list is permanently
126 * on disk. We track the blocks by declaring these blocks as "busy";
127 * the busy list is maintained on a per-ag basis and each transaction
128 * records which entries should be removed when the iclog commits to
129 * disk. If a busy block is allocated, the iclog is pushed up to the
130 * LSN that freed the block.
131 */
132 xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); 123 xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
133 xfs_trans_agbtree_delta(cur->bc_tp, -1); 124 xfs_trans_agbtree_delta(cur->bc_tp, -1);
134 return 0; 125 return 0;
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index be628677c288..9a84a85c03b1 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -202,7 +202,7 @@ xfs_swap_extents(
202 xfs_inode_t *tip, /* tmp inode */ 202 xfs_inode_t *tip, /* tmp inode */
203 xfs_swapext_t *sxp) 203 xfs_swapext_t *sxp)
204{ 204{
205 xfs_mount_t *mp; 205 xfs_mount_t *mp = ip->i_mount;
206 xfs_trans_t *tp; 206 xfs_trans_t *tp;
207 xfs_bstat_t *sbp = &sxp->sx_stat; 207 xfs_bstat_t *sbp = &sxp->sx_stat;
208 xfs_ifork_t *tempifp, *ifp, *tifp; 208 xfs_ifork_t *tempifp, *ifp, *tifp;
@@ -212,16 +212,12 @@ xfs_swap_extents(
212 int taforkblks = 0; 212 int taforkblks = 0;
213 __uint64_t tmp; 213 __uint64_t tmp;
214 214
215 mp = ip->i_mount;
216
217 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); 215 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
218 if (!tempifp) { 216 if (!tempifp) {
219 error = XFS_ERROR(ENOMEM); 217 error = XFS_ERROR(ENOMEM);
220 goto out; 218 goto out;
221 } 219 }
222 220
223 sbp = &sxp->sx_stat;
224
225 /* 221 /*
226 * we have to do two separate lock calls here to keep lockdep 222 * we have to do two separate lock calls here to keep lockdep
227 * happy. If we try to get all the locks in one call, lock will 223 * happy. If we try to get all the locks in one call, lock will
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a37480a6e023..c8e3349c287c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1354,7 +1354,7 @@ xfs_itruncate_start(
1354 return 0; 1354 return 0;
1355 } 1355 }
1356 last_byte = xfs_file_last_byte(ip); 1356 last_byte = xfs_file_last_byte(ip);
1357 trace_xfs_itruncate_start(ip, flags, new_size, toss_start, last_byte); 1357 trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte);
1358 if (last_byte > toss_start) { 1358 if (last_byte > toss_start) {
1359 if (flags & XFS_ITRUNC_DEFINITE) { 1359 if (flags & XFS_ITRUNC_DEFINITE) {
1360 xfs_tosspages(ip, toss_start, 1360 xfs_tosspages(ip, toss_start,
@@ -1470,7 +1470,7 @@ xfs_itruncate_finish(
1470 * file but the log buffers containing the free and reallocation 1470 * file but the log buffers containing the free and reallocation
1471 * don't, then we'd end up with garbage in the blocks being freed. 1471 * don't, then we'd end up with garbage in the blocks being freed.
1472 * As long as we make the new_size permanent before actually 1472 * As long as we make the new_size permanent before actually
1473 * freeing any blocks it doesn't matter if they get writtten to. 1473 * freeing any blocks it doesn't matter if they get written to.
1474 * 1474 *
1475 * The callers must signal into us whether or not the size 1475 * The callers must signal into us whether or not the size
1476 * setting here must be synchronous. There are a few cases 1476 * setting here must be synchronous. There are a few cases
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 576fdfe81d60..09983a3344a5 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -970,7 +970,6 @@ xfs_iflush_abort(
970{ 970{
971 xfs_inode_log_item_t *iip = ip->i_itemp; 971 xfs_inode_log_item_t *iip = ip->i_itemp;
972 972
973 iip = ip->i_itemp;
974 if (iip) { 973 if (iip) {
975 struct xfs_ail *ailp = iip->ili_item.li_ailp; 974 struct xfs_ail *ailp = iip->ili_item.li_ailp;
976 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { 975 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index b612ce4520ae..211930246f20 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1449,6 +1449,13 @@ xlog_dealloc_log(xlog_t *log)
1449 1449
1450 xlog_cil_destroy(log); 1450 xlog_cil_destroy(log);
1451 1451
1452 /*
1453 * always need to ensure that the extra buffer does not point to memory
1454 * owned by another log buffer before we free it.
1455 */
1456 xfs_buf_set_empty(log->l_xbuf, log->l_iclog_size);
1457 xfs_buf_free(log->l_xbuf);
1458
1452 iclog = log->l_iclog; 1459 iclog = log->l_iclog;
1453 for (i=0; i<log->l_iclog_bufs; i++) { 1460 for (i=0; i<log->l_iclog_bufs; i++) {
1454 xfs_buf_free(iclog->ic_bp); 1461 xfs_buf_free(iclog->ic_bp);
@@ -1458,7 +1465,6 @@ xlog_dealloc_log(xlog_t *log)
1458 } 1465 }
1459 spinlock_destroy(&log->l_icloglock); 1466 spinlock_destroy(&log->l_icloglock);
1460 1467
1461 xfs_buf_free(log->l_xbuf);
1462 log->l_mp->m_log = NULL; 1468 log->l_mp->m_log = NULL;
1463 kmem_free(log); 1469 kmem_free(log);
1464} /* xlog_dealloc_log */ 1470} /* xlog_dealloc_log */
@@ -3248,13 +3254,6 @@ xfs_log_ticket_get(
3248 return ticket; 3254 return ticket;
3249} 3255}
3250 3256
3251xlog_tid_t
3252xfs_log_get_trans_ident(
3253 struct xfs_trans *tp)
3254{
3255 return tp->t_ticket->t_tid;
3256}
3257
3258/* 3257/*
3259 * Allocate and initialise a new log ticket. 3258 * Allocate and initialise a new log ticket.
3260 */ 3259 */
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 3bd3291ef8d2..78c9039994af 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -189,8 +189,6 @@ void xlog_iodone(struct xfs_buf *);
189struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); 189struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
190void xfs_log_ticket_put(struct xlog_ticket *ticket); 190void xfs_log_ticket_put(struct xlog_ticket *ticket);
191 191
192xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp);
193
194void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, 192void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
195 struct xfs_log_vec *log_vector, 193 struct xfs_log_vec *log_vector,
196 xfs_lsn_t *commit_lsn, int flags); 194 xfs_lsn_t *commit_lsn, int flags);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 9ca59be08977..7d56e88a3f0e 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -361,13 +361,12 @@ xlog_cil_committed(
361 int abort) 361 int abort)
362{ 362{
363 struct xfs_cil_ctx *ctx = args; 363 struct xfs_cil_ctx *ctx = args;
364 struct xfs_busy_extent *busyp, *n;
365 364
366 xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, 365 xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
367 ctx->start_lsn, abort); 366 ctx->start_lsn, abort);
368 367
369 list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) 368 xfs_alloc_busy_sort(&ctx->busy_extents);
370 xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); 369 xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, &ctx->busy_extents);
371 370
372 spin_lock(&ctx->cil->xc_cil_lock); 371 spin_lock(&ctx->cil->xc_cil_lock);
373 list_del(&ctx->committing); 372 list_del(&ctx->committing);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 5864850e9e34..2d3b6a498d63 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -146,6 +146,8 @@ static inline uint xlog_get_client_id(__be32 i)
146 shutdown */ 146 shutdown */
147#define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */ 147#define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */
148 148
149typedef __uint32_t xlog_tid_t;
150
149#ifdef __KERNEL__ 151#ifdef __KERNEL__
150/* 152/*
151 * Below are states for covering allocation transactions. 153 * Below are states for covering allocation transactions.
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 5cc464a17c93..04142caedb2b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -205,6 +205,35 @@ xlog_bread(
205} 205}
206 206
207/* 207/*
208 * Read at an offset into the buffer. Returns with the buffer in it's original
209 * state regardless of the result of the read.
210 */
211STATIC int
212xlog_bread_offset(
213 xlog_t *log,
214 xfs_daddr_t blk_no, /* block to read from */
215 int nbblks, /* blocks to read */
216 xfs_buf_t *bp,
217 xfs_caddr_t offset)
218{
219 xfs_caddr_t orig_offset = XFS_BUF_PTR(bp);
220 int orig_len = bp->b_buffer_length;
221 int error, error2;
222
223 error = XFS_BUF_SET_PTR(bp, offset, BBTOB(nbblks));
224 if (error)
225 return error;
226
227 error = xlog_bread_noalign(log, blk_no, nbblks, bp);
228
229 /* must reset buffer pointer even on error */
230 error2 = XFS_BUF_SET_PTR(bp, orig_offset, orig_len);
231 if (error)
232 return error;
233 return error2;
234}
235
236/*
208 * Write out the buffer at the given block for the given number of blocks. 237 * Write out the buffer at the given block for the given number of blocks.
209 * The buffer is kept locked across the write and is returned locked. 238 * The buffer is kept locked across the write and is returned locked.
210 * This can only be used for synchronous log writes. 239 * This can only be used for synchronous log writes.
@@ -1229,20 +1258,12 @@ xlog_write_log_records(
1229 */ 1258 */
1230 ealign = round_down(end_block, sectbb); 1259 ealign = round_down(end_block, sectbb);
1231 if (j == 0 && (start_block + endcount > ealign)) { 1260 if (j == 0 && (start_block + endcount > ealign)) {
1232 offset = XFS_BUF_PTR(bp); 1261 offset = XFS_BUF_PTR(bp) + BBTOB(ealign - start_block);
1233 balign = BBTOB(ealign - start_block); 1262 error = xlog_bread_offset(log, ealign, sectbb,
1234 error = XFS_BUF_SET_PTR(bp, offset + balign, 1263 bp, offset);
1235 BBTOB(sectbb));
1236 if (error) 1264 if (error)
1237 break; 1265 break;
1238 1266
1239 error = xlog_bread_noalign(log, ealign, sectbb, bp);
1240 if (error)
1241 break;
1242
1243 error = XFS_BUF_SET_PTR(bp, offset, bufblks);
1244 if (error)
1245 break;
1246 } 1267 }
1247 1268
1248 offset = xlog_align(log, start_block, endcount, bp); 1269 offset = xlog_align(log, start_block, endcount, bp);
@@ -3448,19 +3469,9 @@ xlog_do_recovery_pass(
3448 * - order is important. 3469 * - order is important.
3449 */ 3470 */
3450 wrapped_hblks = hblks - split_hblks; 3471 wrapped_hblks = hblks - split_hblks;
3451 error = XFS_BUF_SET_PTR(hbp, 3472 error = xlog_bread_offset(log, 0,
3452 offset + BBTOB(split_hblks), 3473 wrapped_hblks, hbp,
3453 BBTOB(hblks - split_hblks)); 3474 offset + BBTOB(split_hblks));
3454 if (error)
3455 goto bread_err2;
3456
3457 error = xlog_bread_noalign(log, 0,
3458 wrapped_hblks, hbp);
3459 if (error)
3460 goto bread_err2;
3461
3462 error = XFS_BUF_SET_PTR(hbp, offset,
3463 BBTOB(hblks));
3464 if (error) 3475 if (error)
3465 goto bread_err2; 3476 goto bread_err2;
3466 } 3477 }
@@ -3511,19 +3522,9 @@ xlog_do_recovery_pass(
3511 * _first_, then the log start (LR header end) 3522 * _first_, then the log start (LR header end)
3512 * - order is important. 3523 * - order is important.
3513 */ 3524 */
3514 error = XFS_BUF_SET_PTR(dbp, 3525 error = xlog_bread_offset(log, 0,
3515 offset + BBTOB(split_bblks), 3526 bblks - split_bblks, hbp,
3516 BBTOB(bblks - split_bblks)); 3527 offset + BBTOB(split_bblks));
3517 if (error)
3518 goto bread_err2;
3519
3520 error = xlog_bread_noalign(log, wrapped_hblks,
3521 bblks - split_bblks,
3522 dbp);
3523 if (error)
3524 goto bread_err2;
3525
3526 error = XFS_BUF_SET_PTR(dbp, offset, h_size);
3527 if (error) 3528 if (error)
3528 goto bread_err2; 3529 goto bread_err2;
3529 } 3530 }
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index bb3f9a7b24ed..b49b82363d20 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1900,7 +1900,7 @@ xfs_mod_incore_sb_batch(
1900 uint nmsb, 1900 uint nmsb,
1901 int rsvd) 1901 int rsvd)
1902{ 1902{
1903 xfs_mod_sb_t *msbp = &msb[0]; 1903 xfs_mod_sb_t *msbp;
1904 int error = 0; 1904 int error = 0;
1905 1905
1906 /* 1906 /*
@@ -1910,7 +1910,7 @@ xfs_mod_incore_sb_batch(
1910 * changes will be atomic. 1910 * changes will be atomic.
1911 */ 1911 */
1912 spin_lock(&mp->m_sb_lock); 1912 spin_lock(&mp->m_sb_lock);
1913 for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) { 1913 for (msbp = msb; msbp < (msb + nmsb); msbp++) {
1914 ASSERT(msbp->msb_field < XFS_SBS_ICOUNT || 1914 ASSERT(msbp->msb_field < XFS_SBS_ICOUNT ||
1915 msbp->msb_field > XFS_SBS_FDBLOCKS); 1915 msbp->msb_field > XFS_SBS_FDBLOCKS);
1916 1916
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 76922793f64f..d1f24858ccc4 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -608,10 +608,8 @@ STATIC void
608xfs_trans_free( 608xfs_trans_free(
609 struct xfs_trans *tp) 609 struct xfs_trans *tp)
610{ 610{
611 struct xfs_busy_extent *busyp, *n; 611 xfs_alloc_busy_sort(&tp->t_busy);
612 612 xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy);
613 list_for_each_entry_safe(busyp, n, &tp->t_busy, list)
614 xfs_alloc_busy_clear(tp->t_mountp, busyp);
615 613
616 atomic_dec(&tp->t_mountp->m_active_trans); 614 atomic_dec(&tp->t_mountp->m_active_trans);
617 xfs_trans_free_dqinfo(tp); 615 xfs_trans_free_dqinfo(tp);
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 26d1867d8156..65584b55607d 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -73,8 +73,6 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */
73typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ 73typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
74typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ 74typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */
75 75
76typedef __uint32_t xlog_tid_t; /* transaction ID type */
77
78/* 76/*
79 * These types are 64 bits on disk but are either 32 or 64 bits in memory. 77 * These types are 64 bits on disk but are either 32 or 64 bits in memory.
80 * Disk based types: 78 * Disk based types: