Diffstat (limited to 'fs')
-rw-r--r--  fs/block_dev.c  17
-rw-r--r--  fs/btrfs/extent_io.c  1
-rw-r--r--  fs/btrfs/relocation.c  2
-rw-r--r--  fs/cifs/Kconfig  35
-rw-r--r--  fs/cifs/Makefile  2
-rw-r--r--  fs/cifs/README  12
-rw-r--r--  fs/cifs/cifs_debug.c  2
-rw-r--r--  fs/cifs/cifs_fs_sb.h  4
-rw-r--r--  fs/cifs/cifs_unicode.h  3
-rw-r--r--  fs/cifs/cifsacl.c  483
-rw-r--r--  fs/cifs/cifsacl.h  25
-rw-r--r--  fs/cifs/cifsencrypt.c  12
-rw-r--r--  fs/cifs/cifsfs.c  119
-rw-r--r--  fs/cifs/cifsfs.h  20
-rw-r--r--  fs/cifs/cifsglob.h  10
-rw-r--r--  fs/cifs/cifspdu.h  37
-rw-r--r--  fs/cifs/cifsproto.h  30
-rw-r--r--  fs/cifs/cifssmb.c  377
-rw-r--r--  fs/cifs/connect.c  279
-rw-r--r--  fs/cifs/export.c  4
-rw-r--r--  fs/cifs/file.c  167
-rw-r--r--  fs/cifs/inode.c  129
-rw-r--r--  fs/cifs/misc.c  12
-rw-r--r--  fs/cifs/netmisc.c  7
-rw-r--r--  fs/cifs/sess.c  9
-rw-r--r--  fs/cifs/smbdes.c  418
-rw-r--r--  fs/cifs/smbencrypt.c  124
-rw-r--r--  fs/cifs/transport.c  66
-rw-r--r--  fs/cifs/xattr.c  20
-rw-r--r--  fs/compat.c  235
-rw-r--r--  fs/dcache.c  1
-rw-r--r--  fs/debugfs/file.c  19
-rw-r--r--  fs/exec.c  127
-rw-r--r--  fs/fat/cache.c  7
-rw-r--r--  fs/fat/dir.c  32
-rw-r--r--  fs/fat/fat.h  15
-rw-r--r--  fs/fat/fatent.c  4
-rw-r--r--  fs/fat/inode.c  74
-rw-r--r--  fs/fat/misc.c  44
-rw-r--r--  fs/fat/namei_msdos.c  4
-rw-r--r--  fs/fat/namei_vfat.c  4
-rw-r--r--  fs/freevxfs/vxfs_inode.c  2
-rw-r--r--  fs/gfs2/Makefile  4
-rw-r--r--  fs/gfs2/aops.c  8
-rw-r--r--  fs/gfs2/bmap.c  2
-rw-r--r--  fs/gfs2/dir.c  197
-rw-r--r--  fs/gfs2/dir.h  4
-rw-r--r--  fs/gfs2/export.c  2
-rw-r--r--  fs/gfs2/file.c  46
-rw-r--r--  fs/gfs2/glock.c  94
-rw-r--r--  fs/gfs2/glock.h  3
-rw-r--r--  fs/gfs2/glops.c  172
-rw-r--r--  fs/gfs2/glops.h  2
-rw-r--r--  fs/gfs2/incore.h  8
-rw-r--r--  fs/gfs2/inode.c  1510
-rw-r--r--  fs/gfs2/inode.h  8
-rw-r--r--  fs/gfs2/log.c  208
-rw-r--r--  fs/gfs2/log.h  2
-rw-r--r--  fs/gfs2/lops.c  39
-rw-r--r--  fs/gfs2/main.c  1
-rw-r--r--  fs/gfs2/meta_io.c  2
-rw-r--r--  fs/gfs2/meta_io.h  2
-rw-r--r--  fs/gfs2/ops_fstype.c  32
-rw-r--r--  fs/gfs2/ops_inode.c  1344
-rw-r--r--  fs/gfs2/rgrp.c  24
-rw-r--r--  fs/gfs2/super.c  138
-rw-r--r--  fs/gfs2/sys.c  6
-rw-r--r--  fs/gfs2/trace_gfs2.h  38
-rw-r--r--  fs/inode.c  1
-rw-r--r--  fs/logfs/dev_bdev.c  1
-rw-r--r--  fs/logfs/readwrite.c  2
-rw-r--r--  fs/namei.c  2
-rw-r--r--  fs/nfsd/stats.c  2
-rw-r--r--  fs/nilfs2/alloc.c  12
-rw-r--r--  fs/nilfs2/bmap.c  4
-rw-r--r--  fs/nilfs2/btnode.c  19
-rw-r--r--  fs/nilfs2/btnode.h  4
-rw-r--r--  fs/nilfs2/btree.c  38
-rw-r--r--  fs/nilfs2/cpfile.c  24
-rw-r--r--  fs/nilfs2/dat.c  4
-rw-r--r--  fs/nilfs2/file.c  1
-rw-r--r--  fs/nilfs2/gcinode.c  25
-rw-r--r--  fs/nilfs2/ifile.c  4
-rw-r--r--  fs/nilfs2/inode.c  23
-rw-r--r--  fs/nilfs2/ioctl.c  61
-rw-r--r--  fs/nilfs2/mdt.c  8
-rw-r--r--  fs/nilfs2/mdt.h  9
-rw-r--r--  fs/nilfs2/nilfs.h  7
-rw-r--r--  fs/nilfs2/page.c  79
-rw-r--r--  fs/nilfs2/page.h  7
-rw-r--r--  fs/nilfs2/recovery.c  12
-rw-r--r--  fs/nilfs2/segbuf.c  17
-rw-r--r--  fs/nilfs2/segment.c  190
-rw-r--r--  fs/nilfs2/segment.h  2
-rw-r--r--  fs/nilfs2/sufile.c  274
-rw-r--r--  fs/nilfs2/sufile.h  4
-rw-r--r--  fs/nilfs2/super.c  131
-rw-r--r--  fs/nilfs2/the_nilfs.c  24
-rw-r--r--  fs/nilfs2/the_nilfs.h  2
-rw-r--r--  fs/ocfs2/refcounttree.c  2
-rw-r--r--  fs/partitions/ldm.c  7
-rw-r--r--  fs/pstore/platform.c  12
-rw-r--r--  fs/squashfs/Kconfig  4
-rw-r--r--  fs/squashfs/cache.c  2
-rw-r--r--  fs/super.c  3
-rw-r--r--  fs/sysfs/file.c  12
-rw-r--r--  fs/sysfs/group.c  6
-rw-r--r--  fs/timerfd.c  102
-rw-r--r--  fs/ufs/inode.c  2
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c  22
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.h  1
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl32.c  3
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl32.h  1
-rw-r--r--  fs/xfs/linux-2.6/xfs_linux.h  1
-rw-r--r--  fs/xfs/linux-2.6/xfs_message.c  20
-rw-r--r--  fs/xfs/linux-2.6/xfs_message.h  7
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c  4
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c  10
-rw-r--r--  fs/xfs/linux-2.6/xfs_trace.h  76
-rw-r--r--  fs/xfs/xfs_ag.h  1
-rw-r--r--  fs/xfs/xfs_alloc.c  844
-rw-r--r--  fs/xfs/xfs_alloc.h  15
-rw-r--r--  fs/xfs/xfs_alloc_btree.c  13
-rw-r--r--  fs/xfs/xfs_dfrag.c  6
-rw-r--r--  fs/xfs/xfs_inode.c  4
-rw-r--r--  fs/xfs/xfs_inode_item.c  1
-rw-r--r--  fs/xfs/xfs_log.c  15
-rw-r--r--  fs/xfs/xfs_log.h  2
-rw-r--r--  fs/xfs/xfs_log_cil.c  5
-rw-r--r--  fs/xfs/xfs_log_priv.h  2
-rw-r--r--  fs/xfs/xfs_log_recover.c  75
-rw-r--r--  fs/xfs/xfs_mount.c  4
-rw-r--r--  fs/xfs/xfs_trans.c  6
-rw-r--r--  fs/xfs/xfs_types.h  2
134 files changed, 4837 insertions, 4338 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 257b00e98428..bf9c7a720371 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1120,6 +1120,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
             goto restart;
         }
     }
+
+    if (!ret && !bdev->bd_openers) {
+        bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
+        bdi = blk_get_backing_dev_info(bdev);
+        if (bdi == NULL)
+            bdi = &default_backing_dev_info;
+        bdev_inode_switch_bdi(bdev->bd_inode, bdi);
+    }
+
     /*
      * If the device is invalidated, rescan partition
      * if open succeeded or failed with -ENOMEDIUM.
@@ -1130,14 +1139,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
             rescan_partitions(disk, bdev);
             if (ret)
                 goto out_clear;
-
-            if (!bdev->bd_openers) {
-                bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
-                bdi = blk_get_backing_dev_info(bdev);
-                if (bdi == NULL)
-                    bdi = &default_backing_dev_info;
-                bdev_inode_switch_bdi(bdev->bd_inode, bdi);
-            }
         } else {
             struct block_device *whole;
             whole = bdget_disk(disk, 0);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ba41da59e31b..96fcfa522dab 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -10,6 +10,7 @@
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
+#include <linux/prefetch.h>
 #include "extent_io.h"
 #include "extent_map.h"
 #include "compat.h"
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 199a80134312..f340f7c99d09 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -709,7 +709,7 @@ again:
         WARN_ON(cur->checked);
         if (!list_empty(&cur->upper)) {
             /*
-             * the backref was added previously when processsing
+             * the backref was added previously when processing
              * backref of type BTRFS_TREE_BLOCK_REF_KEY
              */
             BUG_ON(!list_is_singular(&cur->upper));
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 7cb0f7f847e4..75c47cd8d086 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -7,6 +7,7 @@ config CIFS
     select CRYPTO_MD5
     select CRYPTO_HMAC
     select CRYPTO_ARC4
+    select CRYPTO_DES
     help
       This is the client VFS module for the Common Internet File System
       (CIFS) protocol which is the successor to the Server Message Block
@@ -152,16 +153,28 @@ config CIFS_ACL
       Allows to fetch CIFS/NTFS ACL from the server. The DACL blob
       is handed over to the application/caller.
 
-config CIFS_EXPERIMENTAL
-    bool "CIFS Experimental Features (EXPERIMENTAL)"
+config CIFS_SMB2
+    bool "SMB2 network file system support (EXPERIMENTAL)"
+    depends on EXPERIMENTAL && INET && BROKEN
+    select NLS
+    select KEYS
+    select FSCACHE
+    select DNS_RESOLVER
+
+    help
+      This enables experimental support for the SMB2 (Server Message Block
+      version 2) protocol. The SMB2 protocol is the successor to the
+      popular CIFS and SMB network file sharing protocols. SMB2 is the
+      native file sharing mechanism for recent versions of Windows
+      operating systems (since Vista). SMB2 enablement will eventually
+      allow users better performance, security and features, than would be
+      possible with cifs. Note that smb2 mount options also are simpler
+      (compared to cifs) due to protocol improvements.
+
+      Unless you are a developer or tester, say N.
+
+config CIFS_NFSD_EXPORT
+    bool "Allow nfsd to export CIFS file system (EXPERIMENTAL)"
     depends on CIFS && EXPERIMENTAL
     help
-      Enables cifs features under testing. These features are
-      experimental and currently include DFS support and directory
-      change notification ie fcntl(F_DNOTIFY), as well as the upcall
-      mechanism which will be used for Kerberos session negotiation
-      and uid remapping. Some of these features also may depend on
-      setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
-      (which is disabled by default). See the file fs/cifs/README
-      for more details. If unsure, say N.
-
+      Allows NFS server to export a CIFS mounted share (nfsd over cifs)
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index d87558448e3d..005d524c3a4a 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -4,7 +4,7 @@
 obj-$(CONFIG_CIFS) += cifs.o
 
 cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
-      link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \
+      link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
       cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
       readdir.o ioctl.o sess.o export.o
 
diff --git a/fs/cifs/README b/fs/cifs/README
index 74ab165fc646..4a3ca0e5ca24 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -704,18 +704,6 @@ the start of smb requests and responses can be enabled via:
 
     echo 1 > /proc/fs/cifs/traceSMB
 
-Two other experimental features are under development. To test these
-requires enabling CONFIG_CIFS_EXPERIMENTAL
-
-    cifsacl support needed to retrieve approximated mode bits based on
-    the contents on the CIFS ACL.
-
-    lease support: cifs will check the oplock state before calling into
-    the vfs to see if we can grant a lease on a file.
-
-    DNOTIFY fcntl: needed for support of directory change
-    notification and perhaps later for file leases)
-
 Per share (per client mount) statistics are available in /proc/fs/cifs/Stats
 if the kernel was configured with cifs statistics enabled. The statistics
 represent the number of successful (ie non-zero return code from the server)
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 30d01bc90855..18f4272d9047 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -63,7 +63,7 @@ void cifs_dump_detail(struct smb_hdr *smb)
     cERROR(1, "Cmd: %d Err: 0x%x Flags: 0x%x Flgs2: 0x%x Mid: %d Pid: %d",
           smb->Command, smb->Status.CifsError,
           smb->Flags, smb->Flags2, smb->Mid, smb->Pid);
-    cERROR(1, "smb buf %p len %d", smb, smbCalcSize_LE(smb));
+    cERROR(1, "smb buf %p len %d", smb, smbCalcSize(smb));
 }
 
 
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index ac51cd2d33ae..a9d5692e0c20 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -58,9 +58,7 @@ struct cifs_sb_info {
     unsigned int mnt_cifs_flags;
     int prepathlen;
     char *prepath; /* relative path under the share to mount to */
-#ifdef CONFIG_CIFS_DFS_UPCALL
-    char *mountdata; /* mount options received at mount time */
-#endif
+    char *mountdata; /* options received at mount time or via DFS refs */
     struct backing_dev_info bdi;
     struct delayed_work prune_tlinks;
 };
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 644dd882a560..6d02fd560566 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -82,6 +82,9 @@ int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *);
 char *cifs_strndup_from_ucs(const char *src, const int maxlen,
                 const bool is_unicode,
                 const struct nls_table *codepage);
+extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
+                const struct nls_table *cp, int mapChars);
+
 #endif
 
 /*
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index beeebf194234..f3c6fb9942ac 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -23,24 +23,16 @@
 
 #include <linux/fs.h>
 #include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/keyctl.h>
+#include <linux/key-type.h>
+#include <keys/user-type.h>
 #include "cifspdu.h"
 #include "cifsglob.h"
 #include "cifsacl.h"
 #include "cifsproto.h"
 #include "cifs_debug.h"
 
-
-static struct cifs_wksid wksidarr[NUM_WK_SIDS] = {
-    {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"},
-    {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"},
-    {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11), 0, 0, 0, 0} }, "net-users"},
-    {{1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(18), 0, 0, 0, 0} }, "sys"},
-    {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(544), 0, 0, 0} }, "root"},
-    {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(545), 0, 0, 0} }, "users"},
-    {{1, 2, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(32), __constant_cpu_to_le32(546), 0, 0, 0} }, "guest"} }
-;
-
-
 /* security id for everyone/world system group */
 static const struct cifs_sid sid_everyone = {
     1, 1, {0, 0, 0, 0, 0, 1}, {0} };
@@ -50,50 +42,385 @@ static const struct cifs_sid sid_authusers = {
 /* group users */
 static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
 
+const struct cred *root_cred;
 
-int match_sid(struct cifs_sid *ctsid)
+static void
+shrink_idmap_tree(struct rb_root *root, int nr_to_scan, int *nr_rem,
+        int *nr_del)
 {
-    int i, j;
-    int num_subauth, num_sat, num_saw;
-    struct cifs_sid *cwsid;
+    struct rb_node *node;
+    struct rb_node *tmp;
+    struct cifs_sid_id *psidid;
+
+    node = rb_first(root);
+    while (node) {
+        tmp = node;
+        node = rb_next(tmp);
+        psidid = rb_entry(tmp, struct cifs_sid_id, rbnode);
+        if (nr_to_scan == 0 || *nr_del == nr_to_scan)
+            ++(*nr_rem);
+        else {
+            if (time_after(jiffies, psidid->time + SID_MAP_EXPIRE)
+                    && psidid->refcount == 0) {
+                rb_erase(tmp, root);
+                ++(*nr_del);
+            } else
+                ++(*nr_rem);
+        }
+    }
+}
+
+/*
+ * Run idmap cache shrinker.
+ */
+static int
+cifs_idmap_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+{
+    int nr_del = 0;
+    int nr_rem = 0;
+    struct rb_root *root;
+
+    root = &uidtree;
+    spin_lock(&siduidlock);
+    shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del);
+    spin_unlock(&siduidlock);
+
+    root = &gidtree;
+    spin_lock(&sidgidlock);
+    shrink_idmap_tree(root, nr_to_scan, &nr_rem, &nr_del);
+    spin_unlock(&sidgidlock);
+
+    return nr_rem;
+}
+
+static struct shrinker cifs_shrinker = {
+    .shrink = cifs_idmap_shrinker,
+    .seeks = DEFAULT_SEEKS,
+};
+
+static int
+cifs_idmap_key_instantiate(struct key *key, const void *data, size_t datalen)
+{
+    char *payload;
+
+    payload = kmalloc(datalen, GFP_KERNEL);
+    if (!payload)
+        return -ENOMEM;
+
+    memcpy(payload, data, datalen);
+    key->payload.data = payload;
+    return 0;
+}
+
+static inline void
+cifs_idmap_key_destroy(struct key *key)
+{
+    kfree(key->payload.data);
+}
 
-    if (!ctsid)
-        return -1;
+struct key_type cifs_idmap_key_type = {
+    .name = "cifs.idmap",
+    .instantiate = cifs_idmap_key_instantiate,
+    .destroy = cifs_idmap_key_destroy,
+    .describe = user_describe,
+    .match = user_match,
+};
+
+static void
+sid_to_str(struct cifs_sid *sidptr, char *sidstr)
+{
+    int i;
+    unsigned long saval;
+    char *strptr;
 
-    for (i = 0; i < NUM_WK_SIDS; ++i) {
-        cwsid = &(wksidarr[i].cifssid);
-
-        /* compare the revision */
-        if (ctsid->revision != cwsid->revision)
-            continue;
+    strptr = sidstr;
+
+    sprintf(strptr, "%s", "S");
+    strptr = sidstr + strlen(sidstr);
 
-        /* compare all of the six auth values */
-        for (j = 0; j < 6; ++j) {
-            if (ctsid->authority[j] != cwsid->authority[j])
-                break;
+    sprintf(strptr, "-%d", sidptr->revision);
+    strptr = sidstr + strlen(sidstr);
+
+    for (i = 0; i < 6; ++i) {
+        if (sidptr->authority[i]) {
+            sprintf(strptr, "-%d", sidptr->authority[i]);
+            strptr = sidstr + strlen(sidstr);
         }
-        if (j < 6)
-            continue; /* all of the auth values did not match */
-
-        /* compare all of the subauth values if any */
-        num_sat = ctsid->num_subauth;
-        num_saw = cwsid->num_subauth;
-        num_subauth = num_sat < num_saw ? num_sat : num_saw;
-        if (num_subauth) {
-            for (j = 0; j < num_subauth; ++j) {
-                if (ctsid->sub_auth[j] != cwsid->sub_auth[j])
-                    break;
-            }
-            if (j < num_subauth)
-                continue; /* all sub_auth values do not match */
+    }
+
+    for (i = 0; i < sidptr->num_subauth; ++i) {
+        saval = le32_to_cpu(sidptr->sub_auth[i]);
+        sprintf(strptr, "-%ld", saval);
+        strptr = sidstr + strlen(sidstr);
+    }
+}
+
+static void
+id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
+        struct cifs_sid_id **psidid, char *typestr)
+{
+    int rc;
+    char *strptr;
+    struct rb_node *node = root->rb_node;
+    struct rb_node *parent = NULL;
+    struct rb_node **linkto = &(root->rb_node);
+    struct cifs_sid_id *lsidid;
+
+    while (node) {
+        lsidid = rb_entry(node, struct cifs_sid_id, rbnode);
+        parent = node;
+        rc = compare_sids(sidptr, &((lsidid)->sid));
+        if (rc > 0) {
+            linkto = &(node->rb_left);
+            node = node->rb_left;
+        } else if (rc < 0) {
+            linkto = &(node->rb_right);
+            node = node->rb_right;
+        }
+    }
+
+    memcpy(&(*psidid)->sid, sidptr, sizeof(struct cifs_sid));
+    (*psidid)->time = jiffies - (SID_MAP_RETRY + 1);
+    (*psidid)->refcount = 0;
+
+    sprintf((*psidid)->sidstr, "%s", typestr);
+    strptr = (*psidid)->sidstr + strlen((*psidid)->sidstr);
+    sid_to_str(&(*psidid)->sid, strptr);
+
+    clear_bit(SID_ID_PENDING, &(*psidid)->state);
+    clear_bit(SID_ID_MAPPED, &(*psidid)->state);
+
+    rb_link_node(&(*psidid)->rbnode, parent, linkto);
+    rb_insert_color(&(*psidid)->rbnode, root);
+}
+
+static struct cifs_sid_id *
+id_rb_search(struct rb_root *root, struct cifs_sid *sidptr)
+{
+    int rc;
+    struct rb_node *node = root->rb_node;
+    struct cifs_sid_id *lsidid;
+
+    while (node) {
+        lsidid = rb_entry(node, struct cifs_sid_id, rbnode);
+        rc = compare_sids(sidptr, &((lsidid)->sid));
+        if (rc > 0) {
+            node = node->rb_left;
+        } else if (rc < 0) {
+            node = node->rb_right;
+        } else /* node found */
+            return lsidid;
+    }
+
+    return NULL;
+}
+
+static int
+sidid_pending_wait(void *unused)
+{
+    schedule();
+    return signal_pending(current) ? -ERESTARTSYS : 0;
+}
+
+static int
+sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
+        struct cifs_fattr *fattr, uint sidtype)
+{
+    int rc;
+    unsigned long cid;
+    struct key *idkey;
+    const struct cred *saved_cred;
+    struct cifs_sid_id *psidid, *npsidid;
+    struct rb_root *cidtree;
+    spinlock_t *cidlock;
+
+    if (sidtype == SIDOWNER) {
+        cid = cifs_sb->mnt_uid; /* default uid, in case upcall fails */
+        cidlock = &siduidlock;
+        cidtree = &uidtree;
+    } else if (sidtype == SIDGROUP) {
+        cid = cifs_sb->mnt_gid; /* default gid, in case upcall fails */
+        cidlock = &sidgidlock;
+        cidtree = &gidtree;
+    } else
+        return -ENOENT;
+
+    spin_lock(cidlock);
+    psidid = id_rb_search(cidtree, psid);
+
+    if (!psidid) { /* node does not exist, allocate one & attempt adding */
+        spin_unlock(cidlock);
+        npsidid = kzalloc(sizeof(struct cifs_sid_id), GFP_KERNEL);
+        if (!npsidid)
+            return -ENOMEM;
+
+        npsidid->sidstr = kmalloc(SIDLEN, GFP_KERNEL);
+        if (!npsidid->sidstr) {
+            kfree(npsidid);
+            return -ENOMEM;
+        }
+
+        spin_lock(cidlock);
+        psidid = id_rb_search(cidtree, psid);
+        if (psidid) { /* node happened to get inserted meanwhile */
+            ++psidid->refcount;
+            spin_unlock(cidlock);
+            kfree(npsidid->sidstr);
+            kfree(npsidid);
+        } else {
+            psidid = npsidid;
+            id_rb_insert(cidtree, psid, &psidid,
+                    sidtype == SIDOWNER ? "os:" : "gs:");
+            ++psidid->refcount;
+            spin_unlock(cidlock);
         }
+    } else {
+        ++psidid->refcount;
+        spin_unlock(cidlock);
+    }
+
+    /*
+     * If we are here, it is safe to access psidid and its fields
+     * since a reference was taken earlier while holding the spinlock.
+     * A reference on the node is put without holding the spinlock
+     * and it is OK to do so in this case, shrinker will not erase
+     * this node until all references are put and we do not access
+     * any fields of the node after a reference is put .
+     */
+    if (test_bit(SID_ID_MAPPED, &psidid->state)) {
+        cid = psidid->id;
+        psidid->time = jiffies; /* update ts for accessing */
+        goto sid_to_id_out;
+    }
 
-        cFYI(1, "matching sid: %s\n", wksidarr[i].sidname);
-        return 0; /* sids compare/match */
+    if (time_after(psidid->time + SID_MAP_RETRY, jiffies))
+        goto sid_to_id_out;
+
+    if (!test_and_set_bit(SID_ID_PENDING, &psidid->state)) {
+        saved_cred = override_creds(root_cred);
+        idkey = request_key(&cifs_idmap_key_type, psidid->sidstr, "");
+        if (IS_ERR(idkey))
+            cFYI(1, "%s: Can't map SID to an id", __func__);
+        else {
+            cid = *(unsigned long *)idkey->payload.value;
+            psidid->id = cid;
+            set_bit(SID_ID_MAPPED, &psidid->state);
+            key_put(idkey);
+            kfree(psidid->sidstr);
+        }
+        revert_creds(saved_cred);
+        psidid->time = jiffies; /* update ts for accessing */
+        clear_bit(SID_ID_PENDING, &psidid->state);
+        wake_up_bit(&psidid->state, SID_ID_PENDING);
+    } else {
+        rc = wait_on_bit(&psidid->state, SID_ID_PENDING,
+                sidid_pending_wait, TASK_INTERRUPTIBLE);
+        if (rc) {
+            cFYI(1, "%s: sidid_pending_wait interrupted %d",
+                    __func__, rc);
+            --psidid->refcount; /* decremented without spinlock */
+            return rc;
+        }
+        if (test_bit(SID_ID_MAPPED, &psidid->state))
+            cid = psidid->id;
     }
 
-    cFYI(1, "No matching sid");
-    return -1;
+sid_to_id_out:
+    --psidid->refcount; /* decremented without spinlock */
+    if (sidtype == SIDOWNER)
+        fattr->cf_uid = cid;
+    else
+        fattr->cf_gid = cid;
+
+    return 0;
+}
+
+int
+init_cifs_idmap(void)
+{
+    struct cred *cred;
+    struct key *keyring;
+    int ret;
+
+    cFYI(1, "Registering the %s key type\n", cifs_idmap_key_type.name);
+
+    /* create an override credential set with a special thread keyring in
+     * which requests are cached
+     *
+     * this is used to prevent malicious redirections from being installed
+     * with add_key().
+     */
+    cred = prepare_kernel_cred(NULL);
+    if (!cred)
+        return -ENOMEM;
+
+    keyring = key_alloc(&key_type_keyring, ".cifs_idmap", 0, 0, cred,
+                (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+                KEY_USR_VIEW | KEY_USR_READ,
+                KEY_ALLOC_NOT_IN_QUOTA);
+    if (IS_ERR(keyring)) {
+        ret = PTR_ERR(keyring);
+        goto failed_put_cred;
+    }
+
+    ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
+    if (ret < 0)
+        goto failed_put_key;
+
+    ret = register_key_type(&cifs_idmap_key_type);
+    if (ret < 0)
+        goto failed_put_key;
+
+    /* instruct request_key() to use this special keyring as a cache for
+     * the results it looks up */
+    cred->thread_keyring = keyring;
+    cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
+    root_cred = cred;
+
+    spin_lock_init(&siduidlock);
+    uidtree = RB_ROOT;
+    spin_lock_init(&sidgidlock);
+    gidtree = RB_ROOT;
+
+    register_shrinker(&cifs_shrinker);
+
+    cFYI(1, "cifs idmap keyring: %d\n", key_serial(keyring));
+    return 0;
+
+failed_put_key:
+    key_put(keyring);
+failed_put_cred:
+    put_cred(cred);
+    return ret;
+}
+
+void
+exit_cifs_idmap(void)
+{
+    key_revoke(root_cred->thread_keyring);
+    unregister_key_type(&cifs_idmap_key_type);
+    put_cred(root_cred);
+    unregister_shrinker(&cifs_shrinker);
+    cFYI(1, "Unregistered %s key type\n", cifs_idmap_key_type.name);
+}
+
+void
+cifs_destroy_idmaptrees(void)
+{
+    struct rb_root *root;
+    struct rb_node *node;
+
+    root = &uidtree;
+    spin_lock(&siduidlock);
+    while ((node = rb_first(root)))
+        rb_erase(node, root);
+    spin_unlock(&siduidlock);
+
+    root = &gidtree;
+    spin_lock(&sidgidlock);
+    while ((node = rb_first(root)))
+        rb_erase(node, root);
+    spin_unlock(&sidgidlock);
 }
 
 /* if the two SIDs (roughly equivalent to a UUID for a user or group) are
@@ -104,16 +431,24 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
     int num_subauth, num_sat, num_saw;
 
     if ((!ctsid) || (!cwsid))
-        return 0;
+        return 1;
 
     /* compare the revision */
-    if (ctsid->revision != cwsid->revision)
-        return 0;
+    if (ctsid->revision != cwsid->revision) {
+        if (ctsid->revision > cwsid->revision)
+            return 1;
+        else
+            return -1;
+    }
 
     /* compare all of the six auth values */
     for (i = 0; i < 6; ++i) {
-        if (ctsid->authority[i] != cwsid->authority[i])
-            return 0;
+        if (ctsid->authority[i] != cwsid->authority[i]) {
+            if (ctsid->authority[i] > cwsid->authority[i])
+                return 1;
+            else
+                return -1;
+        }
     }
 
     /* compare all of the subauth values if any */
@@ -122,12 +457,16 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
     num_subauth = num_sat < num_saw ? num_sat : num_saw;
     if (num_subauth) {
         for (i = 0; i < num_subauth; ++i) {
-            if (ctsid->sub_auth[i] != cwsid->sub_auth[i])
-                return 0;
+            if (ctsid->sub_auth[i] != cwsid->sub_auth[i]) {
+                if (ctsid->sub_auth[i] > cwsid->sub_auth[i])
+                    return 1;
+                else
+                    return -1;
+            }
         }
     }
 
-    return 1; /* sids compare/match */
+    return 0; /* sids compare/match */
 }
 
 
@@ -382,22 +721,22 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
 #ifdef CONFIG_CIFS_DEBUG2
         dump_ace(ppace[i], end_of_acl);
 #endif
-        if (compare_sids(&(ppace[i]->sid), pownersid))
+        if (compare_sids(&(ppace[i]->sid), pownersid) == 0)
             access_flags_to_mode(ppace[i]->access_req,
                          ppace[i]->type,
                          &fattr->cf_mode,
                          &user_mask);
-        if (compare_sids(&(ppace[i]->sid), pgrpsid))
+        if (compare_sids(&(ppace[i]->sid), pgrpsid) == 0)
             access_flags_to_mode(ppace[i]->access_req,
                          ppace[i]->type,
                          &fattr->cf_mode,
                          &group_mask);
-        if (compare_sids(&(ppace[i]->sid), &sid_everyone))
+        if (compare_sids(&(ppace[i]->sid), &sid_everyone) == 0)
             access_flags_to_mode(ppace[i]->access_req,
                          ppace[i]->type,
                          &fattr->cf_mode,
                          &other_mask);
-        if (compare_sids(&(ppace[i]->sid), &sid_authusers))
+        if (compare_sids(&(ppace[i]->sid), &sid_authusers) == 0)
             access_flags_to_mode(ppace[i]->access_req,
                          ppace[i]->type,
                          &fattr->cf_mode,
@@ -475,10 +814,10 @@ static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
 
 
 /* Convert CIFS ACL to POSIX form */
-static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
-              struct cifs_fattr *fattr)
+static int parse_sec_desc(struct cifs_sb_info *cifs_sb,
+        struct cifs_ntsd *pntsd, int acl_len, struct cifs_fattr *fattr)
 {
-    int rc;
+    int rc = 0;
     struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
     struct cifs_acl *dacl_ptr; /* no need for SACL ptr */
     char *end_of_acl = ((char *)pntsd) + acl_len;
@@ -500,12 +839,26 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
          le32_to_cpu(pntsd->sacloffset), dacloffset);
 /*  cifs_dump_mem("owner_sid: ", owner_sid_ptr, 64); */
     rc = parse_sid(owner_sid_ptr, end_of_acl);
-    if (rc)
+    if (rc) {
+        cFYI(1, "%s: Error %d parsing Owner SID", __func__, rc);
+        return rc;
+    }
+    rc = sid_to_id(cifs_sb, owner_sid_ptr, fattr, SIDOWNER);
+    if (rc) {
+        cFYI(1, "%s: Error %d mapping Owner SID to uid", __func__, rc);
         return rc;
+    }
 
     rc = parse_sid(group_sid_ptr, end_of_acl);
-    if (rc)
+    if (rc) {
+        cFYI(1, "%s: Error %d mapping Owner SID to gid", __func__, rc);
         return rc;
+    }
+    rc = sid_to_id(cifs_sb, group_sid_ptr, fattr, SIDGROUP);
+    if (rc) {
+        cFYI(1, "%s: Error %d mapping Group SID to gid", __func__, rc);
+        return rc;
+    }
 
     if (dacloffset)
         parse_dacl(dacl_ptr, end_of_acl, owner_sid_ptr,
@@ -520,7 +873,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
     memcpy((void *)(&(cifscred->gsid)), (void *)group_sid_ptr,
            sizeof(struct cifs_sid)); */
 
-    return 0;
+    return rc;
 }
 
 
@@ -688,7 +1041,7 @@ out:
 }
 
 /* Set an ACL on the server */
-static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
+int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
             struct inode *inode, const char *path)
 {
     struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -727,7 +1080,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
         rc = PTR_ERR(pntsd);
         cERROR(1, "%s: error %d getting sec desc", __func__, rc);
     } else {
-        rc = parse_sec_desc(pntsd, acllen, fattr);
+        rc = parse_sec_desc(cifs_sb, pntsd, acllen, fattr);
         kfree(pntsd);
         if (rc)
             cERROR(1, "parse sec desc failed rc = %d", rc);
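
The cifsacl.c changes above turn compare_sids() from a boolean "does it match" helper into a memcmp-style three-way comparator, which is what lets SIDs key the new uid/gid rb-tree caches; callers in parse_dacl() therefore test for equality with == 0. A minimal, self-contained userspace sketch of that comparator contract (the struct layout and sample values here are illustrative, not the kernel definitions):

/* Three-way SID comparison, memcmp-style: <0, 0, >0. */
#include <stdio.h>

struct sid {
    unsigned char revision;
    unsigned char num_subauth;
    unsigned char authority[6];
    unsigned int  sub_auth[5];
};

static int compare_sids_sketch(const struct sid *a, const struct sid *b)
{
    int i, n;

    if (a->revision != b->revision)
        return a->revision > b->revision ? 1 : -1;
    for (i = 0; i < 6; i++)
        if (a->authority[i] != b->authority[i])
            return a->authority[i] > b->authority[i] ? 1 : -1;
    n = a->num_subauth < b->num_subauth ? a->num_subauth : b->num_subauth;
    for (i = 0; i < n; i++)
        if (a->sub_auth[i] != b->sub_auth[i])
            return a->sub_auth[i] > b->sub_auth[i] ? 1 : -1;
    return 0;   /* equal: callers now test == 0, not "truthy" */
}

int main(void)
{
    struct sid everyone = { 1, 1, {0, 0, 0, 0, 0, 1}, {0} };
    struct sid user     = { 1, 2, {0, 0, 0, 0, 0, 5}, {32, 545} };

    printf("self=%d, everyone==user? %d\n",
           compare_sids_sketch(&everyone, &everyone),
           compare_sids_sketch(&everyone, &user) == 0);
    return 0;
}

Returning an ordering rather than a yes/no answer lets one function serve both the equality checks in parse_dacl() and the left/right descent decisions in id_rb_insert()/id_rb_search().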
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index c4ae7d036563..5c902c7ce524 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -39,6 +39,15 @@
 #define ACCESS_ALLOWED	0
 #define ACCESS_DENIED	1
 
+#define SIDOWNER 1
+#define SIDGROUP 2
+#define SIDLEN 150 /* S- 1 revision- 6 authorities- max 5 sub authorities */
+
+#define SID_ID_MAPPED 0
+#define SID_ID_PENDING 1
+#define SID_MAP_EXPIRE (3600 * HZ) /* map entry expires after one hour */
+#define SID_MAP_RETRY (300 * HZ)   /* wait 5 minutes for next attempt to map */
+
 struct cifs_ntsd {
     __le16 revision; /* revision level */
     __le16 type;
@@ -74,7 +83,21 @@ struct cifs_wksid {
     char sidname[SIDNAMELENGTH];
 } __attribute__((packed));
 
-extern int match_sid(struct cifs_sid *);
+struct cifs_sid_id {
+    unsigned int refcount; /* increment with spinlock, decrement without */
+    unsigned long id;
+    unsigned long time;
+    unsigned long state;
+    char *sidstr;
+    struct rb_node rbnode;
+    struct cifs_sid sid;
+};
+
+#ifdef __KERNEL__
+extern struct key_type cifs_idmap_key_type;
+extern const struct cred *root_cred;
+#endif /* KERNEL */
+
 extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *);
 
 #endif /* _CIFSACL_H */
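
struct cifs_sid_id above embeds its struct rb_node directly in the cache entry, so a tree node is mapped back to its enclosing entry with rb_entry(), a container_of() wrapper. A small standalone sketch of that intrusive-node pattern, with hypothetical names and a plain struct in place of the kernel rb_node:

#include <stddef.h>
#include <stdio.h>

struct node { struct node *left, *right; };   /* stand-in for struct rb_node */

struct sid_id {
    unsigned long id;
    unsigned long time;
    struct node rbnode;      /* intrusive node: no separate allocation */
};

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
    struct sid_id entry = { .id = 1000 };
    struct node *n = &entry.rbnode;

    /* given only the embedded node, recover the enclosing cache entry */
    printf("id = %lu\n", container_of(n, struct sid_id, rbnode)->id);
    return 0;
}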
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index d1a016be73ba..45c3f78c8f81 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -60,7 +60,7 @@ static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
             server->session_key.response, server->session_key.len);
 
     crypto_shash_update(&server->secmech.sdescmd5->shash,
-        cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
+        cifs_pdu->Protocol, be32_to_cpu(cifs_pdu->smb_buf_length));
 
     rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
 
@@ -268,10 +268,11 @@ int setup_ntlm_response(struct cifsSesInfo *ses)
 }
 
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
-void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
+int calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
             char *lnm_session_key)
 {
     int i;
+    int rc;
     char password_with_pad[CIFS_ENCPWD_SIZE];
 
     memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
@@ -282,7 +283,7 @@ void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
         memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE);
         memcpy(lnm_session_key, password_with_pad,
             CIFS_ENCPWD_SIZE);
-        return;
+        return 0;
     }
 
     /* calculate old style session key */
@@ -299,10 +300,9 @@ void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt,
     for (i = 0; i < CIFS_ENCPWD_SIZE; i++)
         password_with_pad[i] = toupper(password_with_pad[i]);
 
-    SMBencrypt(password_with_pad, cryptkey, lnm_session_key);
+    rc = SMBencrypt(password_with_pad, cryptkey, lnm_session_key);
 
-    /* clear password before we return/free memory */
-    memset(password_with_pad, 0, CIFS_ENCPWD_SIZE);
+    return rc;
 }
 #endif /* CIFS_WEAK_PW_HASH */
 
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 5c412b33cd7c..493b74ca5648 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -128,29 +128,22 @@ cifs_read_super(struct super_block *sb, void *data,
     }
     cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages;
 
-#ifdef CONFIG_CIFS_DFS_UPCALL
-    /* copy mount params to sb for use in submounts */
-    /* BB: should we move this after the mount so we
-     * do not have to do the copy on failed mounts?
-     * BB: May be it is better to do simple copy before
-     * complex operation (mount), and in case of fail
-     * just exit instead of doing mount and attempting
-     * undo it if this copy fails?*/
+    /*
+     * Copy mount params to sb for use in submounts. Better to do
+     * the copy here and deal with the error before cleanup gets
+     * complicated post-mount.
+     */
     if (data) {
-        int len = strlen(data);
-        cifs_sb->mountdata = kzalloc(len + 1, GFP_KERNEL);
+        cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL);
         if (cifs_sb->mountdata == NULL) {
             bdi_destroy(&cifs_sb->bdi);
             kfree(sb->s_fs_info);
             sb->s_fs_info = NULL;
             return -ENOMEM;
         }
-        strncpy(cifs_sb->mountdata, data, len + 1);
-        cifs_sb->mountdata[len] = '\0';
     }
-#endif
 
-    rc = cifs_mount(sb, cifs_sb, data, devname);
+    rc = cifs_mount(sb, cifs_sb, devname);
 
     if (rc) {
         if (!silent)
@@ -163,7 +156,7 @@ cifs_read_super(struct super_block *sb, void *data,
     sb->s_bdi = &cifs_sb->bdi;
     sb->s_blocksize = CIFS_MAX_MSGSIZE;
     sb->s_blocksize_bits = 14;	/* default 2**14 = CIFS_MAX_MSGSIZE */
-    inode = cifs_root_iget(sb, ROOT_I);
+    inode = cifs_root_iget(sb);
 
     if (IS_ERR(inode)) {
         rc = PTR_ERR(inode);
@@ -184,12 +177,12 @@ cifs_read_super(struct super_block *sb, void *data,
     else
         sb->s_d_op = &cifs_dentry_ops;
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CIFS_NFSD_EXPORT
     if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
         cFYI(1, "export ops supported");
         sb->s_export_op = &cifs_export_ops;
     }
-#endif /* EXPERIMENTAL */
+#endif /* CIFS_NFSD_EXPORT */
 
     return 0;
 
@@ -202,12 +195,10 @@ out_no_root:
 
 out_mount_failed:
     if (cifs_sb) {
-#ifdef CONFIG_CIFS_DFS_UPCALL
         if (cifs_sb->mountdata) {
             kfree(cifs_sb->mountdata);
             cifs_sb->mountdata = NULL;
         }
-#endif
         unload_nls(cifs_sb->local_nls);
         bdi_destroy(&cifs_sb->bdi);
         kfree(cifs_sb);
@@ -231,12 +222,10 @@ cifs_put_super(struct super_block *sb)
     rc = cifs_umount(sb, cifs_sb);
     if (rc)
         cERROR(1, "cifs_umount failed with return code %d", rc);
-#ifdef CONFIG_CIFS_DFS_UPCALL
     if (cifs_sb->mountdata) {
         kfree(cifs_sb->mountdata);
         cifs_sb->mountdata = NULL;
     }
-#endif
 
     unload_nls(cifs_sb->local_nls);
     bdi_destroy(&cifs_sb->bdi);
@@ -618,16 +607,31 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
 {
     /* origin == SEEK_END => we must revalidate the cached file length */
     if (origin == SEEK_END) {
-        int retval;
-
-        /* some applications poll for the file length in this strange
-           way so we must seek to end on non-oplocked files by
-           setting the revalidate time to zero */
-        CIFS_I(file->f_path.dentry->d_inode)->time = 0;
-
-        retval = cifs_revalidate_file(file);
-        if (retval < 0)
-            return (loff_t)retval;
+        int rc;
+        struct inode *inode = file->f_path.dentry->d_inode;
+
+        /*
+         * We need to be sure that all dirty pages are written and the
+         * server has the newest file length.
+         */
+        if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping &&
+            inode->i_mapping->nrpages != 0) {
+            rc = filemap_fdatawait(inode->i_mapping);
+            if (rc) {
+                mapping_set_error(inode->i_mapping, rc);
+                return rc;
+            }
+        }
+        /*
+         * Some applications poll for the file length in this strange
+         * way so we must seek to end on non-oplocked files by
+         * setting the revalidate time to zero.
+         */
+        CIFS_I(inode)->time = 0;
+
+        rc = cifs_revalidate_file_attr(file);
+        if (rc < 0)
+            return (loff_t)rc;
     }
     return generic_file_llseek_unlocked(file, offset, origin);
 }
@@ -760,10 +764,11 @@ const struct file_operations cifs_file_strict_ops = {
 };
 
 const struct file_operations cifs_file_direct_ops = {
-    /* no aio, no readv -
-       BB reevaluate whether they can be done with directio, no cache */
-    .read = cifs_user_read,
-    .write = cifs_user_write,
+    /* BB reevaluate whether they can be done with directio, no cache */
+    .read = do_sync_read,
+    .write = do_sync_write,
+    .aio_read = cifs_user_readv,
+    .aio_write = cifs_user_writev,
     .open = cifs_open,
     .release = cifs_close,
     .lock = cifs_lock,
@@ -815,10 +820,11 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
 };
 
 const struct file_operations cifs_file_direct_nobrl_ops = {
-    /* no mmap, no aio, no readv -
-       BB reevaluate whether they can be done with directio, no cache */
-    .read = cifs_user_read,
-    .write = cifs_user_write,
+    /* BB reevaluate whether they can be done with directio, no cache */
+    .read = do_sync_read,
+    .write = do_sync_write,
+    .aio_read = cifs_user_readv,
+    .aio_write = cifs_user_writev,
     .open = cifs_open,
     .release = cifs_close,
     .fsync = cifs_fsync,
@@ -981,10 +987,10 @@ init_cifs(void)
     int rc = 0;
     cifs_proc_init();
     INIT_LIST_HEAD(&cifs_tcp_ses_list);
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
     INIT_LIST_HEAD(&GlobalDnotifyReqList);
     INIT_LIST_HEAD(&GlobalDnotifyRsp_Q);
-#endif
+#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
 /*
  *  Initialize Global counters
  */
@@ -1033,22 +1039,33 @@ init_cifs(void)
     if (rc)
         goto out_destroy_mids;
 
-    rc = register_filesystem(&cifs_fs_type);
-    if (rc)
-        goto out_destroy_request_bufs;
 #ifdef CONFIG_CIFS_UPCALL
     rc = register_key_type(&cifs_spnego_key_type);
     if (rc)
-        goto out_unregister_filesystem;
-#endif
+        goto out_destroy_request_bufs;
+#endif /* CONFIG_CIFS_UPCALL */
+
+#ifdef CONFIG_CIFS_ACL
+    rc = init_cifs_idmap();
+    if (rc)
+        goto out_register_key_type;
+#endif /* CONFIG_CIFS_ACL */
+
+    rc = register_filesystem(&cifs_fs_type);
+    if (rc)
+        goto out_init_cifs_idmap;
 
     return 0;
 
-#ifdef CONFIG_CIFS_UPCALL
-out_unregister_filesystem:
-    unregister_filesystem(&cifs_fs_type);
+out_init_cifs_idmap:
+#ifdef CONFIG_CIFS_ACL
+    exit_cifs_idmap();
+out_register_key_type:
 #endif
+#ifdef CONFIG_CIFS_UPCALL
+    unregister_key_type(&cifs_spnego_key_type);
 out_destroy_request_bufs:
+#endif
     cifs_destroy_request_bufs();
 out_destroy_mids:
     cifs_destroy_mids();
@@ -1070,6 +1087,10 @@ exit_cifs(void)
 #ifdef CONFIG_CIFS_DFS_UPCALL
     cifs_dfs_release_automount_timer();
 #endif
+#ifdef CONFIG_CIFS_ACL
+    cifs_destroy_idmaptrees();
+    exit_cifs_idmap();
+#endif
 #ifdef CONFIG_CIFS_UPCALL
     unregister_key_type(&cifs_spnego_key_type);
 #endif
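
The cifs_llseek() rework above follows a common pattern for SEEK_END on a network file system: wait for dirty pages to be written back, refresh the file attributes from the server, and only then compute the end-relative offset. A rough userspace analogue of that ordering, using fsync()/fstat() as stand-ins for the kernel's writeback wait and attribute revalidation (illustrative only, not the cifs code):

#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>

/* Seek relative to a freshly validated end-of-file. */
static off_t seek_end_fresh(int fd, off_t offset)
{
    struct stat st;

    if (fsync(fd) < 0)        /* flush dirty data first */
        return -1;
    if (fstat(fd, &st) < 0)   /* refetch the authoritative size */
        return -1;
    return lseek(fd, st.st_size + offset, SEEK_SET);
}

int main(void)
{
    int fd = open("/tmp/seek_demo", O_RDWR | O_CREAT | O_TRUNC, 0600);
    if (fd < 0)
        return 1;
    if (write(fd, "hello", 5) != 5)
        return 1;
    printf("end position: %lld\n", (long long)seek_end_fresh(fd, 0));
    close(fd);
    return 0;
}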
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index a9371b6578c0..64313f778ebf 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -47,7 +47,7 @@ extern void cifs_sb_deactive(struct super_block *sb);
 
 /* Functions related to inodes */
 extern const struct inode_operations cifs_dir_inode_ops;
-extern struct inode *cifs_root_iget(struct super_block *, unsigned long);
+extern struct inode *cifs_root_iget(struct super_block *);
 extern int cifs_create(struct inode *, struct dentry *, int,
                struct nameidata *);
 extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
@@ -59,9 +59,11 @@ extern int cifs_mkdir(struct inode *, struct dentry *, int);
 extern int cifs_rmdir(struct inode *, struct dentry *);
 extern int cifs_rename(struct inode *, struct dentry *, struct inode *,
                struct dentry *);
+extern int cifs_revalidate_file_attr(struct file *filp);
+extern int cifs_revalidate_dentry_attr(struct dentry *);
 extern int cifs_revalidate_file(struct file *filp);
 extern int cifs_revalidate_dentry(struct dentry *);
-extern void cifs_invalidate_mapping(struct inode *inode);
+extern int cifs_invalidate_mapping(struct inode *inode);
 extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 extern int cifs_setattr(struct dentry *, struct iattr *);
 
@@ -80,12 +82,12 @@ extern const struct file_operations cifs_file_strict_nobrl_ops;
 extern int cifs_open(struct inode *inode, struct file *file);
 extern int cifs_close(struct inode *inode, struct file *file);
 extern int cifs_closedir(struct inode *inode, struct file *file);
-extern ssize_t cifs_user_read(struct file *file, char __user *read_data,
-              size_t read_size, loff_t *poffset);
+extern ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
+               unsigned long nr_segs, loff_t pos);
 extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
                  unsigned long nr_segs, loff_t pos);
-extern ssize_t cifs_user_write(struct file *file, const char __user *write_data,
-               size_t write_size, loff_t *poffset);
+extern ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
+                unsigned long nr_segs, loff_t pos);
 extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
                   unsigned long nr_segs, loff_t pos);
 extern int cifs_lock(struct file *, int, struct file_lock *);
@@ -123,9 +125,9 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
 extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
 extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CIFS_NFSD_EXPORT
 extern const struct export_operations cifs_export_ops;
-#endif /* EXPERIMENTAL */
+#endif /* CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "1.71"
+#define CIFS_VERSION   "1.72"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a5d1106fcbde..76b4517e74b0 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -274,7 +274,8 @@ struct cifsSesInfo {
     int capabilities;
     char serverName[SERVER_NAME_LEN_WITH_NULL * 2];	/* BB make bigger for
                 TCP names - will ipv6 and sctp addresses fit? */
-    char *user_name;
+    char *user_name;	/* must not be null except during init of sess
+                   and after mount option parsing we fill it */
     char *domainName;
     char *password;
     struct session_key auth_key;
@@ -780,10 +781,12 @@ GLOBAL_EXTERN spinlock_t cifs_tcp_ses_lock;
  */
 GLOBAL_EXTERN spinlock_t	cifs_file_list_lock;
 
+#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
 /* Outstanding dir notify requests */
 GLOBAL_EXTERN struct list_head GlobalDnotifyReqList;
 /* DirNotify response queue */
 GLOBAL_EXTERN struct list_head GlobalDnotifyRsp_Q;
+#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
 
 /*
  * Global transaction id (XID) information
@@ -830,6 +833,11 @@ GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/
 /* reconnect after this many failed echo attempts */
 GLOBAL_EXTERN unsigned short echo_retries;
 
+GLOBAL_EXTERN struct rb_root uidtree;
+GLOBAL_EXTERN struct rb_root gidtree;
+GLOBAL_EXTERN spinlock_t siduidlock;
+GLOBAL_EXTERN spinlock_t sidgidlock;
+
 void cifs_oplock_break(struct work_struct *work);
 void cifs_oplock_break_get(struct cifsFileInfo *cfile);
 void cifs_oplock_break_put(struct cifsFileInfo *cfile);
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b5c8cc5d7a7f..de3aa285de03 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -397,9 +397,9 @@
 #define GETU32(var) (*((__u32 *)var))	/* BB check for endian issues */
 
 struct smb_hdr {
-    __u32 smb_buf_length;	/* big endian on wire *//* BB length is only two
-        or three bytes - with one or two byte type preceding it that are
-        zero - we could mask the type byte off just in case BB */
+    __be32 smb_buf_length;	/* BB length is only two (rarely three) bytes,
+        with one or two byte "type" preceding it that will be
+        zero - we could mask the type byte off */
     __u8 Protocol[4];
     __u8 Command;
     union {
@@ -428,43 +428,28 @@ struct smb_hdr {
     __u8 WordCount;
 } __attribute__((packed));
 
-/* given a pointer to an smb_hdr retrieve a char pointer to the byte count */
-#define BCC(smb_var) ((unsigned char *)(smb_var) + sizeof(struct smb_hdr) + \
-             (2 * (smb_var)->WordCount))
+/* given a pointer to an smb_hdr, retrieve a void pointer to the ByteCount */
+static inline void *
+BCC(struct smb_hdr *smb)
+{
+    return (void *)smb + sizeof(*smb) + 2 * smb->WordCount;
+}
 
 /* given a pointer to an smb_hdr retrieve the pointer to the byte area */
 #define pByteArea(smb_var) (BCC(smb_var) + 2)
 
-/* get the converted ByteCount for a SMB packet and return it */
-static inline __u16
-get_bcc(struct smb_hdr *hdr)
-{
-    __u16 *bc_ptr = (__u16 *)BCC(hdr);
-
-    return get_unaligned(bc_ptr);
-}
-
 /* get the unconverted ByteCount for a SMB packet and return it */
 static inline __u16
-get_bcc_le(struct smb_hdr *hdr)
+get_bcc(struct smb_hdr *hdr)
 {
     __le16 *bc_ptr = (__le16 *)BCC(hdr);
 
     return get_unaligned_le16(bc_ptr);
 }
 
-/* set the ByteCount for a SMB packet in host-byte order */
-static inline void
-put_bcc(__u16 count, struct smb_hdr *hdr)
-{
-    __u16 *bc_ptr = (__u16 *)BCC(hdr);
-
-    put_unaligned(count, bc_ptr);
-}
-
 /* set the ByteCount for a SMB packet in little-endian */
 static inline void
-put_bcc_le(__u16 count, struct smb_hdr *hdr)
+put_bcc(__u16 count, struct smb_hdr *hdr)
 {
     __le16 *bc_ptr = (__le16 *)BCC(hdr);
 
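
Replacing the BCC() macro above with a static inline (and making smb_buf_length a __be32 that goes through be32_to_cpu() before use) trades textual substitution for real type checking of the header pointer. A self-contained sketch of the macro-versus-inline difference, using a stand-in header struct rather than the real SMB layout:

#include <stdint.h>
#include <stdio.h>

struct hdr {
    uint32_t len_be;      /* stored big-endian on the wire */
    uint8_t  word_count;
    /* variable-length words and the byte count follow */
};

/* Macro version: no type checking of 'h'. */
#define BCC_MACRO(h) ((unsigned char *)(h) + sizeof(struct hdr) + 2 * (h)->word_count)

/* Inline version: the compiler verifies 'h' really is a struct hdr *. */
static inline void *bcc(struct hdr *h)
{
    return (unsigned char *)h + sizeof(*h) + 2 * h->word_count;
}

int main(void)
{
    struct hdr h = { 0, 3 };
    printf("byte count offset: %zu\n",
           (size_t)((unsigned char *)bcc(&h) - (unsigned char *)&h));
    return 0;
}

With the inline form, passing anything other than a struct hdr * is a compile-time error, and the pointer arithmetic is spelled out exactly once.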
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 8096f27ad9a8..6e69e06a30b3 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -53,6 +53,9 @@ do { \
     cFYI(1, "CIFS VFS: leaving %s (xid = %d) rc = %d",	\
          __func__, curr_xid, (int)rc);			\
 } while (0)
+extern int init_cifs_idmap(void);
+extern void exit_cifs_idmap(void);
+extern void cifs_destroy_idmaptrees(void);
 extern char *build_path_from_dentry(struct dentry *);
 extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb,
                     struct cifsTconInfo *tcon);
@@ -90,7 +93,6 @@ extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
 extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
 extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
 extern unsigned int smbCalcSize(struct smb_hdr *ptr);
-extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
 extern int decode_negTokenInit(unsigned char *security_blob, int length,
                    struct TCP_Server_Info *server);
 extern int cifs_convert_address(struct sockaddr *dst, const char *src, int len);
@@ -143,8 +145,10 @@ extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
 extern int mode_to_cifs_acl(struct inode *inode, const char *path, __u64);
 extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *,
                     const char *, u32 *);
+extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *,
+                const char *);
 
-extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *,
+extern int cifs_mount(struct super_block *, struct cifs_sb_info *,
            const char *);
 extern int cifs_umount(struct super_block *, struct cifs_sb_info *);
 extern void cifs_dfs_release_automount_timer(void);
@@ -304,12 +308,13 @@ extern int CIFSSMBUnixQuerySymLink(const int xid,
             struct cifsTconInfo *tcon,
             const unsigned char *searchName, char **syminfo,
             const struct nls_table *nls_codepage);
+#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL
 extern int CIFSSMBQueryReparseLinkInfo(const int xid,
             struct cifsTconInfo *tcon,
             const unsigned char *searchName,
             char *symlinkinfo, const int buflen, __u16 fid,
             const struct nls_table *nls_codepage);
-
+#endif /* temporarily unused until cifs_symlink fixed */
 extern int CIFSSMBOpen(const int xid, struct cifsTconInfo *tcon,
             const char *fileName, const int disposition,
             const int access_flags, const int omode,
@@ -348,8 +353,6 @@ extern int CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon,
             const unsigned char *searchName, __u64 *inode_number,
             const struct nls_table *nls_codepage,
             int remap_special_chars);
-extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
-            const struct nls_table *cp, int mapChars);
 
 extern int CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
             const __u16 netfid, const __u64 len,
@@ -383,9 +386,15 @@ extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
383extern int calc_seckey(struct cifsSesInfo *); 386extern int calc_seckey(struct cifsSesInfo *);
384 387
385#ifdef CONFIG_CIFS_WEAK_PW_HASH 388#ifdef CONFIG_CIFS_WEAK_PW_HASH
386extern void calc_lanman_hash(const char *password, const char *cryptkey, 389extern int calc_lanman_hash(const char *password, const char *cryptkey,
387 bool encrypt, char *lnm_session_key); 390 bool encrypt, char *lnm_session_key);
388#endif /* CIFS_WEAK_PW_HASH */ 391#endif /* CIFS_WEAK_PW_HASH */
392#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
393extern int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
394 const int notify_subdirs, const __u16 netfid,
395 __u32 filter, struct file *file, int multishot,
396 const struct nls_table *nls_codepage);
397#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
389extern int CIFSSMBCopy(int xid, 398extern int CIFSSMBCopy(int xid,
390 struct cifsTconInfo *source_tcon, 399 struct cifsTconInfo *source_tcon,
391 const char *fromName, 400 const char *fromName,
@@ -393,10 +402,6 @@ extern int CIFSSMBCopy(int xid,
393 const char *toName, const int flags, 402 const char *toName, const int flags,
394 const struct nls_table *nls_codepage, 403 const struct nls_table *nls_codepage,
395 int remap_special_chars); 404 int remap_special_chars);
396extern int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
397 const int notify_subdirs, const __u16 netfid,
398 __u32 filter, struct file *file, int multishot,
399 const struct nls_table *nls_codepage);
400extern ssize_t CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon, 405extern ssize_t CIFSSMBQAllEAs(const int xid, struct cifsTconInfo *tcon,
401 const unsigned char *searchName, 406 const unsigned char *searchName,
402 const unsigned char *ea_name, char *EAData, 407 const unsigned char *ea_name, char *EAData,
@@ -427,9 +432,6 @@ extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr,
427 struct cifs_sb_info *cifs_sb, int xid); 432 struct cifs_sb_info *cifs_sb, int xid);
428extern int mdfour(unsigned char *, unsigned char *, int); 433extern int mdfour(unsigned char *, unsigned char *, int);
429extern int E_md4hash(const unsigned char *passwd, unsigned char *p16); 434extern int E_md4hash(const unsigned char *passwd, unsigned char *p16);
430extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8, 435extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8,
431 unsigned char *p24);
432extern void E_P16(unsigned char *p14, unsigned char *p16);
433extern void E_P24(unsigned char *p21, const unsigned char *c8,
434 unsigned char *p24); 436 unsigned char *p24);
435#endif /* _CIFSPROTO_H */ 437#endif /* _CIFSPROTO_H */
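Note: several helpers in the prototypes above change from void to int returns (calc_lanman_hash(), SMBencrypt()), so callers can propagate crypto setup failures instead of ignoring them. A hypothetical, self-contained sketch of that pattern; hash_step() is an invented stand-in, not a cifs function:

#include <errno.h>
#include <stdio.h>

/* invented stand-in for a hash setup step that can fail and therefore
 * returns int rather than void, matching the prototype changes above */
static int hash_step(int simulate_failure)
{
	if (simulate_failure)
		return -ENOMEM;	/* e.g. a transform allocation failed */
	return 0;
}

int main(void)
{
	int rc = hash_step(0);

	if (rc)
		fprintf(stderr, "hash setup failed: %d\n", rc);
	return rc;
}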
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index df959bae6728..83df937b814e 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -339,12 +339,13 @@ static int validate_t2(struct smb_t2_rsp *pSMB)
339 get_unaligned_le16(&pSMB->t2_rsp.DataOffset) > 1024) 339 get_unaligned_le16(&pSMB->t2_rsp.DataOffset) > 1024)
340 goto vt2_err; 340 goto vt2_err;
341 341
342 /* check that bcc is at least as big as parms + data */
343 /* check that bcc is less than negotiated smb buffer */
344 total_size = get_unaligned_le16(&pSMB->t2_rsp.ParameterCount); 342 total_size = get_unaligned_le16(&pSMB->t2_rsp.ParameterCount);
345 if (total_size >= 512) 343 if (total_size >= 512)
346 goto vt2_err; 344 goto vt2_err;
347 345
346 /* check that bcc is at least as big as parms + data, and that it is
347 * less than negotiated smb buffer
348 */
348 total_size += get_unaligned_le16(&pSMB->t2_rsp.DataCount); 349 total_size += get_unaligned_le16(&pSMB->t2_rsp.DataCount);
349 if (total_size > get_bcc(&pSMB->hdr) || 350 if (total_size > get_bcc(&pSMB->hdr) ||
350 total_size >= CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) 351 total_size >= CIFSMaxBufSize + MAX_CIFS_HDR_SIZE)
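Note: validate_t2() now reads the byte count through get_bcc() and the two sanity comments are merged into one. A rough user-space sketch of the check being performed; the limits are passed in as parameters since CIFSMaxBufSize and MAX_CIFS_HDR_SIZE are kernel-side values:

#include <stdbool.h>
#include <stdint.h>

/*
 * Sketch of the validate_t2() size check: the advertised parameter and data
 * byte counts must fit inside the ByteCount area and stay below the
 * negotiated maximum buffer size. Purely illustrative.
 */
static bool t2_counts_ok(uint16_t parm_count, uint16_t data_count,
			 uint16_t bcc, uint32_t max_buf)
{
	uint32_t total = (uint32_t)parm_count + data_count;

	if (parm_count >= 512)		/* mirrors the 512-byte parameter cap */
		return false;
	return total <= bcc && total < max_buf;
}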
@@ -357,6 +358,13 @@ vt2_err:
357 return -EINVAL; 358 return -EINVAL;
358} 359}
359 360
361static inline void inc_rfc1001_len(void *pSMB, int count)
362{
363 struct smb_hdr *hdr = (struct smb_hdr *)pSMB;
364
365 be32_add_cpu(&hdr->smb_buf_length, count);
366}
367
360int 368int
361CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) 369CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
362{ 370{
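Note: the new inc_rfc1001_len() helper replaces the many open-coded "pSMB->hdr.smb_buf_length += count" updates in the hunks below. Since smb_buf_length is now __be32, the addition has to round-trip through byte order, which is what be32_add_cpu() does in the kernel. A user-space equivalent, assuming a plain pointer to the big-endian length field:

#include <stdint.h>
#include <arpa/inet.h>	/* htonl()/ntohl() stand in for cpu_to_be32()/be32_to_cpu() */

/* sketch of inc_rfc1001_len(): the RFC 1001 length prefix is kept big-endian
 * in the send buffer, so grow it by convert -> add -> convert back */
static void inc_rfc1001_len_sketch(uint32_t *be_len, int count)
{
	*be_len = htonl(ntohl(*be_len) + count);
}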
@@ -409,7 +417,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
409 count += strlen(protocols[i].name) + 1; 417 count += strlen(protocols[i].name) + 1;
410 /* null at end of source and target buffers anyway */ 418 /* null at end of source and target buffers anyway */
411 } 419 }
412 pSMB->hdr.smb_buf_length += count; 420 inc_rfc1001_len(pSMB, count);
413 pSMB->ByteCount = cpu_to_le16(count); 421 pSMB->ByteCount = cpu_to_le16(count);
414 422
415 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB, 423 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
@@ -541,10 +549,6 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
541 server->secType = RawNTLMSSP; 549 server->secType = RawNTLMSSP;
542 else if (secFlags & CIFSSEC_MAY_LANMAN) 550 else if (secFlags & CIFSSEC_MAY_LANMAN)
543 server->secType = LANMAN; 551 server->secType = LANMAN;
544/* #ifdef CONFIG_CIFS_EXPERIMENTAL
545 else if (secFlags & CIFSSEC_MAY_PLNTXT)
546 server->secType = ??
547#endif */
548 else { 552 else {
549 rc = -EOPNOTSUPP; 553 rc = -EOPNOTSUPP;
550 cERROR(1, "Invalid security type"); 554 cERROR(1, "Invalid security type");
@@ -578,7 +582,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
578 582
579 if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) && 583 if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) &&
580 (server->capabilities & CAP_EXTENDED_SECURITY)) { 584 (server->capabilities & CAP_EXTENDED_SECURITY)) {
581 count = pSMBr->ByteCount; 585 count = get_bcc(&pSMBr->hdr);
582 if (count < 16) { 586 if (count < 16) {
583 rc = -EIO; 587 rc = -EIO;
584 goto neg_err_exit; 588 goto neg_err_exit;
@@ -732,9 +736,9 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
732 smb->hdr.Tid = 0xffff; 736 smb->hdr.Tid = 0xffff;
733 smb->hdr.WordCount = 1; 737 smb->hdr.WordCount = 1;
734 put_unaligned_le16(1, &smb->EchoCount); 738 put_unaligned_le16(1, &smb->EchoCount);
735 put_bcc_le(1, &smb->hdr); 739 put_bcc(1, &smb->hdr);
736 smb->Data[0] = 'a'; 740 smb->Data[0] = 'a';
737 smb->hdr.smb_buf_length += 3; 741 inc_rfc1001_len(smb, 3);
738 742
739 rc = cifs_call_async(server, (struct smb_hdr *)smb, 743 rc = cifs_call_async(server, (struct smb_hdr *)smb,
740 cifs_echo_callback, server); 744 cifs_echo_callback, server);
@@ -852,7 +856,7 @@ PsxDelete:
852 pSMB->TotalParameterCount = pSMB->ParameterCount; 856 pSMB->TotalParameterCount = pSMB->ParameterCount;
853 pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_UNLINK); 857 pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_UNLINK);
854 pSMB->Reserved4 = 0; 858 pSMB->Reserved4 = 0;
855 pSMB->hdr.smb_buf_length += byte_count; 859 inc_rfc1001_len(pSMB, byte_count);
856 pSMB->ByteCount = cpu_to_le16(byte_count); 860 pSMB->ByteCount = cpu_to_le16(byte_count);
857 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 861 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
858 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 862 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -898,7 +902,7 @@ DelFileRetry:
898 pSMB->SearchAttributes = 902 pSMB->SearchAttributes =
899 cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM); 903 cpu_to_le16(ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM);
900 pSMB->BufferFormat = 0x04; 904 pSMB->BufferFormat = 0x04;
901 pSMB->hdr.smb_buf_length += name_len + 1; 905 inc_rfc1001_len(pSMB, name_len + 1);
902 pSMB->ByteCount = cpu_to_le16(name_len + 1); 906 pSMB->ByteCount = cpu_to_le16(name_len + 1);
903 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 907 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
904 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 908 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -942,7 +946,7 @@ RmDirRetry:
942 } 946 }
943 947
944 pSMB->BufferFormat = 0x04; 948 pSMB->BufferFormat = 0x04;
945 pSMB->hdr.smb_buf_length += name_len + 1; 949 inc_rfc1001_len(pSMB, name_len + 1);
946 pSMB->ByteCount = cpu_to_le16(name_len + 1); 950 pSMB->ByteCount = cpu_to_le16(name_len + 1);
947 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 951 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
948 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 952 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -985,7 +989,7 @@ MkDirRetry:
985 } 989 }
986 990
987 pSMB->BufferFormat = 0x04; 991 pSMB->BufferFormat = 0x04;
988 pSMB->hdr.smb_buf_length += name_len + 1; 992 inc_rfc1001_len(pSMB, name_len + 1);
989 pSMB->ByteCount = cpu_to_le16(name_len + 1); 993 pSMB->ByteCount = cpu_to_le16(name_len + 1);
990 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 994 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
991 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 995 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -1063,7 +1067,7 @@ PsxCreat:
1063 pSMB->TotalParameterCount = pSMB->ParameterCount; 1067 pSMB->TotalParameterCount = pSMB->ParameterCount;
1064 pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_OPEN); 1068 pSMB->InformationLevel = cpu_to_le16(SMB_POSIX_OPEN);
1065 pSMB->Reserved4 = 0; 1069 pSMB->Reserved4 = 0;
1066 pSMB->hdr.smb_buf_length += byte_count; 1070 inc_rfc1001_len(pSMB, byte_count);
1067 pSMB->ByteCount = cpu_to_le16(byte_count); 1071 pSMB->ByteCount = cpu_to_le16(byte_count);
1068 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 1072 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
1069 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 1073 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -1075,7 +1079,7 @@ PsxCreat:
1075 cFYI(1, "copying inode info"); 1079 cFYI(1, "copying inode info");
1076 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 1080 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
1077 1081
1078 if (rc || (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP))) { 1082 if (rc || get_bcc(&pSMBr->hdr) < sizeof(OPEN_PSX_RSP)) {
1079 rc = -EIO; /* bad smb */ 1083 rc = -EIO; /* bad smb */
1080 goto psx_create_err; 1084 goto psx_create_err;
1081 } 1085 }
@@ -1096,7 +1100,7 @@ PsxCreat:
1096 pRetData->Type = cpu_to_le32(-1); /* unknown */ 1100 pRetData->Type = cpu_to_le32(-1); /* unknown */
1097 cFYI(DBG2, "unknown type"); 1101 cFYI(DBG2, "unknown type");
1098 } else { 1102 } else {
1099 if (pSMBr->ByteCount < sizeof(OPEN_PSX_RSP) 1103 if (get_bcc(&pSMBr->hdr) < sizeof(OPEN_PSX_RSP)
1100 + sizeof(FILE_UNIX_BASIC_INFO)) { 1104 + sizeof(FILE_UNIX_BASIC_INFO)) {
1101 cERROR(1, "Open response data too small"); 1105 cERROR(1, "Open response data too small");
1102 pRetData->Type = cpu_to_le32(-1); 1106 pRetData->Type = cpu_to_le32(-1);
@@ -1228,7 +1232,7 @@ OldOpenRetry:
1228 pSMB->Sattr = cpu_to_le16(ATTR_HIDDEN | ATTR_SYSTEM | ATTR_DIRECTORY); 1232 pSMB->Sattr = cpu_to_le16(ATTR_HIDDEN | ATTR_SYSTEM | ATTR_DIRECTORY);
1229 pSMB->OpenFunction = cpu_to_le16(convert_disposition(openDisposition)); 1233 pSMB->OpenFunction = cpu_to_le16(convert_disposition(openDisposition));
1230 count += name_len; 1234 count += name_len;
1231 pSMB->hdr.smb_buf_length += count; 1235 inc_rfc1001_len(pSMB, count);
1232 1236
1233 pSMB->ByteCount = cpu_to_le16(count); 1237 pSMB->ByteCount = cpu_to_le16(count);
1234 /* long_op set to 1 to allow for oplock break timeouts */ 1238 /* long_op set to 1 to allow for oplock break timeouts */
@@ -1341,7 +1345,7 @@ openRetry:
1341 SECURITY_CONTEXT_TRACKING | SECURITY_EFFECTIVE_ONLY; 1345 SECURITY_CONTEXT_TRACKING | SECURITY_EFFECTIVE_ONLY;
1342 1346
1343 count += name_len; 1347 count += name_len;
1344 pSMB->hdr.smb_buf_length += count; 1348 inc_rfc1001_len(pSMB, count);
1345 1349
1346 pSMB->ByteCount = cpu_to_le16(count); 1350 pSMB->ByteCount = cpu_to_le16(count);
1347 /* long_op set to 1 to allow for oplock break timeouts */ 1351 /* long_op set to 1 to allow for oplock break timeouts */
@@ -1426,7 +1430,7 @@ CIFSSMBRead(const int xid, struct cifsTconInfo *tcon, const int netfid,
1426 } 1430 }
1427 1431
1428 iov[0].iov_base = (char *)pSMB; 1432 iov[0].iov_base = (char *)pSMB;
1429 iov[0].iov_len = pSMB->hdr.smb_buf_length + 4; 1433 iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
1430 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */, 1434 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
1431 &resp_buf_type, CIFS_LOG_ERROR); 1435 &resp_buf_type, CIFS_LOG_ERROR);
1432 cifs_stats_inc(&tcon->num_reads); 1436 cifs_stats_inc(&tcon->num_reads);
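Note: because smb_buf_length is now typed __be32, every place that used it in arithmetic (iov_len here, smb_hdr_len in CIFSSMBWrite2 below) gains an explicit be32_to_cpu() conversion. The quantity being computed is the full on-the-wire size: the 4-byte RFC 1001 prefix plus the length it encodes. A small sketch, treating the send buffer as raw bytes:

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

/* sketch only: total bytes handed to the transport = 4-byte RFC 1001
 * length prefix + the big-endian value stored in that prefix */
static size_t smb_wire_len(const unsigned char *send_buf)
{
	uint32_t be_len;

	memcpy(&be_len, send_buf, sizeof(be_len));	/* unaligned-safe read */
	return 4 + ntohl(be_len);
}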
@@ -1560,7 +1564,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
1560 1564
1561 pSMB->DataLengthLow = cpu_to_le16(bytes_sent & 0xFFFF); 1565 pSMB->DataLengthLow = cpu_to_le16(bytes_sent & 0xFFFF);
1562 pSMB->DataLengthHigh = cpu_to_le16(bytes_sent >> 16); 1566 pSMB->DataLengthHigh = cpu_to_le16(bytes_sent >> 16);
1563 pSMB->hdr.smb_buf_length += byte_count; 1567 inc_rfc1001_len(pSMB, byte_count);
1564 1568
1565 if (wct == 14) 1569 if (wct == 14)
1566 pSMB->ByteCount = cpu_to_le16(byte_count); 1570 pSMB->ByteCount = cpu_to_le16(byte_count);
@@ -1644,11 +1648,12 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1644 1648
1645 pSMB->DataLengthLow = cpu_to_le16(count & 0xFFFF); 1649 pSMB->DataLengthLow = cpu_to_le16(count & 0xFFFF);
1646 pSMB->DataLengthHigh = cpu_to_le16(count >> 16); 1650 pSMB->DataLengthHigh = cpu_to_le16(count >> 16);
1647 smb_hdr_len = pSMB->hdr.smb_buf_length + 1; /* hdr + 1 byte pad */ 1651 /* header + 1 byte pad */
1652 smb_hdr_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 1;
1648 if (wct == 14) 1653 if (wct == 14)
1649 pSMB->hdr.smb_buf_length += count+1; 1654 inc_rfc1001_len(pSMB, count + 1);
1650 else /* wct == 12 */ 1655 else /* wct == 12 */
1651 pSMB->hdr.smb_buf_length += count+5; /* smb data starts later */ 1656 inc_rfc1001_len(pSMB, count + 5); /* smb data starts later */
1652 if (wct == 14) 1657 if (wct == 14)
1653 pSMB->ByteCount = cpu_to_le16(count + 1); 1658 pSMB->ByteCount = cpu_to_le16(count + 1);
1654 else /* wct == 12 */ /* bigger pad, smaller smb hdr, keep offset ok */ { 1659 else /* wct == 12 */ /* bigger pad, smaller smb hdr, keep offset ok */ {
@@ -1748,7 +1753,7 @@ CIFSSMBLock(const int xid, struct cifsTconInfo *tcon,
1748 /* oplock break */ 1753 /* oplock break */
1749 count = 0; 1754 count = 0;
1750 } 1755 }
1751 pSMB->hdr.smb_buf_length += count; 1756 inc_rfc1001_len(pSMB, count);
1752 pSMB->ByteCount = cpu_to_le16(count); 1757 pSMB->ByteCount = cpu_to_le16(count);
1753 1758
1754 if (waitFlag) { 1759 if (waitFlag) {
@@ -1839,14 +1844,14 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1839 pSMB->Fid = smb_file_id; 1844 pSMB->Fid = smb_file_id;
1840 pSMB->InformationLevel = cpu_to_le16(SMB_SET_POSIX_LOCK); 1845 pSMB->InformationLevel = cpu_to_le16(SMB_SET_POSIX_LOCK);
1841 pSMB->Reserved4 = 0; 1846 pSMB->Reserved4 = 0;
1842 pSMB->hdr.smb_buf_length += byte_count; 1847 inc_rfc1001_len(pSMB, byte_count);
1843 pSMB->ByteCount = cpu_to_le16(byte_count); 1848 pSMB->ByteCount = cpu_to_le16(byte_count);
1844 if (waitFlag) { 1849 if (waitFlag) {
1845 rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB, 1850 rc = SendReceiveBlockingLock(xid, tcon, (struct smb_hdr *) pSMB,
1846 (struct smb_hdr *) pSMBr, &bytes_returned); 1851 (struct smb_hdr *) pSMBr, &bytes_returned);
1847 } else { 1852 } else {
1848 iov[0].iov_base = (char *)pSMB; 1853 iov[0].iov_base = (char *)pSMB;
1849 iov[0].iov_len = pSMB->hdr.smb_buf_length + 4; 1854 iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
1850 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */, 1855 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovecs */,
1851 &resp_buf_type, timeout); 1856 &resp_buf_type, timeout);
1852 pSMB = NULL; /* request buf already freed by SendReceive2. Do 1857 pSMB = NULL; /* request buf already freed by SendReceive2. Do
@@ -1862,7 +1867,7 @@ CIFSSMBPosixLock(const int xid, struct cifsTconInfo *tcon,
1862 __u16 data_count; 1867 __u16 data_count;
1863 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 1868 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
1864 1869
1865 if (rc || (pSMBr->ByteCount < sizeof(struct cifs_posix_lock))) { 1870 if (rc || get_bcc(&pSMBr->hdr) < sizeof(*parm_data)) {
1866 rc = -EIO; /* bad smb */ 1871 rc = -EIO; /* bad smb */
1867 goto plk_err_exit; 1872 goto plk_err_exit;
1868 } 1873 }
@@ -2012,7 +2017,7 @@ renameRetry:
2012 } 2017 }
2013 2018
2014 count = 1 /* 1st signature byte */ + name_len + name_len2; 2019 count = 1 /* 1st signature byte */ + name_len + name_len2;
2015 pSMB->hdr.smb_buf_length += count; 2020 inc_rfc1001_len(pSMB, count);
2016 pSMB->ByteCount = cpu_to_le16(count); 2021 pSMB->ByteCount = cpu_to_le16(count);
2017 2022
2018 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2023 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2092,7 +2097,7 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon,
2092 pSMB->InformationLevel = 2097 pSMB->InformationLevel =
2093 cpu_to_le16(SMB_SET_FILE_RENAME_INFORMATION); 2098 cpu_to_le16(SMB_SET_FILE_RENAME_INFORMATION);
2094 pSMB->Reserved4 = 0; 2099 pSMB->Reserved4 = 0;
2095 pSMB->hdr.smb_buf_length += byte_count; 2100 inc_rfc1001_len(pSMB, byte_count);
2096 pSMB->ByteCount = cpu_to_le16(byte_count); 2101 pSMB->ByteCount = cpu_to_le16(byte_count);
2097 rc = SendReceive(xid, pTcon->ses, (struct smb_hdr *) pSMB, 2102 rc = SendReceive(xid, pTcon->ses, (struct smb_hdr *) pSMB,
2098 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2103 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2159,7 +2164,7 @@ copyRetry:
2159 } 2164 }
2160 2165
2161 count = 1 /* 1st signature byte */ + name_len + name_len2; 2166 count = 1 /* 1st signature byte */ + name_len + name_len2;
2162 pSMB->hdr.smb_buf_length += count; 2167 inc_rfc1001_len(pSMB, count);
2163 pSMB->ByteCount = cpu_to_le16(count); 2168 pSMB->ByteCount = cpu_to_le16(count);
2164 2169
2165 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2170 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2249,7 +2254,7 @@ createSymLinkRetry:
2249 pSMB->DataOffset = cpu_to_le16(offset); 2254 pSMB->DataOffset = cpu_to_le16(offset);
2250 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_LINK); 2255 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_LINK);
2251 pSMB->Reserved4 = 0; 2256 pSMB->Reserved4 = 0;
2252 pSMB->hdr.smb_buf_length += byte_count; 2257 inc_rfc1001_len(pSMB, byte_count);
2253 pSMB->ByteCount = cpu_to_le16(byte_count); 2258 pSMB->ByteCount = cpu_to_le16(byte_count);
2254 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2259 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2255 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2260 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2335,7 +2340,7 @@ createHardLinkRetry:
2335 pSMB->DataOffset = cpu_to_le16(offset); 2340 pSMB->DataOffset = cpu_to_le16(offset);
2336 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_HLINK); 2341 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_HLINK);
2337 pSMB->Reserved4 = 0; 2342 pSMB->Reserved4 = 0;
2338 pSMB->hdr.smb_buf_length += byte_count; 2343 inc_rfc1001_len(pSMB, byte_count);
2339 pSMB->ByteCount = cpu_to_le16(byte_count); 2344 pSMB->ByteCount = cpu_to_le16(byte_count);
2340 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2345 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2341 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2346 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2406,7 +2411,7 @@ winCreateHardLinkRetry:
2406 } 2411 }
2407 2412
2408 count = 1 /* string type byte */ + name_len + name_len2; 2413 count = 1 /* string type byte */ + name_len + name_len2;
2409 pSMB->hdr.smb_buf_length += count; 2414 inc_rfc1001_len(pSMB, count);
2410 pSMB->ByteCount = cpu_to_le16(count); 2415 pSMB->ByteCount = cpu_to_le16(count);
2411 2416
2412 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2417 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2477,7 +2482,7 @@ querySymLinkRetry:
2477 pSMB->ParameterCount = pSMB->TotalParameterCount; 2482 pSMB->ParameterCount = pSMB->TotalParameterCount;
2478 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_LINK); 2483 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_LINK);
2479 pSMB->Reserved4 = 0; 2484 pSMB->Reserved4 = 0;
2480 pSMB->hdr.smb_buf_length += byte_count; 2485 inc_rfc1001_len(pSMB, byte_count);
2481 pSMB->ByteCount = cpu_to_le16(byte_count); 2486 pSMB->ByteCount = cpu_to_le16(byte_count);
2482 2487
2483 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2488 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2489,7 +2494,7 @@ querySymLinkRetry:
2489 2494
2490 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 2495 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
2491 /* BB also check enough total bytes returned */ 2496 /* BB also check enough total bytes returned */
2492 if (rc || (pSMBr->ByteCount < 2)) 2497 if (rc || get_bcc(&pSMBr->hdr) < 2)
2493 rc = -EIO; 2498 rc = -EIO;
2494 else { 2499 else {
2495 bool is_unicode; 2500 bool is_unicode;
@@ -2516,7 +2521,17 @@ querySymLinkRetry:
2516 return rc; 2521 return rc;
2517} 2522}
2518 2523
2519#ifdef CONFIG_CIFS_EXPERIMENTAL 2524#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL
2525/*
2526 * Recent Windows versions now create symlinks more frequently
2527 * and they use the "reparse point" mechanism below. We can of course
2528 * do symlinks nicely to Samba and other servers which support the
2529 * CIFS Unix Extensions and we can also do SFU symlinks and "client only"
2530 * "MF" symlinks optionally, but for recent Windows we really need to
2531 * reenable the code below and fix the cifs_symlink callers to handle this.
2532 * In the interim this code has been moved to its own config option so
2533 * it is not compiled in by default until callers fixed up and more tested.
2534 */
2520int 2535int
2521CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon, 2536CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2522 const unsigned char *searchName, 2537 const unsigned char *searchName,
@@ -2561,14 +2576,14 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2561 } else { /* decode response */ 2576 } else { /* decode response */
2562 __u32 data_offset = le32_to_cpu(pSMBr->DataOffset); 2577 __u32 data_offset = le32_to_cpu(pSMBr->DataOffset);
2563 __u32 data_count = le32_to_cpu(pSMBr->DataCount); 2578 __u32 data_count = le32_to_cpu(pSMBr->DataCount);
2564 if ((pSMBr->ByteCount < 2) || (data_offset > 512)) { 2579 if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) {
2565 /* BB also check enough total bytes returned */ 2580 /* BB also check enough total bytes returned */
2566 rc = -EIO; /* bad smb */ 2581 rc = -EIO; /* bad smb */
2567 goto qreparse_out; 2582 goto qreparse_out;
2568 } 2583 }
2569 if (data_count && (data_count < 2048)) { 2584 if (data_count && (data_count < 2048)) {
2570 char *end_of_smb = 2 /* sizeof byte count */ + 2585 char *end_of_smb = 2 /* sizeof byte count */ +
2571 pSMBr->ByteCount + (char *)&pSMBr->ByteCount; 2586 get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount;
2572 2587
2573 struct reparse_data *reparse_buf = 2588 struct reparse_data *reparse_buf =
2574 (struct reparse_data *) 2589 (struct reparse_data *)
@@ -2618,7 +2633,7 @@ qreparse_out:
2618 2633
2619 return rc; 2634 return rc;
2620} 2635}
2621#endif /* CIFS_EXPERIMENTAL */ 2636#endif /* CIFS_SYMLINK_EXPERIMENTAL */ /* BB temporarily unused */
2622 2637
2623#ifdef CONFIG_CIFS_POSIX 2638#ifdef CONFIG_CIFS_POSIX
2624 2639
@@ -2814,7 +2829,7 @@ queryAclRetry:
2814 pSMB->ParameterCount = pSMB->TotalParameterCount; 2829 pSMB->ParameterCount = pSMB->TotalParameterCount;
2815 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_ACL); 2830 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_ACL);
2816 pSMB->Reserved4 = 0; 2831 pSMB->Reserved4 = 0;
2817 pSMB->hdr.smb_buf_length += byte_count; 2832 inc_rfc1001_len(pSMB, byte_count);
2818 pSMB->ByteCount = cpu_to_le16(byte_count); 2833 pSMB->ByteCount = cpu_to_le16(byte_count);
2819 2834
2820 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2835 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2826,8 +2841,8 @@ queryAclRetry:
2826 /* decode response */ 2841 /* decode response */
2827 2842
2828 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 2843 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
2829 if (rc || (pSMBr->ByteCount < 2))
2830 /* BB also check enough total bytes returned */ 2844 /* BB also check enough total bytes returned */
2845 if (rc || get_bcc(&pSMBr->hdr) < 2)
2831 rc = -EIO; /* bad smb */ 2846 rc = -EIO; /* bad smb */
2832 else { 2847 else {
2833 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 2848 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -2908,7 +2923,7 @@ setAclRetry:
2908 pSMB->ParameterCount = cpu_to_le16(params); 2923 pSMB->ParameterCount = cpu_to_le16(params);
2909 pSMB->TotalParameterCount = pSMB->ParameterCount; 2924 pSMB->TotalParameterCount = pSMB->ParameterCount;
2910 pSMB->Reserved4 = 0; 2925 pSMB->Reserved4 = 0;
2911 pSMB->hdr.smb_buf_length += byte_count; 2926 inc_rfc1001_len(pSMB, byte_count);
2912 pSMB->ByteCount = cpu_to_le16(byte_count); 2927 pSMB->ByteCount = cpu_to_le16(byte_count);
2913 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2928 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
2914 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 2929 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -2966,7 +2981,7 @@ GetExtAttrRetry:
2966 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_ATTR_FLAGS); 2981 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_ATTR_FLAGS);
2967 pSMB->Pad = 0; 2982 pSMB->Pad = 0;
2968 pSMB->Fid = netfid; 2983 pSMB->Fid = netfid;
2969 pSMB->hdr.smb_buf_length += byte_count; 2984 inc_rfc1001_len(pSMB, byte_count);
2970 pSMB->t2.ByteCount = cpu_to_le16(byte_count); 2985 pSMB->t2.ByteCount = cpu_to_le16(byte_count);
2971 2986
2972 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 2987 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -2976,8 +2991,8 @@ GetExtAttrRetry:
2976 } else { 2991 } else {
2977 /* decode response */ 2992 /* decode response */
2978 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 2993 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
2979 if (rc || (pSMBr->ByteCount < 2))
2980 /* BB also check enough total bytes returned */ 2994 /* BB also check enough total bytes returned */
2995 if (rc || get_bcc(&pSMBr->hdr) < 2)
2981 /* If rc should we check for EOPNOSUPP and 2996 /* If rc should we check for EOPNOSUPP and
2982 disable the srvino flag? or in caller? */ 2997 disable the srvino flag? or in caller? */
2983 rc = -EIO; /* bad smb */ 2998 rc = -EIO; /* bad smb */
@@ -3052,6 +3067,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
3052 char *end_of_smb; 3067 char *end_of_smb;
3053 __u32 data_count, data_offset, parm_count, parm_offset; 3068 __u32 data_count, data_offset, parm_count, parm_offset;
3054 struct smb_com_ntransact_rsp *pSMBr; 3069 struct smb_com_ntransact_rsp *pSMBr;
3070 u16 bcc;
3055 3071
3056 *pdatalen = 0; 3072 *pdatalen = 0;
3057 *pparmlen = 0; 3073 *pparmlen = 0;
@@ -3061,8 +3077,8 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
3061 3077
3062 pSMBr = (struct smb_com_ntransact_rsp *)buf; 3078 pSMBr = (struct smb_com_ntransact_rsp *)buf;
3063 3079
3064 /* ByteCount was converted from little endian in SendReceive */ 3080 bcc = get_bcc(&pSMBr->hdr);
3065 end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount + 3081 end_of_smb = 2 /* sizeof byte count */ + bcc +
3066 (char *)&pSMBr->ByteCount; 3082 (char *)&pSMBr->ByteCount;
3067 3083
3068 data_offset = le32_to_cpu(pSMBr->DataOffset); 3084 data_offset = le32_to_cpu(pSMBr->DataOffset);
@@ -3088,7 +3104,7 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
3088 *ppdata, data_count, (data_count + *ppdata), 3104 *ppdata, data_count, (data_count + *ppdata),
3089 end_of_smb, pSMBr); 3105 end_of_smb, pSMBr);
3090 return -EINVAL; 3106 return -EINVAL;
3091 } else if (parm_count + data_count > pSMBr->ByteCount) { 3107 } else if (parm_count + data_count > bcc) {
3092 cFYI(1, "parm count and data count larger than SMB"); 3108 cFYI(1, "parm count and data count larger than SMB");
3093 return -EINVAL; 3109 return -EINVAL;
3094 } 3110 }
@@ -3124,9 +3140,9 @@ CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
3124 pSMB->AclFlags = cpu_to_le32(CIFS_ACL_OWNER | CIFS_ACL_GROUP | 3140 pSMB->AclFlags = cpu_to_le32(CIFS_ACL_OWNER | CIFS_ACL_GROUP |
3125 CIFS_ACL_DACL); 3141 CIFS_ACL_DACL);
3126 pSMB->ByteCount = cpu_to_le16(11); /* 3 bytes pad + 8 bytes parm */ 3142 pSMB->ByteCount = cpu_to_le16(11); /* 3 bytes pad + 8 bytes parm */
3127 pSMB->hdr.smb_buf_length += 11; 3143 inc_rfc1001_len(pSMB, 11);
3128 iov[0].iov_base = (char *)pSMB; 3144 iov[0].iov_base = (char *)pSMB;
3129 iov[0].iov_len = pSMB->hdr.smb_buf_length + 4; 3145 iov[0].iov_len = be32_to_cpu(pSMB->hdr.smb_buf_length) + 4;
3130 3146
3131 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type, 3147 rc = SendReceive2(xid, tcon->ses, iov, 1 /* num iovec */, &buf_type,
3132 0); 3148 0);
@@ -3235,10 +3251,9 @@ setCifsAclRetry:
3235 memcpy((char *) &pSMBr->hdr.Protocol + data_offset, 3251 memcpy((char *) &pSMBr->hdr.Protocol + data_offset,
3236 (char *) pntsd, 3252 (char *) pntsd,
3237 acllen); 3253 acllen);
3238 pSMB->hdr.smb_buf_length += (byte_count + data_count); 3254 inc_rfc1001_len(pSMB, byte_count + data_count);
3239
3240 } else 3255 } else
3241 pSMB->hdr.smb_buf_length += byte_count; 3256 inc_rfc1001_len(pSMB, byte_count);
3242 3257
3243 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3258 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3244 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3259 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3289,7 +3304,7 @@ QInfRetry:
3289 } 3304 }
3290 pSMB->BufferFormat = 0x04; 3305 pSMB->BufferFormat = 0x04;
3291 name_len++; /* account for buffer type byte */ 3306 name_len++; /* account for buffer type byte */
3292 pSMB->hdr.smb_buf_length += (__u16) name_len; 3307 inc_rfc1001_len(pSMB, (__u16)name_len);
3293 pSMB->ByteCount = cpu_to_le16(name_len); 3308 pSMB->ByteCount = cpu_to_le16(name_len);
3294 3309
3295 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3310 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3364,7 +3379,7 @@ QFileInfoRetry:
3364 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO); 3379 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO);
3365 pSMB->Pad = 0; 3380 pSMB->Pad = 0;
3366 pSMB->Fid = netfid; 3381 pSMB->Fid = netfid;
3367 pSMB->hdr.smb_buf_length += byte_count; 3382 inc_rfc1001_len(pSMB, byte_count);
3368 3383
3369 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3384 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3370 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3385 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3375,7 +3390,7 @@ QFileInfoRetry:
3375 3390
3376 if (rc) /* BB add auto retry on EOPNOTSUPP? */ 3391 if (rc) /* BB add auto retry on EOPNOTSUPP? */
3377 rc = -EIO; 3392 rc = -EIO;
3378 else if (pSMBr->ByteCount < 40) 3393 else if (get_bcc(&pSMBr->hdr) < 40)
3379 rc = -EIO; /* bad smb */ 3394 rc = -EIO; /* bad smb */
3380 else if (pFindData) { 3395 else if (pFindData) {
3381 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 3396 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -3451,7 +3466,7 @@ QPathInfoRetry:
3451 else 3466 else
3452 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO); 3467 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_ALL_INFO);
3453 pSMB->Reserved4 = 0; 3468 pSMB->Reserved4 = 0;
3454 pSMB->hdr.smb_buf_length += byte_count; 3469 inc_rfc1001_len(pSMB, byte_count);
3455 pSMB->ByteCount = cpu_to_le16(byte_count); 3470 pSMB->ByteCount = cpu_to_le16(byte_count);
3456 3471
3457 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3472 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3463,9 +3478,9 @@ QPathInfoRetry:
3463 3478
3464 if (rc) /* BB add auto retry on EOPNOTSUPP? */ 3479 if (rc) /* BB add auto retry on EOPNOTSUPP? */
3465 rc = -EIO; 3480 rc = -EIO;
3466 else if (!legacy && (pSMBr->ByteCount < 40)) 3481 else if (!legacy && get_bcc(&pSMBr->hdr) < 40)
3467 rc = -EIO; /* bad smb */ 3482 rc = -EIO; /* bad smb */
3468 else if (legacy && (pSMBr->ByteCount < 24)) 3483 else if (legacy && get_bcc(&pSMBr->hdr) < 24)
3469 rc = -EIO; /* 24 or 26 expected but we do not read 3484 rc = -EIO; /* 24 or 26 expected but we do not read
3470 last field */ 3485 last field */
3471 else if (pFindData) { 3486 else if (pFindData) {
@@ -3532,7 +3547,7 @@ UnixQFileInfoRetry:
3532 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC); 3547 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
3533 pSMB->Pad = 0; 3548 pSMB->Pad = 0;
3534 pSMB->Fid = netfid; 3549 pSMB->Fid = netfid;
3535 pSMB->hdr.smb_buf_length += byte_count; 3550 inc_rfc1001_len(pSMB, byte_count);
3536 3551
3537 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3552 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
3538 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 3553 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -3541,7 +3556,7 @@ UnixQFileInfoRetry:
3541 } else { /* decode response */ 3556 } else { /* decode response */
3542 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3557 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
3543 3558
3544 if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) { 3559 if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) {
3545 cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n" 3560 cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n"
3546 "Unix Extensions can be disabled on mount " 3561 "Unix Extensions can be disabled on mount "
3547 "by specifying the nosfu mount option."); 3562 "by specifying the nosfu mount option.");
@@ -3617,7 +3632,7 @@ UnixQPathInfoRetry:
3617 pSMB->ParameterCount = pSMB->TotalParameterCount; 3632 pSMB->ParameterCount = pSMB->TotalParameterCount;
3618 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC); 3633 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC);
3619 pSMB->Reserved4 = 0; 3634 pSMB->Reserved4 = 0;
3620 pSMB->hdr.smb_buf_length += byte_count; 3635 inc_rfc1001_len(pSMB, byte_count);
3621 pSMB->ByteCount = cpu_to_le16(byte_count); 3636 pSMB->ByteCount = cpu_to_le16(byte_count);
3622 3637
3623 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3638 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3627,7 +3642,7 @@ UnixQPathInfoRetry:
3627 } else { /* decode response */ 3642 } else { /* decode response */
3628 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 3643 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
3629 3644
3630 if (rc || (pSMBr->ByteCount < sizeof(FILE_UNIX_BASIC_INFO))) { 3645 if (rc || get_bcc(&pSMBr->hdr) < sizeof(FILE_UNIX_BASIC_INFO)) {
3631 cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n" 3646 cERROR(1, "Malformed FILE_UNIX_BASIC_INFO response.\n"
3632 "Unix Extensions can be disabled on mount " 3647 "Unix Extensions can be disabled on mount "
3633 "by specifying the nosfu mount option."); 3648 "by specifying the nosfu mount option.");
@@ -3731,7 +3746,7 @@ findFirstRetry:
3731 3746
3732 /* BB what should we set StorageType to? Does it matter? BB */ 3747 /* BB what should we set StorageType to? Does it matter? BB */
3733 pSMB->SearchStorageType = 0; 3748 pSMB->SearchStorageType = 0;
3734 pSMB->hdr.smb_buf_length += byte_count; 3749 inc_rfc1001_len(pSMB, byte_count);
3735 pSMB->ByteCount = cpu_to_le16(byte_count); 3750 pSMB->ByteCount = cpu_to_le16(byte_count);
3736 3751
3737 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3752 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3860,7 +3875,7 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon,
3860 byte_count = params + 1 /* pad */ ; 3875 byte_count = params + 1 /* pad */ ;
3861 pSMB->TotalParameterCount = cpu_to_le16(params); 3876 pSMB->TotalParameterCount = cpu_to_le16(params);
3862 pSMB->ParameterCount = pSMB->TotalParameterCount; 3877 pSMB->ParameterCount = pSMB->TotalParameterCount;
3863 pSMB->hdr.smb_buf_length += byte_count; 3878 inc_rfc1001_len(pSMB, byte_count);
3864 pSMB->ByteCount = cpu_to_le16(byte_count); 3879 pSMB->ByteCount = cpu_to_le16(byte_count);
3865 3880
3866 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 3881 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4022,7 +4037,7 @@ GetInodeNumberRetry:
4022 pSMB->ParameterCount = pSMB->TotalParameterCount; 4037 pSMB->ParameterCount = pSMB->TotalParameterCount;
4023 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_INTERNAL_INFO); 4038 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FILE_INTERNAL_INFO);
4024 pSMB->Reserved4 = 0; 4039 pSMB->Reserved4 = 0;
4025 pSMB->hdr.smb_buf_length += byte_count; 4040 inc_rfc1001_len(pSMB, byte_count);
4026 pSMB->ByteCount = cpu_to_le16(byte_count); 4041 pSMB->ByteCount = cpu_to_le16(byte_count);
4027 4042
4028 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4043 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4032,8 +4047,8 @@ GetInodeNumberRetry:
4032 } else { 4047 } else {
4033 /* decode response */ 4048 /* decode response */
4034 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4049 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4035 if (rc || (pSMBr->ByteCount < 2))
4036 /* BB also check enough total bytes returned */ 4050 /* BB also check enough total bytes returned */
4051 if (rc || get_bcc(&pSMBr->hdr) < 2)
4037 /* If rc should we check for EOPNOSUPP and 4052 /* If rc should we check for EOPNOSUPP and
4038 disable the srvino flag? or in caller? */ 4053 disable the srvino flag? or in caller? */
4039 rc = -EIO; /* bad smb */ 4054 rc = -EIO; /* bad smb */
@@ -4246,7 +4261,7 @@ getDFSRetry:
4246 pSMB->ParameterCount = cpu_to_le16(params); 4261 pSMB->ParameterCount = cpu_to_le16(params);
4247 pSMB->TotalParameterCount = pSMB->ParameterCount; 4262 pSMB->TotalParameterCount = pSMB->ParameterCount;
4248 pSMB->MaxReferralLevel = cpu_to_le16(3); 4263 pSMB->MaxReferralLevel = cpu_to_le16(3);
4249 pSMB->hdr.smb_buf_length += byte_count; 4264 inc_rfc1001_len(pSMB, byte_count);
4250 pSMB->ByteCount = cpu_to_le16(byte_count); 4265 pSMB->ByteCount = cpu_to_le16(byte_count);
4251 4266
4252 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB, 4267 rc = SendReceive(xid, ses, (struct smb_hdr *) pSMB,
@@ -4258,13 +4273,13 @@ getDFSRetry:
4258 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4273 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4259 4274
4260 /* BB Also check if enough total bytes returned? */ 4275 /* BB Also check if enough total bytes returned? */
4261 if (rc || (pSMBr->ByteCount < 17)) { 4276 if (rc || get_bcc(&pSMBr->hdr) < 17) {
4262 rc = -EIO; /* bad smb */ 4277 rc = -EIO; /* bad smb */
4263 goto GetDFSRefExit; 4278 goto GetDFSRefExit;
4264 } 4279 }
4265 4280
4266 cFYI(1, "Decoding GetDFSRefer response BCC: %d Offset %d", 4281 cFYI(1, "Decoding GetDFSRefer response BCC: %d Offset %d",
4267 pSMBr->ByteCount, 4282 get_bcc(&pSMBr->hdr),
4268 le16_to_cpu(pSMBr->t2.DataOffset)); 4283 le16_to_cpu(pSMBr->t2.DataOffset));
4269 4284
4270 /* parse returned result into more usable form */ 4285 /* parse returned result into more usable form */
@@ -4320,7 +4335,7 @@ oldQFSInfoRetry:
4320 pSMB->Reserved3 = 0; 4335 pSMB->Reserved3 = 0;
4321 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4336 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4322 pSMB->InformationLevel = cpu_to_le16(SMB_INFO_ALLOCATION); 4337 pSMB->InformationLevel = cpu_to_le16(SMB_INFO_ALLOCATION);
4323 pSMB->hdr.smb_buf_length += byte_count; 4338 inc_rfc1001_len(pSMB, byte_count);
4324 pSMB->ByteCount = cpu_to_le16(byte_count); 4339 pSMB->ByteCount = cpu_to_le16(byte_count);
4325 4340
4326 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4341 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4330,12 +4345,12 @@ oldQFSInfoRetry:
4330 } else { /* decode response */ 4345 } else { /* decode response */
4331 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4346 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4332 4347
4333 if (rc || (pSMBr->ByteCount < 18)) 4348 if (rc || get_bcc(&pSMBr->hdr) < 18)
4334 rc = -EIO; /* bad smb */ 4349 rc = -EIO; /* bad smb */
4335 else { 4350 else {
4336 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4351 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
4337 cFYI(1, "qfsinf resp BCC: %d Offset %d", 4352 cFYI(1, "qfsinf resp BCC: %d Offset %d",
4338 pSMBr->ByteCount, data_offset); 4353 get_bcc(&pSMBr->hdr), data_offset);
4339 4354
4340 response_data = (FILE_SYSTEM_ALLOC_INFO *) 4355 response_data = (FILE_SYSTEM_ALLOC_INFO *)
4341 (((char *) &pSMBr->hdr.Protocol) + data_offset); 4356 (((char *) &pSMBr->hdr.Protocol) + data_offset);
@@ -4399,7 +4414,7 @@ QFSInfoRetry:
4399 pSMB->Reserved3 = 0; 4414 pSMB->Reserved3 = 0;
4400 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4415 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4401 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_SIZE_INFO); 4416 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_SIZE_INFO);
4402 pSMB->hdr.smb_buf_length += byte_count; 4417 inc_rfc1001_len(pSMB, byte_count);
4403 pSMB->ByteCount = cpu_to_le16(byte_count); 4418 pSMB->ByteCount = cpu_to_le16(byte_count);
4404 4419
4405 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4420 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4409,7 +4424,7 @@ QFSInfoRetry:
4409 } else { /* decode response */ 4424 } else { /* decode response */
4410 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4425 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4411 4426
4412 if (rc || (pSMBr->ByteCount < 24)) 4427 if (rc || get_bcc(&pSMBr->hdr) < 24)
4413 rc = -EIO; /* bad smb */ 4428 rc = -EIO; /* bad smb */
4414 else { 4429 else {
4415 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4430 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4479,7 +4494,7 @@ QFSAttributeRetry:
4479 pSMB->Reserved3 = 0; 4494 pSMB->Reserved3 = 0;
4480 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4495 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4481 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_ATTRIBUTE_INFO); 4496 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_ATTRIBUTE_INFO);
4482 pSMB->hdr.smb_buf_length += byte_count; 4497 inc_rfc1001_len(pSMB, byte_count);
4483 pSMB->ByteCount = cpu_to_le16(byte_count); 4498 pSMB->ByteCount = cpu_to_le16(byte_count);
4484 4499
4485 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4500 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4489,7 +4504,7 @@ QFSAttributeRetry:
4489 } else { /* decode response */ 4504 } else { /* decode response */
4490 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4505 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4491 4506
4492 if (rc || (pSMBr->ByteCount < 13)) { 4507 if (rc || get_bcc(&pSMBr->hdr) < 13) {
4493 /* BB also check if enough bytes returned */ 4508 /* BB also check if enough bytes returned */
4494 rc = -EIO; /* bad smb */ 4509 rc = -EIO; /* bad smb */
4495 } else { 4510 } else {
@@ -4550,7 +4565,7 @@ QFSDeviceRetry:
4550 pSMB->Reserved3 = 0; 4565 pSMB->Reserved3 = 0;
4551 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4566 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4552 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_DEVICE_INFO); 4567 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_FS_DEVICE_INFO);
4553 pSMB->hdr.smb_buf_length += byte_count; 4568 inc_rfc1001_len(pSMB, byte_count);
4554 pSMB->ByteCount = cpu_to_le16(byte_count); 4569 pSMB->ByteCount = cpu_to_le16(byte_count);
4555 4570
4556 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4571 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4560,7 +4575,8 @@ QFSDeviceRetry:
4560 } else { /* decode response */ 4575 } else { /* decode response */
4561 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4576 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4562 4577
4563 if (rc || (pSMBr->ByteCount < sizeof(FILE_SYSTEM_DEVICE_INFO))) 4578 if (rc || get_bcc(&pSMBr->hdr) <
4579 sizeof(FILE_SYSTEM_DEVICE_INFO))
4564 rc = -EIO; /* bad smb */ 4580 rc = -EIO; /* bad smb */
4565 else { 4581 else {
4566 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4582 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4619,7 +4635,7 @@ QFSUnixRetry:
4619 pSMB->Reserved3 = 0; 4635 pSMB->Reserved3 = 0;
4620 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4636 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4621 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_CIFS_UNIX_INFO); 4637 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_CIFS_UNIX_INFO);
4622 pSMB->hdr.smb_buf_length += byte_count; 4638 inc_rfc1001_len(pSMB, byte_count);
4623 pSMB->ByteCount = cpu_to_le16(byte_count); 4639 pSMB->ByteCount = cpu_to_le16(byte_count);
4624 4640
4625 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4641 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4629,7 +4645,7 @@ QFSUnixRetry:
4629 } else { /* decode response */ 4645 } else { /* decode response */
4630 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4646 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4631 4647
4632 if (rc || (pSMBr->ByteCount < 13)) { 4648 if (rc || get_bcc(&pSMBr->hdr) < 13) {
4633 rc = -EIO; /* bad smb */ 4649 rc = -EIO; /* bad smb */
4634 } else { 4650 } else {
4635 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4651 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4702,7 +4718,7 @@ SETFSUnixRetry:
4702 pSMB->ClientUnixMinor = cpu_to_le16(CIFS_UNIX_MINOR_VERSION); 4718 pSMB->ClientUnixMinor = cpu_to_le16(CIFS_UNIX_MINOR_VERSION);
4703 pSMB->ClientUnixCap = cpu_to_le64(cap); 4719 pSMB->ClientUnixCap = cpu_to_le64(cap);
4704 4720
4705 pSMB->hdr.smb_buf_length += byte_count; 4721 inc_rfc1001_len(pSMB, byte_count);
4706 pSMB->ByteCount = cpu_to_le16(byte_count); 4722 pSMB->ByteCount = cpu_to_le16(byte_count);
4707 4723
4708 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4724 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4764,7 +4780,7 @@ QFSPosixRetry:
4764 pSMB->Reserved3 = 0; 4780 pSMB->Reserved3 = 0;
4765 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION); 4781 pSMB->SubCommand = cpu_to_le16(TRANS2_QUERY_FS_INFORMATION);
4766 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_FS_INFO); 4782 pSMB->InformationLevel = cpu_to_le16(SMB_QUERY_POSIX_FS_INFO);
4767 pSMB->hdr.smb_buf_length += byte_count; 4783 inc_rfc1001_len(pSMB, byte_count);
4768 pSMB->ByteCount = cpu_to_le16(byte_count); 4784 pSMB->ByteCount = cpu_to_le16(byte_count);
4769 4785
4770 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4786 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4774,7 +4790,7 @@ QFSPosixRetry:
4774 } else { /* decode response */ 4790 } else { /* decode response */
4775 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 4791 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
4776 4792
4777 if (rc || (pSMBr->ByteCount < 13)) { 4793 if (rc || get_bcc(&pSMBr->hdr) < 13) {
4778 rc = -EIO; /* bad smb */ 4794 rc = -EIO; /* bad smb */
4779 } else { 4795 } else {
4780 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 4796 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset);
@@ -4890,7 +4906,7 @@ SetEOFRetry:
4890 pSMB->ParameterCount = cpu_to_le16(params); 4906 pSMB->ParameterCount = cpu_to_le16(params);
4891 pSMB->TotalParameterCount = pSMB->ParameterCount; 4907 pSMB->TotalParameterCount = pSMB->ParameterCount;
4892 pSMB->Reserved4 = 0; 4908 pSMB->Reserved4 = 0;
4893 pSMB->hdr.smb_buf_length += byte_count; 4909 inc_rfc1001_len(pSMB, byte_count);
4894 parm_data->FileSize = cpu_to_le64(size); 4910 parm_data->FileSize = cpu_to_le64(size);
4895 pSMB->ByteCount = cpu_to_le16(byte_count); 4911 pSMB->ByteCount = cpu_to_le16(byte_count);
4896 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 4912 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -4969,7 +4985,7 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
4969 cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO); 4985 cpu_to_le16(SMB_SET_FILE_END_OF_FILE_INFO);
4970 } 4986 }
4971 pSMB->Reserved4 = 0; 4987 pSMB->Reserved4 = 0;
4972 pSMB->hdr.smb_buf_length += byte_count; 4988 inc_rfc1001_len(pSMB, byte_count);
4973 pSMB->ByteCount = cpu_to_le16(byte_count); 4989 pSMB->ByteCount = cpu_to_le16(byte_count);
4974 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 4990 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
4975 if (rc) { 4991 if (rc) {
@@ -5037,7 +5053,7 @@ CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
5037 else 5053 else
5038 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO); 5054 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO);
5039 pSMB->Reserved4 = 0; 5055 pSMB->Reserved4 = 0;
5040 pSMB->hdr.smb_buf_length += byte_count; 5056 inc_rfc1001_len(pSMB, byte_count);
5041 pSMB->ByteCount = cpu_to_le16(byte_count); 5057 pSMB->ByteCount = cpu_to_le16(byte_count);
5042 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO)); 5058 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
5043 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 5059 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
@@ -5096,7 +5112,7 @@ CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon,
5096 pSMB->Fid = fid; 5112 pSMB->Fid = fid;
5097 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_DISPOSITION_INFO); 5113 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_DISPOSITION_INFO);
5098 pSMB->Reserved4 = 0; 5114 pSMB->Reserved4 = 0;
5099 pSMB->hdr.smb_buf_length += byte_count; 5115 inc_rfc1001_len(pSMB, byte_count);
5100 pSMB->ByteCount = cpu_to_le16(byte_count); 5116 pSMB->ByteCount = cpu_to_le16(byte_count);
5101 *data_offset = delete_file ? 1 : 0; 5117 *data_offset = delete_file ? 1 : 0;
5102 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); 5118 rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0);
@@ -5169,7 +5185,7 @@ SetTimesRetry:
5169 else 5185 else
5170 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO); 5186 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_BASIC_INFO);
5171 pSMB->Reserved4 = 0; 5187 pSMB->Reserved4 = 0;
5172 pSMB->hdr.smb_buf_length += byte_count; 5188 inc_rfc1001_len(pSMB, byte_count);
5173 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO)); 5189 memcpy(data_offset, data, sizeof(FILE_BASIC_INFO));
5174 pSMB->ByteCount = cpu_to_le16(byte_count); 5190 pSMB->ByteCount = cpu_to_le16(byte_count);
5175 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5191 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -5221,7 +5237,7 @@ SetAttrLgcyRetry:
5221 } 5237 }
5222 pSMB->attr = cpu_to_le16(dos_attrs); 5238 pSMB->attr = cpu_to_le16(dos_attrs);
5223 pSMB->BufferFormat = 0x04; 5239 pSMB->BufferFormat = 0x04;
5224 pSMB->hdr.smb_buf_length += name_len + 1; 5240 inc_rfc1001_len(pSMB, name_len + 1);
5225 pSMB->ByteCount = cpu_to_le16(name_len + 1); 5241 pSMB->ByteCount = cpu_to_le16(name_len + 1);
5226 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5242 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5227 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5243 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -5326,7 +5342,7 @@ CIFSSMBUnixSetFileInfo(const int xid, struct cifsTconInfo *tcon,
5326 pSMB->Fid = fid; 5342 pSMB->Fid = fid;
5327 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC); 5343 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
5328 pSMB->Reserved4 = 0; 5344 pSMB->Reserved4 = 0;
5329 pSMB->hdr.smb_buf_length += byte_count; 5345 inc_rfc1001_len(pSMB, byte_count);
5330 pSMB->ByteCount = cpu_to_le16(byte_count); 5346 pSMB->ByteCount = cpu_to_le16(byte_count);
5331 5347
5332 cifs_fill_unix_set_info(data_offset, args); 5348 cifs_fill_unix_set_info(data_offset, args);
@@ -5402,7 +5418,7 @@ setPermsRetry:
5402 pSMB->TotalDataCount = pSMB->DataCount; 5418 pSMB->TotalDataCount = pSMB->DataCount;
5403 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC); 5419 pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_UNIX_BASIC);
5404 pSMB->Reserved4 = 0; 5420 pSMB->Reserved4 = 0;
5405 pSMB->hdr.smb_buf_length += byte_count; 5421 inc_rfc1001_len(pSMB, byte_count);
5406 5422
5407 cifs_fill_unix_set_info(data_offset, args); 5423 cifs_fill_unix_set_info(data_offset, args);
5408 5424
@@ -5418,79 +5434,6 @@ setPermsRetry:
5418 return rc; 5434 return rc;
5419} 5435}
5420 5436
5421int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
5422 const int notify_subdirs, const __u16 netfid,
5423 __u32 filter, struct file *pfile, int multishot,
5424 const struct nls_table *nls_codepage)
5425{
5426 int rc = 0;
5427 struct smb_com_transaction_change_notify_req *pSMB = NULL;
5428 struct smb_com_ntransaction_change_notify_rsp *pSMBr = NULL;
5429 struct dir_notify_req *dnotify_req;
5430 int bytes_returned;
5431
5432 cFYI(1, "In CIFSSMBNotify for file handle %d", (int)netfid);
5433 rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
5434 (void **) &pSMBr);
5435 if (rc)
5436 return rc;
5437
5438 pSMB->TotalParameterCount = 0 ;
5439 pSMB->TotalDataCount = 0;
5440 pSMB->MaxParameterCount = cpu_to_le32(2);
5441 /* BB find exact data count max from sess structure BB */
5442 pSMB->MaxDataCount = 0; /* same in little endian or be */
5443/* BB VERIFY verify which is correct for above BB */
5444 pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
5445 MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
5446
5447 pSMB->MaxSetupCount = 4;
5448 pSMB->Reserved = 0;
5449 pSMB->ParameterOffset = 0;
5450 pSMB->DataCount = 0;
5451 pSMB->DataOffset = 0;
5452 pSMB->SetupCount = 4; /* single byte does not need le conversion */
5453 pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_NOTIFY_CHANGE);
5454 pSMB->ParameterCount = pSMB->TotalParameterCount;
5455 if (notify_subdirs)
5456 pSMB->WatchTree = 1; /* one byte - no le conversion needed */
5457 pSMB->Reserved2 = 0;
5458 pSMB->CompletionFilter = cpu_to_le32(filter);
5459 pSMB->Fid = netfid; /* file handle always le */
5460 pSMB->ByteCount = 0;
5461
5462 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5463 (struct smb_hdr *)pSMBr, &bytes_returned,
5464 CIFS_ASYNC_OP);
5465 if (rc) {
5466 cFYI(1, "Error in Notify = %d", rc);
5467 } else {
5468 /* Add file to outstanding requests */
5469 /* BB change to kmem cache alloc */
5470 dnotify_req = kmalloc(
5471 sizeof(struct dir_notify_req),
5472 GFP_KERNEL);
5473 if (dnotify_req) {
5474 dnotify_req->Pid = pSMB->hdr.Pid;
5475 dnotify_req->PidHigh = pSMB->hdr.PidHigh;
5476 dnotify_req->Mid = pSMB->hdr.Mid;
5477 dnotify_req->Tid = pSMB->hdr.Tid;
5478 dnotify_req->Uid = pSMB->hdr.Uid;
5479 dnotify_req->netfid = netfid;
5480 dnotify_req->pfile = pfile;
5481 dnotify_req->filter = filter;
5482 dnotify_req->multishot = multishot;
5483 spin_lock(&GlobalMid_Lock);
5484 list_add_tail(&dnotify_req->lhead,
5485 &GlobalDnotifyReqList);
5486 spin_unlock(&GlobalMid_Lock);
5487 } else
5488 rc = -ENOMEM;
5489 }
5490 cifs_buf_release(pSMB);
5491 return rc;
5492}
5493
5494#ifdef CONFIG_CIFS_XATTR 5437#ifdef CONFIG_CIFS_XATTR
5495/* 5438/*
5496 * Do a path-based QUERY_ALL_EAS call and parse the result. This is a common 5439 * Do a path-based QUERY_ALL_EAS call and parse the result. This is a common
@@ -5560,7 +5503,7 @@ QAllEAsRetry:
5560 pSMB->ParameterCount = pSMB->TotalParameterCount; 5503 pSMB->ParameterCount = pSMB->TotalParameterCount;
5561 pSMB->InformationLevel = cpu_to_le16(SMB_INFO_QUERY_ALL_EAS); 5504 pSMB->InformationLevel = cpu_to_le16(SMB_INFO_QUERY_ALL_EAS);
5562 pSMB->Reserved4 = 0; 5505 pSMB->Reserved4 = 0;
5563 pSMB->hdr.smb_buf_length += byte_count; 5506 inc_rfc1001_len(pSMB, byte_count);
5564 pSMB->ByteCount = cpu_to_le16(byte_count); 5507 pSMB->ByteCount = cpu_to_le16(byte_count);
5565 5508
5566 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5509 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -5576,7 +5519,7 @@ QAllEAsRetry:
5576 of these trans2 responses */ 5519 of these trans2 responses */
5577 5520
5578 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 5521 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
5579 if (rc || (pSMBr->ByteCount < 4)) { 5522 if (rc || get_bcc(&pSMBr->hdr) < 4) {
5580 rc = -EIO; /* bad smb */ 5523 rc = -EIO; /* bad smb */
5581 goto QAllEAsOut; 5524 goto QAllEAsOut;
5582 } 5525 }
@@ -5773,7 +5716,7 @@ SetEARetry:
5773 pSMB->ParameterCount = cpu_to_le16(params); 5716 pSMB->ParameterCount = cpu_to_le16(params);
5774 pSMB->TotalParameterCount = pSMB->ParameterCount; 5717 pSMB->TotalParameterCount = pSMB->ParameterCount;
5775 pSMB->Reserved4 = 0; 5718 pSMB->Reserved4 = 0;
5776 pSMB->hdr.smb_buf_length += byte_count; 5719 inc_rfc1001_len(pSMB, byte_count);
5777 pSMB->ByteCount = cpu_to_le16(byte_count); 5720 pSMB->ByteCount = cpu_to_le16(byte_count);
5778 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB, 5721 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5779 (struct smb_hdr *) pSMBr, &bytes_returned, 0); 5722 (struct smb_hdr *) pSMBr, &bytes_returned, 0);
@@ -5787,5 +5730,99 @@ SetEARetry:
5787 5730
5788 return rc; 5731 return rc;
5789} 5732}
5790
5791#endif 5733#endif
5734
5735#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* BB unused temporarily */
5736/*
5737 * Years ago the kernel added a "dnotify" function for Samba server,
5738 * to allow network clients (such as Windows) to display updated
5739 * lists of files in directory listings automatically when
5740 * files are added by one user when another user has the
5741 * same directory open on their desktop. The Linux cifs kernel
5742 * client hooked into the kernel side of this interface for
5743 * the same reason, but ironically when the VFS moved from
5744 * "dnotify" to "inotify" it became harder to plug in Linux
5745 * network file system clients (the most obvious use case
5746 * for notify interfaces is when multiple users can update
5747 * the contents of the same directory - exactly what network
5748 * file systems can do) although the server (Samba) could
5749 * still use it. For the short term we leave the worker
5750 * function ifdeffed out (below) until inotify is fixed
5751 * in the VFS to make it easier to plug in network file
5752 * system clients. If inotify turns out to be permanently
5753 * incompatible for network fs clients, we could instead simply
5754 * expose this config flag by adding a future cifs (and smb2) notify ioctl.
5755 */
5756int CIFSSMBNotify(const int xid, struct cifsTconInfo *tcon,
5757 const int notify_subdirs, const __u16 netfid,
5758 __u32 filter, struct file *pfile, int multishot,
5759 const struct nls_table *nls_codepage)
5760{
5761 int rc = 0;
5762 struct smb_com_transaction_change_notify_req *pSMB = NULL;
5763 struct smb_com_ntransaction_change_notify_rsp *pSMBr = NULL;
5764 struct dir_notify_req *dnotify_req;
5765 int bytes_returned;
5766
5767 cFYI(1, "In CIFSSMBNotify for file handle %d", (int)netfid);
5768 rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
5769 (void **) &pSMBr);
5770 if (rc)
5771 return rc;
5772
5773 pSMB->TotalParameterCount = 0 ;
5774 pSMB->TotalDataCount = 0;
5775 pSMB->MaxParameterCount = cpu_to_le32(2);
5776 /* BB find exact data count max from sess structure BB */
5777 pSMB->MaxDataCount = 0; /* same in little endian or be */
5778/* BB VERIFY verify which is correct for above BB */
5779 pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
5780 MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
5781
5782 pSMB->MaxSetupCount = 4;
5783 pSMB->Reserved = 0;
5784 pSMB->ParameterOffset = 0;
5785 pSMB->DataCount = 0;
5786 pSMB->DataOffset = 0;
5787 pSMB->SetupCount = 4; /* single byte does not need le conversion */
5788 pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_NOTIFY_CHANGE);
5789 pSMB->ParameterCount = pSMB->TotalParameterCount;
5790 if (notify_subdirs)
5791 pSMB->WatchTree = 1; /* one byte - no le conversion needed */
5792 pSMB->Reserved2 = 0;
5793 pSMB->CompletionFilter = cpu_to_le32(filter);
5794 pSMB->Fid = netfid; /* file handle always le */
5795 pSMB->ByteCount = 0;
5796
5797 rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
5798 (struct smb_hdr *)pSMBr, &bytes_returned,
5799 CIFS_ASYNC_OP);
5800 if (rc) {
5801 cFYI(1, "Error in Notify = %d", rc);
5802 } else {
5803 /* Add file to outstanding requests */
5804 /* BB change to kmem cache alloc */
5805 dnotify_req = kmalloc(
5806 sizeof(struct dir_notify_req),
5807 GFP_KERNEL);
5808 if (dnotify_req) {
5809 dnotify_req->Pid = pSMB->hdr.Pid;
5810 dnotify_req->PidHigh = pSMB->hdr.PidHigh;
5811 dnotify_req->Mid = pSMB->hdr.Mid;
5812 dnotify_req->Tid = pSMB->hdr.Tid;
5813 dnotify_req->Uid = pSMB->hdr.Uid;
5814 dnotify_req->netfid = netfid;
5815 dnotify_req->pfile = pfile;
5816 dnotify_req->filter = filter;
5817 dnotify_req->multishot = multishot;
5818 spin_lock(&GlobalMid_Lock);
5819 list_add_tail(&dnotify_req->lhead,
5820 &GlobalDnotifyReqList);
5821 spin_unlock(&GlobalMid_Lock);
5822 } else
5823 rc = -ENOMEM;
5824 }
5825 cifs_buf_release(pSMB);
5826 return rc;
5827}
5828#endif /* was needed for dnotify, and will be needed for inotify when VFS fix */
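The inc_rfc1001_len() helper that the cifssmb.c hunks above switch to is defined outside this section (it belongs with the header changes in the same series). Given how it is used, and given that smb_buf_length is treated as big-endian everywhere below, a minimal version would have to look like the following sketch, which adjusts the one on-the-wire big-endian header field in place instead of converting it back and forth:

	#include <asm/byteorder.h>	/* be32_add_cpu() */

	static inline void
	inc_rfc1001_len(void *buf, int count)
	{
		struct smb_hdr *hdr = (struct smb_hdr *)buf;

		/* smb_buf_length is the RFC 1001 length, kept big-endian */
		be32_add_cpu(&hdr->smb_buf_length, count);
	}

This is why every former "pSMB->hdr.smb_buf_length += byte_count" site could be converted mechanically.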
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 277262a8e82f..da284e3cb653 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -102,6 +102,7 @@ struct smb_vol {
102 bool fsc:1; /* enable fscache */ 102 bool fsc:1; /* enable fscache */
103 bool mfsymlinks:1; /* use Minshall+French Symlinks */ 103 bool mfsymlinks:1; /* use Minshall+French Symlinks */
104 bool multiuser:1; 104 bool multiuser:1;
105 bool use_smb2:1; /* force smb2 use on mount instead of cifs */
105 unsigned int rsize; 106 unsigned int rsize;
106 unsigned int wsize; 107 unsigned int wsize;
107 bool sockopt_tcp_nodelay:1; 108 bool sockopt_tcp_nodelay:1;
@@ -316,19 +317,19 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
316 put_unaligned_le16(total_in_buf, &pSMBt->t2_rsp.DataCount); 317 put_unaligned_le16(total_in_buf, &pSMBt->t2_rsp.DataCount);
317 318
318 /* fix up the BCC */ 319 /* fix up the BCC */
319 byte_count = get_bcc_le(pTargetSMB); 320 byte_count = get_bcc(pTargetSMB);
320 byte_count += total_in_buf2; 321 byte_count += total_in_buf2;
321 /* is the result too big for the field? */ 322 /* is the result too big for the field? */
322 if (byte_count > USHRT_MAX) 323 if (byte_count > USHRT_MAX)
323 return -EPROTO; 324 return -EPROTO;
324 put_bcc_le(byte_count, pTargetSMB); 325 put_bcc(byte_count, pTargetSMB);
325 326
326 byte_count = pTargetSMB->smb_buf_length; 327 byte_count = be32_to_cpu(pTargetSMB->smb_buf_length);
327 byte_count += total_in_buf2; 328 byte_count += total_in_buf2;
328 /* don't allow buffer to overflow */ 329 /* don't allow buffer to overflow */
329 if (byte_count > CIFSMaxBufSize) 330 if (byte_count > CIFSMaxBufSize)
330 return -ENOBUFS; 331 return -ENOBUFS;
331 pTargetSMB->smb_buf_length = byte_count; 332 pTargetSMB->smb_buf_length = cpu_to_be32(byte_count);
332 333
333 memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2); 334 memcpy(data_area_of_target, data_area_of_buf2, total_in_buf2);
334 335
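coalesce_t2() above drops the _le-suffixed byte-count helpers in favour of plain get_bcc()/put_bcc(), now that smb_buf_length is the only field kept big-endian. The renamed accessors are defined elsewhere in the series, but given their usage they have to behave like this sketch: the on-the-wire byte count (BCC) is little-endian and sits, unaligned, right after the parameter words.

	#include <asm/unaligned.h>	/* BCC is not naturally aligned */

	static inline __u16
	get_bcc(struct smb_hdr *hdr)
	{
		void *bc_ptr = (char *)hdr + sizeof(struct smb_hdr) +
			       2 * hdr->WordCount;

		return get_unaligned_le16(bc_ptr);
	}

	static inline void
	put_bcc(__u16 count, struct smb_hdr *hdr)
	{
		void *bc_ptr = (char *)hdr + sizeof(struct smb_hdr) +
			       2 * hdr->WordCount;

		put_unaligned_le16(count, bc_ptr);
	}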
@@ -495,8 +496,7 @@ incomplete_rcv:
495 /* Note that FC 1001 length is big endian on the wire, 496 /* Note that FC 1001 length is big endian on the wire,
496 but we convert it here so it is always manipulated 497 but we convert it here so it is always manipulated
497 as host byte order */ 498 as host byte order */
498 pdu_length = be32_to_cpu((__force __be32)smb_buffer->smb_buf_length); 499 pdu_length = be32_to_cpu(smb_buffer->smb_buf_length);
499 smb_buffer->smb_buf_length = pdu_length;
500 500
501 cFYI(1, "rfc1002 length 0x%x", pdu_length+4); 501 cFYI(1, "rfc1002 length 0x%x", pdu_length+4);
502 502
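Note what the dropped line means: the receive path no longer rewrites the length prefix into host order inside the buffer. The prefix stays big-endian in place and be32_to_cpu() is applied at each point of use. As a hedged illustration of that convention (this is not the in-tree check, which is checkSMB() in misc.c further down):

	extern unsigned int CIFSMaxBufSize;	/* declared in cifsglob.h */

	static bool frame_length_plausible(struct smb_hdr *smb,
					   unsigned int bytes_read)
	{
		__u32 len = be32_to_cpu(smb->smb_buf_length);

		/* bytes_read includes the 4-byte RFC1002 prefix, len does not */
		return bytes_read == 4 + len && len <= CIFSMaxBufSize;
	}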
@@ -735,7 +735,7 @@ multi_t2_fnd:
735 sock_release(csocket); 735 sock_release(csocket);
736 server->ssocket = NULL; 736 server->ssocket = NULL;
737 } 737 }
738 /* buffer usuallly freed in free_mid - need to free it here on exit */ 738 /* buffer usually freed in free_mid - need to free it here on exit */
739 cifs_buf_release(bigbuf); 739 cifs_buf_release(bigbuf);
740 if (smallbuf) /* no sense logging a debug message if NULL */ 740 if (smallbuf) /* no sense logging a debug message if NULL */
741 cifs_small_buf_release(smallbuf); 741 cifs_small_buf_release(smallbuf);
@@ -818,10 +818,11 @@ extract_hostname(const char *unc)
818} 818}
819 819
820static int 820static int
821cifs_parse_mount_options(char *options, const char *devname, 821cifs_parse_mount_options(const char *mountdata, const char *devname,
822 struct smb_vol *vol) 822 struct smb_vol *vol)
823{ 823{
824 char *value, *data, *end; 824 char *value, *data, *end;
825 char *mountdata_copy, *options;
825 unsigned int temp_len, i, j; 826 unsigned int temp_len, i, j;
826 char separator[2]; 827 char separator[2];
827 short int override_uid = -1; 828 short int override_uid = -1;
@@ -861,9 +862,14 @@ cifs_parse_mount_options(char *options, const char *devname,
861 862
862 vol->actimeo = CIFS_DEF_ACTIMEO; 863 vol->actimeo = CIFS_DEF_ACTIMEO;
863 864
864 if (!options) 865 if (!mountdata)
865 return 1; 866 goto cifs_parse_mount_err;
867
868 mountdata_copy = kstrndup(mountdata, PAGE_SIZE, GFP_KERNEL);
869 if (!mountdata_copy)
870 goto cifs_parse_mount_err;
866 871
872 options = mountdata_copy;
867 end = options + strlen(options); 873 end = options + strlen(options);
868 if (strncmp(options, "sep=", 4) == 0) { 874 if (strncmp(options, "sep=", 4) == 0) {
869 if (options[4] != 0) { 875 if (options[4] != 0) {
@@ -889,17 +895,22 @@ cifs_parse_mount_options(char *options, const char *devname,
889 if (!value) { 895 if (!value) {
890 printk(KERN_WARNING 896 printk(KERN_WARNING
891 "CIFS: invalid or missing username\n"); 897 "CIFS: invalid or missing username\n");
892 return 1; /* needs_arg; */ 898 goto cifs_parse_mount_err;
893 } else if (!*value) { 899 } else if (!*value) {
894 /* null user, ie anonymous, authentication */ 900 /* null user, ie anonymous, authentication */
895 vol->nullauth = 1; 901 vol->nullauth = 1;
896 } 902 }
897 if (strnlen(value, MAX_USERNAME_SIZE) < 903 if (strnlen(value, MAX_USERNAME_SIZE) <
898 MAX_USERNAME_SIZE) { 904 MAX_USERNAME_SIZE) {
899 vol->username = value; 905 vol->username = kstrdup(value, GFP_KERNEL);
906 if (!vol->username) {
907 printk(KERN_WARNING "CIFS: no memory "
908 "for username\n");
909 goto cifs_parse_mount_err;
910 }
900 } else { 911 } else {
901 printk(KERN_WARNING "CIFS: username too long\n"); 912 printk(KERN_WARNING "CIFS: username too long\n");
902 return 1; 913 goto cifs_parse_mount_err;
903 } 914 }
904 } else if (strnicmp(data, "pass", 4) == 0) { 915 } else if (strnicmp(data, "pass", 4) == 0) {
905 if (!value) { 916 if (!value) {
@@ -963,7 +974,7 @@ cifs_parse_mount_options(char *options, const char *devname,
963 if (vol->password == NULL) { 974 if (vol->password == NULL) {
964 printk(KERN_WARNING "CIFS: no memory " 975 printk(KERN_WARNING "CIFS: no memory "
965 "for password\n"); 976 "for password\n");
966 return 1; 977 goto cifs_parse_mount_err;
967 } 978 }
968 for (i = 0, j = 0; i < temp_len; i++, j++) { 979 for (i = 0, j = 0; i < temp_len; i++, j++) {
969 vol->password[j] = value[i]; 980 vol->password[j] = value[i];
@@ -979,7 +990,7 @@ cifs_parse_mount_options(char *options, const char *devname,
979 if (vol->password == NULL) { 990 if (vol->password == NULL) {
980 printk(KERN_WARNING "CIFS: no memory " 991 printk(KERN_WARNING "CIFS: no memory "
981 "for password\n"); 992 "for password\n");
982 return 1; 993 goto cifs_parse_mount_err;
983 } 994 }
984 strcpy(vol->password, value); 995 strcpy(vol->password, value);
985 } 996 }
@@ -989,11 +1000,16 @@ cifs_parse_mount_options(char *options, const char *devname,
989 vol->UNCip = NULL; 1000 vol->UNCip = NULL;
990 } else if (strnlen(value, INET6_ADDRSTRLEN) < 1001 } else if (strnlen(value, INET6_ADDRSTRLEN) <
991 INET6_ADDRSTRLEN) { 1002 INET6_ADDRSTRLEN) {
992 vol->UNCip = value; 1003 vol->UNCip = kstrdup(value, GFP_KERNEL);
1004 if (!vol->UNCip) {
1005 printk(KERN_WARNING "CIFS: no memory "
1006 "for UNC IP\n");
1007 goto cifs_parse_mount_err;
1008 }
993 } else { 1009 } else {
994 printk(KERN_WARNING "CIFS: ip address " 1010 printk(KERN_WARNING "CIFS: ip address "
995 "too long\n"); 1011 "too long\n");
996 return 1; 1012 goto cifs_parse_mount_err;
997 } 1013 }
998 } else if (strnicmp(data, "sec", 3) == 0) { 1014 } else if (strnicmp(data, "sec", 3) == 0) {
999 if (!value || !*value) { 1015 if (!value || !*value) {
@@ -1006,7 +1022,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1006 /* vol->secFlg |= CIFSSEC_MUST_SEAL | 1022 /* vol->secFlg |= CIFSSEC_MUST_SEAL |
1007 CIFSSEC_MAY_KRB5; */ 1023 CIFSSEC_MAY_KRB5; */
1008 cERROR(1, "Krb5 cifs privacy not supported"); 1024 cERROR(1, "Krb5 cifs privacy not supported");
1009 return 1; 1025 goto cifs_parse_mount_err;
1010 } else if (strnicmp(value, "krb5", 4) == 0) { 1026 } else if (strnicmp(value, "krb5", 4) == 0) {
1011 vol->secFlg |= CIFSSEC_MAY_KRB5; 1027 vol->secFlg |= CIFSSEC_MAY_KRB5;
1012 } else if (strnicmp(value, "ntlmsspi", 8) == 0) { 1028 } else if (strnicmp(value, "ntlmsspi", 8) == 0) {
@@ -1036,7 +1052,23 @@ cifs_parse_mount_options(char *options, const char *devname,
1036 vol->nullauth = 1; 1052 vol->nullauth = 1;
1037 } else { 1053 } else {
1038 cERROR(1, "bad security option: %s", value); 1054 cERROR(1, "bad security option: %s", value);
1039 return 1; 1055 goto cifs_parse_mount_err;
1056 }
1057 } else if (strnicmp(data, "vers", 3) == 0) {
1058 if (!value || !*value) {
1059 cERROR(1, "no protocol version specified"
1060 " after vers= mount option");
1061 } else if ((strnicmp(value, "cifs", 4) == 0) ||
1062 (strnicmp(value, "1", 1) == 0)) {
1063 /* this is the default */
1064 continue;
1065 } else if ((strnicmp(value, "smb2", 4) == 0) ||
1066 (strnicmp(value, "2", 1) == 0)) {
1067#ifdef CONFIG_CIFS_SMB2
1068 vol->use_smb2 = true;
1069#else
1070 cERROR(1, "smb2 support not enabled");
1071#endif /* CONFIG_CIFS_SMB2 */
1040 } 1072 }
1041 } else if ((strnicmp(data, "unc", 3) == 0) 1073 } else if ((strnicmp(data, "unc", 3) == 0)
1042 || (strnicmp(data, "target", 6) == 0) 1074 || (strnicmp(data, "target", 6) == 0)
@@ -1044,12 +1076,12 @@ cifs_parse_mount_options(char *options, const char *devname,
1044 if (!value || !*value) { 1076 if (!value || !*value) {
1045 printk(KERN_WARNING "CIFS: invalid path to " 1077 printk(KERN_WARNING "CIFS: invalid path to "
1046 "network resource\n"); 1078 "network resource\n");
1047 return 1; /* needs_arg; */ 1079 goto cifs_parse_mount_err;
1048 } 1080 }
1049 if ((temp_len = strnlen(value, 300)) < 300) { 1081 if ((temp_len = strnlen(value, 300)) < 300) {
1050 vol->UNC = kmalloc(temp_len+1, GFP_KERNEL); 1082 vol->UNC = kmalloc(temp_len+1, GFP_KERNEL);
1051 if (vol->UNC == NULL) 1083 if (vol->UNC == NULL)
1052 return 1; 1084 goto cifs_parse_mount_err;
1053 strcpy(vol->UNC, value); 1085 strcpy(vol->UNC, value);
1054 if (strncmp(vol->UNC, "//", 2) == 0) { 1086 if (strncmp(vol->UNC, "//", 2) == 0) {
1055 vol->UNC[0] = '\\'; 1087 vol->UNC[0] = '\\';
@@ -1058,27 +1090,32 @@ cifs_parse_mount_options(char *options, const char *devname,
1058 printk(KERN_WARNING 1090 printk(KERN_WARNING
1059 "CIFS: UNC Path does not begin " 1091 "CIFS: UNC Path does not begin "
1060 "with // or \\\\ \n"); 1092 "with // or \\\\ \n");
1061 return 1; 1093 goto cifs_parse_mount_err;
1062 } 1094 }
1063 } else { 1095 } else {
1064 printk(KERN_WARNING "CIFS: UNC name too long\n"); 1096 printk(KERN_WARNING "CIFS: UNC name too long\n");
1065 return 1; 1097 goto cifs_parse_mount_err;
1066 } 1098 }
1067 } else if ((strnicmp(data, "domain", 3) == 0) 1099 } else if ((strnicmp(data, "domain", 3) == 0)
1068 || (strnicmp(data, "workgroup", 5) == 0)) { 1100 || (strnicmp(data, "workgroup", 5) == 0)) {
1069 if (!value || !*value) { 1101 if (!value || !*value) {
1070 printk(KERN_WARNING "CIFS: invalid domain name\n"); 1102 printk(KERN_WARNING "CIFS: invalid domain name\n");
1071 return 1; /* needs_arg; */ 1103 goto cifs_parse_mount_err;
1072 } 1104 }
1073 /* BB are there cases in which a comma can be valid in 1105 /* BB are there cases in which a comma can be valid in
1074 a domain name and need special handling? */ 1106 a domain name and need special handling? */
1075 if (strnlen(value, 256) < 256) { 1107 if (strnlen(value, 256) < 256) {
1076 vol->domainname = value; 1108 vol->domainname = kstrdup(value, GFP_KERNEL);
1109 if (!vol->domainname) {
1110 printk(KERN_WARNING "CIFS: no memory "
1111 "for domainname\n");
1112 goto cifs_parse_mount_err;
1113 }
1077 cFYI(1, "Domain name set"); 1114 cFYI(1, "Domain name set");
1078 } else { 1115 } else {
1079 printk(KERN_WARNING "CIFS: domain name too " 1116 printk(KERN_WARNING "CIFS: domain name too "
1080 "long\n"); 1117 "long\n");
1081 return 1; 1118 goto cifs_parse_mount_err;
1082 } 1119 }
1083 } else if (strnicmp(data, "srcaddr", 7) == 0) { 1120 } else if (strnicmp(data, "srcaddr", 7) == 0) {
1084 vol->srcaddr.ss_family = AF_UNSPEC; 1121 vol->srcaddr.ss_family = AF_UNSPEC;
@@ -1086,7 +1123,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1086 if (!value || !*value) { 1123 if (!value || !*value) {
1087 printk(KERN_WARNING "CIFS: srcaddr value" 1124 printk(KERN_WARNING "CIFS: srcaddr value"
1088 " not specified.\n"); 1125 " not specified.\n");
1089 return 1; /* needs_arg; */ 1126 goto cifs_parse_mount_err;
1090 } 1127 }
1091 i = cifs_convert_address((struct sockaddr *)&vol->srcaddr, 1128 i = cifs_convert_address((struct sockaddr *)&vol->srcaddr,
1092 value, strlen(value)); 1129 value, strlen(value));
@@ -1094,20 +1131,20 @@ cifs_parse_mount_options(char *options, const char *devname,
1094 printk(KERN_WARNING "CIFS: Could not parse" 1131 printk(KERN_WARNING "CIFS: Could not parse"
1095 " srcaddr: %s\n", 1132 " srcaddr: %s\n",
1096 value); 1133 value);
1097 return 1; 1134 goto cifs_parse_mount_err;
1098 } 1135 }
1099 } else if (strnicmp(data, "prefixpath", 10) == 0) { 1136 } else if (strnicmp(data, "prefixpath", 10) == 0) {
1100 if (!value || !*value) { 1137 if (!value || !*value) {
1101 printk(KERN_WARNING 1138 printk(KERN_WARNING
1102 "CIFS: invalid path prefix\n"); 1139 "CIFS: invalid path prefix\n");
1103 return 1; /* needs_argument */ 1140 goto cifs_parse_mount_err;
1104 } 1141 }
1105 if ((temp_len = strnlen(value, 1024)) < 1024) { 1142 if ((temp_len = strnlen(value, 1024)) < 1024) {
1106 if (value[0] != '/') 1143 if (value[0] != '/')
1107 temp_len++; /* missing leading slash */ 1144 temp_len++; /* missing leading slash */
1108 vol->prepath = kmalloc(temp_len+1, GFP_KERNEL); 1145 vol->prepath = kmalloc(temp_len+1, GFP_KERNEL);
1109 if (vol->prepath == NULL) 1146 if (vol->prepath == NULL)
1110 return 1; 1147 goto cifs_parse_mount_err;
1111 if (value[0] != '/') { 1148 if (value[0] != '/') {
1112 vol->prepath[0] = '/'; 1149 vol->prepath[0] = '/';
1113 strcpy(vol->prepath+1, value); 1150 strcpy(vol->prepath+1, value);
@@ -1116,24 +1153,33 @@ cifs_parse_mount_options(char *options, const char *devname,
1116 cFYI(1, "prefix path %s", vol->prepath); 1153 cFYI(1, "prefix path %s", vol->prepath);
1117 } else { 1154 } else {
1118 printk(KERN_WARNING "CIFS: prefix too long\n"); 1155 printk(KERN_WARNING "CIFS: prefix too long\n");
1119 return 1; 1156 goto cifs_parse_mount_err;
1120 } 1157 }
1121 } else if (strnicmp(data, "iocharset", 9) == 0) { 1158 } else if (strnicmp(data, "iocharset", 9) == 0) {
1122 if (!value || !*value) { 1159 if (!value || !*value) {
1123 printk(KERN_WARNING "CIFS: invalid iocharset " 1160 printk(KERN_WARNING "CIFS: invalid iocharset "
1124 "specified\n"); 1161 "specified\n");
1125 return 1; /* needs_arg; */ 1162 goto cifs_parse_mount_err;
1126 } 1163 }
1127 if (strnlen(value, 65) < 65) { 1164 if (strnlen(value, 65) < 65) {
1128 if (strnicmp(value, "default", 7)) 1165 if (strnicmp(value, "default", 7)) {
1129 vol->iocharset = value; 1166 vol->iocharset = kstrdup(value,
1167 GFP_KERNEL);
1168
1169 if (!vol->iocharset) {
1170 printk(KERN_WARNING "CIFS: no "
1171 "memory for"
1172 "charset\n");
1173 goto cifs_parse_mount_err;
1174 }
1175 }
1130 /* if iocharset not set then load_nls_default 1176 /* if iocharset not set then load_nls_default
1131 is used by caller */ 1177 is used by caller */
1132 cFYI(1, "iocharset set to %s", value); 1178 cFYI(1, "iocharset set to %s", value);
1133 } else { 1179 } else {
1134 printk(KERN_WARNING "CIFS: iocharset name " 1180 printk(KERN_WARNING "CIFS: iocharset name "
1135 "too long.\n"); 1181 "too long.\n");
1136 return 1; 1182 goto cifs_parse_mount_err;
1137 } 1183 }
1138 } else if (!strnicmp(data, "uid", 3) && value && *value) { 1184 } else if (!strnicmp(data, "uid", 3) && value && *value) {
1139 vol->linux_uid = simple_strtoul(value, &value, 0); 1185 vol->linux_uid = simple_strtoul(value, &value, 0);
@@ -1246,7 +1292,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1246 if (vol->actimeo > CIFS_MAX_ACTIMEO) { 1292 if (vol->actimeo > CIFS_MAX_ACTIMEO) {
1247 cERROR(1, "CIFS: attribute cache" 1293 cERROR(1, "CIFS: attribute cache"
1248 "timeout too large"); 1294 "timeout too large");
1249 return 1; 1295 goto cifs_parse_mount_err;
1250 } 1296 }
1251 } 1297 }
1252 } else if (strnicmp(data, "credentials", 4) == 0) { 1298 } else if (strnicmp(data, "credentials", 4) == 0) {
@@ -1390,7 +1436,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1390#ifndef CONFIG_CIFS_FSCACHE 1436#ifndef CONFIG_CIFS_FSCACHE
1391 cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE" 1437 cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE"
1392 "kernel config option set"); 1438 "kernel config option set");
1393 return 1; 1439 goto cifs_parse_mount_err;
1394#endif 1440#endif
1395 vol->fsc = true; 1441 vol->fsc = true;
1396 } else if (strnicmp(data, "mfsymlinks", 10) == 0) { 1442 } else if (strnicmp(data, "mfsymlinks", 10) == 0) {
@@ -1405,12 +1451,12 @@ cifs_parse_mount_options(char *options, const char *devname,
1405 if (devname == NULL) { 1451 if (devname == NULL) {
1406 printk(KERN_WARNING "CIFS: Missing UNC name for mount " 1452 printk(KERN_WARNING "CIFS: Missing UNC name for mount "
1407 "target\n"); 1453 "target\n");
1408 return 1; 1454 goto cifs_parse_mount_err;
1409 } 1455 }
1410 if ((temp_len = strnlen(devname, 300)) < 300) { 1456 if ((temp_len = strnlen(devname, 300)) < 300) {
1411 vol->UNC = kmalloc(temp_len+1, GFP_KERNEL); 1457 vol->UNC = kmalloc(temp_len+1, GFP_KERNEL);
1412 if (vol->UNC == NULL) 1458 if (vol->UNC == NULL)
1413 return 1; 1459 goto cifs_parse_mount_err;
1414 strcpy(vol->UNC, devname); 1460 strcpy(vol->UNC, devname);
1415 if (strncmp(vol->UNC, "//", 2) == 0) { 1461 if (strncmp(vol->UNC, "//", 2) == 0) {
1416 vol->UNC[0] = '\\'; 1462 vol->UNC[0] = '\\';
@@ -1418,21 +1464,21 @@ cifs_parse_mount_options(char *options, const char *devname,
1418 } else if (strncmp(vol->UNC, "\\\\", 2) != 0) { 1464 } else if (strncmp(vol->UNC, "\\\\", 2) != 0) {
1419 printk(KERN_WARNING "CIFS: UNC Path does not " 1465 printk(KERN_WARNING "CIFS: UNC Path does not "
1420 "begin with // or \\\\ \n"); 1466 "begin with // or \\\\ \n");
1421 return 1; 1467 goto cifs_parse_mount_err;
1422 } 1468 }
1423 value = strpbrk(vol->UNC+2, "/\\"); 1469 value = strpbrk(vol->UNC+2, "/\\");
1424 if (value) 1470 if (value)
1425 *value = '\\'; 1471 *value = '\\';
1426 } else { 1472 } else {
1427 printk(KERN_WARNING "CIFS: UNC name too long\n"); 1473 printk(KERN_WARNING "CIFS: UNC name too long\n");
1428 return 1; 1474 goto cifs_parse_mount_err;
1429 } 1475 }
1430 } 1476 }
1431 1477
1432 if (vol->multiuser && !(vol->secFlg & CIFSSEC_MAY_KRB5)) { 1478 if (vol->multiuser && !(vol->secFlg & CIFSSEC_MAY_KRB5)) {
1433 cERROR(1, "Multiuser mounts currently require krb5 " 1479 cERROR(1, "Multiuser mounts currently require krb5 "
1434 "authentication!"); 1480 "authentication!");
1435 return 1; 1481 goto cifs_parse_mount_err;
1436 } 1482 }
1437 1483
1438 if (vol->UNCip == NULL) 1484 if (vol->UNCip == NULL)
@@ -1450,7 +1496,12 @@ cifs_parse_mount_options(char *options, const char *devname,
1450 printk(KERN_NOTICE "CIFS: ignoring forcegid mount option " 1496 printk(KERN_NOTICE "CIFS: ignoring forcegid mount option "
1451 "specified with no gid= option.\n"); 1497 "specified with no gid= option.\n");
1452 1498
1499 kfree(mountdata_copy);
1453 return 0; 1500 return 0;
1501
1502cifs_parse_mount_err:
1503 kfree(mountdata_copy);
1504 return 1;
1454} 1505}
1455 1506
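The wholesale change from "return 1" to "goto cifs_parse_mount_err" goes hand in hand with the new ownership model in this function: the parser now works on a kstrndup'd copy of the caller's mount data, so every string option that must outlive the parse is duplicated with kstrdup() and the working copy is freed on every exit path. Reduced to a skeleton (names and option syntax simplified, not the real parser):

	static int parse_opts_sketch(const char *mountdata, struct smb_vol *vol)
	{
		char *mountdata_copy, *options, *value;
		int rc = 1;			/* 1 == parse failure, as above */

		if (!mountdata)
			return rc;

		mountdata_copy = kstrndup(mountdata, PAGE_SIZE, GFP_KERNEL);
		if (!mountdata_copy)
			return rc;

		options = mountdata_copy;
		while ((value = strsep(&options, ",")) != NULL) {
			if (strncmp(value, "user=", 5) == 0) {
				/* duplicate: value points into mountdata_copy,
				 * which is freed before we return */
				vol->username = kstrdup(value + 5, GFP_KERNEL);
				if (!vol->username)
					goto parse_err;
			}
			/* ... remaining options ... */
		}
		rc = 0;
	parse_err:
		kfree(mountdata_copy);	/* duplicates in *vol survive; on error
					 * the caller frees them via
					 * cleanup_volume_info() */
		return rc;
	}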
1456/** Returns true if srcaddr isn't specified and rhs isn't 1507/** Returns true if srcaddr isn't specified and rhs isn't
@@ -2280,7 +2331,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
2280 smb_buf = (struct smb_hdr *)ses_init_buf; 2331 smb_buf = (struct smb_hdr *)ses_init_buf;
2281 2332
2282 /* sizeof RFC1002_SESSION_REQUEST with no scope */ 2333 /* sizeof RFC1002_SESSION_REQUEST with no scope */
2283 smb_buf->smb_buf_length = 0x81000044; 2334 smb_buf->smb_buf_length = cpu_to_be32(0x81000044);
2284 rc = smb_send(server, smb_buf, 0x44); 2335 rc = smb_send(server, smb_buf, 0x44);
2285 kfree(ses_init_buf); 2336 kfree(ses_init_buf);
2286 /* 2337 /*
@@ -2691,8 +2742,12 @@ cleanup_volume_info(struct smb_vol **pvolume_info)
2691 return; 2742 return;
2692 2743
2693 volume_info = *pvolume_info; 2744 volume_info = *pvolume_info;
2745 kfree(volume_info->username);
2694 kzfree(volume_info->password); 2746 kzfree(volume_info->password);
2695 kfree(volume_info->UNC); 2747 kfree(volume_info->UNC);
2748 kfree(volume_info->UNCip);
2749 kfree(volume_info->domainname);
2750 kfree(volume_info->iocharset);
2696 kfree(volume_info->prepath); 2751 kfree(volume_info->prepath);
2697 kfree(volume_info); 2752 kfree(volume_info);
2698 *pvolume_info = NULL; 2753 *pvolume_info = NULL;
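One detail in the cleanup above: the password is released with kzfree() while the newly added strings use plain kfree(). kzfree() scrubs the buffer before handing it back to the allocator, so the cleartext does not linger in freed slab memory. Roughly (a simplified sketch, not the mm implementation):

	static void free_password_sketch(char *password)
	{
		if (!password)
			return;

		memset(password, 0, strlen(password));	/* wipe the cleartext */
		kfree(password);
	}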
@@ -2729,11 +2784,65 @@ build_unc_path_to_root(const struct smb_vol *volume_info,
2729 full_path[unc_len + cifs_sb->prepathlen] = 0; /* add trailing null */ 2784 full_path[unc_len + cifs_sb->prepathlen] = 0; /* add trailing null */
2730 return full_path; 2785 return full_path;
2731} 2786}
2787
2788/*
2789 * Perform a dfs referral query for a share and (optionally) prefix
2790 *
2791 * If a referral is found, cifs_sb->mountdata will be (re-)allocated
2792 * to a string containing updated options for the submount. Otherwise it
2793 * will be left untouched.
2794 *
2795 * Returns the rc from get_dfs_path to the caller, which can be used to
2796 * determine whether there were referrals.
2797 */
2798static int
2799expand_dfs_referral(int xid, struct cifsSesInfo *pSesInfo,
2800 struct smb_vol *volume_info, struct cifs_sb_info *cifs_sb,
2801 int check_prefix)
2802{
2803 int rc;
2804 unsigned int num_referrals = 0;
2805 struct dfs_info3_param *referrals = NULL;
2806 char *full_path = NULL, *ref_path = NULL, *mdata = NULL;
2807
2808 full_path = build_unc_path_to_root(volume_info, cifs_sb);
2809 if (IS_ERR(full_path))
2810 return PTR_ERR(full_path);
2811
2812 /* For DFS paths, skip the first '\' of the UNC */
2813 ref_path = check_prefix ? full_path + 1 : volume_info->UNC + 1;
2814
2815 rc = get_dfs_path(xid, pSesInfo , ref_path, cifs_sb->local_nls,
2816 &num_referrals, &referrals,
2817 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
2818
2819 if (!rc && num_referrals > 0) {
2820 char *fake_devname = NULL;
2821
2822 mdata = cifs_compose_mount_options(cifs_sb->mountdata,
2823 full_path + 1, referrals,
2824 &fake_devname);
2825
2826 free_dfs_info_array(referrals, num_referrals);
2827 kfree(fake_devname);
2828
2829 if (cifs_sb->mountdata != NULL)
2830 kfree(cifs_sb->mountdata);
2831
2832 if (IS_ERR(mdata)) {
2833 rc = PTR_ERR(mdata);
2834 mdata = NULL;
2835 }
2836 cifs_sb->mountdata = mdata;
2837 }
2838 kfree(full_path);
2839 return rc;
2840}
2732#endif 2841#endif
2733 2842
2734int 2843int
2735cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, 2844cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2736 char *mount_data_global, const char *devname) 2845 const char *devname)
2737{ 2846{
2738 int rc; 2847 int rc;
2739 int xid; 2848 int xid;
@@ -2742,13 +2851,20 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2742 struct cifsTconInfo *tcon; 2851 struct cifsTconInfo *tcon;
2743 struct TCP_Server_Info *srvTcp; 2852 struct TCP_Server_Info *srvTcp;
2744 char *full_path; 2853 char *full_path;
2745 char *mount_data = mount_data_global;
2746 struct tcon_link *tlink; 2854 struct tcon_link *tlink;
2747#ifdef CONFIG_CIFS_DFS_UPCALL 2855#ifdef CONFIG_CIFS_DFS_UPCALL
2748 struct dfs_info3_param *referrals = NULL;
2749 unsigned int num_referrals = 0;
2750 int referral_walks_count = 0; 2856 int referral_walks_count = 0;
2751try_mount_again: 2857try_mount_again:
2858 /* cleanup activities if we're chasing a referral */
2859 if (referral_walks_count) {
2860 if (tcon)
2861 cifs_put_tcon(tcon);
2862 else if (pSesInfo)
2863 cifs_put_smb_ses(pSesInfo);
2864
2865 cleanup_volume_info(&volume_info);
2866 FreeXid(xid);
2867 }
2752#endif 2868#endif
2753 rc = 0; 2869 rc = 0;
2754 tcon = NULL; 2870 tcon = NULL;
@@ -2765,7 +2881,8 @@ try_mount_again:
2765 goto out; 2881 goto out;
2766 } 2882 }
2767 2883
2768 if (cifs_parse_mount_options(mount_data, devname, volume_info)) { 2884 if (cifs_parse_mount_options(cifs_sb->mountdata, devname,
2885 volume_info)) {
2769 rc = -EINVAL; 2886 rc = -EINVAL;
2770 goto out; 2887 goto out;
2771 } 2888 }
@@ -2861,6 +2978,24 @@ try_mount_again:
2861 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE)); 2978 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE));
2862 2979
2863remote_path_check: 2980remote_path_check:
2981#ifdef CONFIG_CIFS_DFS_UPCALL
2982 /*
2983 * Perform an unconditional check for whether there are DFS
2984 * referrals for this path without prefix, to provide support
2985 * for DFS referrals from w2k8 servers which don't seem to respond
2986 * with PATH_NOT_COVERED to requests that include the prefix.
2987 * Chase the referral if found, otherwise continue normally.
2988 */
2989 if (referral_walks_count == 0) {
2990 int refrc = expand_dfs_referral(xid, pSesInfo, volume_info,
2991 cifs_sb, false);
2992 if (!refrc) {
2993 referral_walks_count++;
2994 goto try_mount_again;
2995 }
2996 }
2997#endif
2998
2864 /* check if a whole path (including prepath) is not remote */ 2999 /* check if a whole path (including prepath) is not remote */
2865 if (!rc && tcon) { 3000 if (!rc && tcon) {
2866 /* build_path_to_root works only when we have a valid tcon */ 3001 /* build_path_to_root works only when we have a valid tcon */
@@ -2894,46 +3029,15 @@ remote_path_check:
2894 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0) 3029 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0)
2895 convert_delimiter(cifs_sb->prepath, 3030 convert_delimiter(cifs_sb->prepath,
2896 CIFS_DIR_SEP(cifs_sb)); 3031 CIFS_DIR_SEP(cifs_sb));
2897 full_path = build_unc_path_to_root(volume_info, cifs_sb);
2898 if (IS_ERR(full_path)) {
2899 rc = PTR_ERR(full_path);
2900 goto mount_fail_check;
2901 }
2902
2903 cFYI(1, "Getting referral for: %s", full_path);
2904 rc = get_dfs_path(xid, pSesInfo , full_path + 1,
2905 cifs_sb->local_nls, &num_referrals, &referrals,
2906 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
2907 if (!rc && num_referrals > 0) {
2908 char *fake_devname = NULL;
2909
2910 if (mount_data != mount_data_global)
2911 kfree(mount_data);
2912 3032
2913 mount_data = cifs_compose_mount_options( 3033 rc = expand_dfs_referral(xid, pSesInfo, volume_info, cifs_sb,
2914 cifs_sb->mountdata, full_path + 1, 3034 true);
2915 referrals, &fake_devname);
2916 3035
2917 free_dfs_info_array(referrals, num_referrals); 3036 if (!rc) {
2918 kfree(fake_devname);
2919 kfree(full_path);
2920
2921 if (IS_ERR(mount_data)) {
2922 rc = PTR_ERR(mount_data);
2923 mount_data = NULL;
2924 goto mount_fail_check;
2925 }
2926
2927 if (tcon)
2928 cifs_put_tcon(tcon);
2929 else if (pSesInfo)
2930 cifs_put_smb_ses(pSesInfo);
2931
2932 cleanup_volume_info(&volume_info);
2933 referral_walks_count++; 3037 referral_walks_count++;
2934 FreeXid(xid);
2935 goto try_mount_again; 3038 goto try_mount_again;
2936 } 3039 }
3040 goto mount_fail_check;
2937#else /* No DFS support, return error on mount */ 3041#else /* No DFS support, return error on mount */
2938 rc = -EOPNOTSUPP; 3042 rc = -EOPNOTSUPP;
2939#endif 3043#endif
@@ -2966,8 +3070,6 @@ remote_path_check:
2966mount_fail_check: 3070mount_fail_check:
2967 /* on error free sesinfo and tcon struct if needed */ 3071 /* on error free sesinfo and tcon struct if needed */
2968 if (rc) { 3072 if (rc) {
2969 if (mount_data != mount_data_global)
2970 kfree(mount_data);
2971 /* If find_unc succeeded then rc == 0 so we can not end */ 3073 /* If find_unc succeeded then rc == 0 so we can not end */
2972 /* up accidentally freeing someone elses tcon struct */ 3074 /* up accidentally freeing someone elses tcon struct */
2973 if (tcon) 3075 if (tcon)
@@ -3083,7 +3185,8 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3083 bcc_ptr += strlen("?????"); 3185 bcc_ptr += strlen("?????");
3084 bcc_ptr += 1; 3186 bcc_ptr += 1;
3085 count = bcc_ptr - &pSMB->Password[0]; 3187 count = bcc_ptr - &pSMB->Password[0];
3086 pSMB->hdr.smb_buf_length += count; 3188 pSMB->hdr.smb_buf_length = cpu_to_be32(be32_to_cpu(
3189 pSMB->hdr.smb_buf_length) + count);
3087 pSMB->ByteCount = cpu_to_le16(count); 3190 pSMB->ByteCount = cpu_to_le16(count);
3088 3191
3089 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length, 3192 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length,
@@ -3258,7 +3361,9 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid)
3258 struct cifsSesInfo *ses; 3361 struct cifsSesInfo *ses;
3259 struct cifsTconInfo *tcon = NULL; 3362 struct cifsTconInfo *tcon = NULL;
3260 struct smb_vol *vol_info; 3363 struct smb_vol *vol_info;
3261 char username[MAX_USERNAME_SIZE + 1]; 3364 char username[28]; /* big enough for "krb50x" + hex of ULONG_MAX 6+16 */
3365 /* We used to have this as MAX_USERNAME which is */
3366 /* way too big now (256 instead of 32) */
3262 3367
3263 vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL); 3368 vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL);
3264 if (vol_info == NULL) { 3369 if (vol_info == NULL) {
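The 28-byte username buffer in cifs_construct_tcon() follows directly from the arithmetic in its comment: 6 characters for "krb50x", at most 16 hex digits for ULONG_MAX on 64-bit, plus a terminating NUL, i.e. 23 bytes with some headroom. The exact format call is outside this hunk; a hypothetical construction showing what the buffer has to accommodate:

	static void build_krb5_owner_name(char *buf, size_t buflen, uid_t fsuid)
	{
		/* "krb50x" (6) + up to 16 hex digits + NUL = 23 <= 28 */
		snprintf(buf, buflen, "krb50x%lx", (unsigned long)fsuid);
	}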
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 993f82045bf6..55d87ac52000 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -45,7 +45,7 @@
45#include "cifs_debug.h" 45#include "cifs_debug.h"
46#include "cifsfs.h" 46#include "cifsfs.h"
47 47
48#ifdef CONFIG_CIFS_EXPERIMENTAL 48#ifdef CIFS_NFSD_EXPORT
49static struct dentry *cifs_get_parent(struct dentry *dentry) 49static struct dentry *cifs_get_parent(struct dentry *dentry)
50{ 50{
51 /* BB need to add code here eventually to enable export via NFSD */ 51 /* BB need to add code here eventually to enable export via NFSD */
@@ -63,5 +63,5 @@ const struct export_operations cifs_export_ops = {
63 .encode_fs = */ 63 .encode_fs = */
64}; 64};
65 65
66#endif /* EXPERIMENTAL */ 66#endif /* CIFS_NFSD_EXPORT */
67 67
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index faf59529e847..c672afef0c09 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -857,95 +857,6 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
857 cifsi->server_eof = end_of_write; 857 cifsi->server_eof = end_of_write;
858} 858}
859 859
860ssize_t cifs_user_write(struct file *file, const char __user *write_data,
861 size_t write_size, loff_t *poffset)
862{
863 struct inode *inode = file->f_path.dentry->d_inode;
864 int rc = 0;
865 unsigned int bytes_written = 0;
866 unsigned int total_written;
867 struct cifs_sb_info *cifs_sb;
868 struct cifsTconInfo *pTcon;
869 int xid;
870 struct cifsFileInfo *open_file;
871 struct cifsInodeInfo *cifsi = CIFS_I(inode);
872
873 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
874
875 /* cFYI(1, " write %d bytes to offset %lld of %s", write_size,
876 *poffset, file->f_path.dentry->d_name.name); */
877
878 if (file->private_data == NULL)
879 return -EBADF;
880
881 open_file = file->private_data;
882 pTcon = tlink_tcon(open_file->tlink);
883
884 rc = generic_write_checks(file, poffset, &write_size, 0);
885 if (rc)
886 return rc;
887
888 xid = GetXid();
889
890 for (total_written = 0; write_size > total_written;
891 total_written += bytes_written) {
892 rc = -EAGAIN;
893 while (rc == -EAGAIN) {
894 if (file->private_data == NULL) {
895 /* file has been closed on us */
896 FreeXid(xid);
897 /* if we have gotten here we have written some data
898 and blocked, and the file has been freed on us while
899 we blocked so return what we managed to write */
900 return total_written;
901 }
902 if (open_file->invalidHandle) {
903 /* we could deadlock if we called
904 filemap_fdatawait from here so tell
905 reopen_file not to flush data to server
906 now */
907 rc = cifs_reopen_file(open_file, false);
908 if (rc != 0)
909 break;
910 }
911
912 rc = CIFSSMBWrite(xid, pTcon,
913 open_file->netfid,
914 min_t(const int, cifs_sb->wsize,
915 write_size - total_written),
916 *poffset, &bytes_written,
917 NULL, write_data + total_written, 0);
918 }
919 if (rc || (bytes_written == 0)) {
920 if (total_written)
921 break;
922 else {
923 FreeXid(xid);
924 return rc;
925 }
926 } else {
927 cifs_update_eof(cifsi, *poffset, bytes_written);
928 *poffset += bytes_written;
929 }
930 }
931
932 cifs_stats_bytes_written(pTcon, total_written);
933
934/* Do not update local mtime - server will set its actual value on write
935 * inode->i_ctime = inode->i_mtime =
936 * current_fs_time(inode->i_sb);*/
937 if (total_written > 0) {
938 spin_lock(&inode->i_lock);
939 if (*poffset > inode->i_size)
940 i_size_write(inode, *poffset);
941 spin_unlock(&inode->i_lock);
942 }
943 mark_inode_dirty_sync(inode);
944
945 FreeXid(xid);
946 return total_written;
947}
948
949static ssize_t cifs_write(struct cifsFileInfo *open_file, 860static ssize_t cifs_write(struct cifsFileInfo *open_file,
950 const char *write_data, size_t write_size, 861 const char *write_data, size_t write_size,
951 loff_t *poffset) 862 loff_t *poffset)
@@ -1420,9 +1331,10 @@ retry_write:
1420 return rc; 1331 return rc;
1421} 1332}
1422 1333
1423static int cifs_writepage(struct page *page, struct writeback_control *wbc) 1334static int
1335cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
1424{ 1336{
1425 int rc = -EFAULT; 1337 int rc;
1426 int xid; 1338 int xid;
1427 1339
1428 xid = GetXid(); 1340 xid = GetXid();
@@ -1442,15 +1354,29 @@ static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1442 * to fail to update with the state of the page correctly. 1354 * to fail to update with the state of the page correctly.
1443 */ 1355 */
1444 set_page_writeback(page); 1356 set_page_writeback(page);
1357retry_write:
1445 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE); 1358 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1446 SetPageUptodate(page); /* BB add check for error and Clearuptodate? */ 1359 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
1447 unlock_page(page); 1360 goto retry_write;
1361 else if (rc == -EAGAIN)
1362 redirty_page_for_writepage(wbc, page);
1363 else if (rc != 0)
1364 SetPageError(page);
1365 else
1366 SetPageUptodate(page);
1448 end_page_writeback(page); 1367 end_page_writeback(page);
1449 page_cache_release(page); 1368 page_cache_release(page);
1450 FreeXid(xid); 1369 FreeXid(xid);
1451 return rc; 1370 return rc;
1452} 1371}
1453 1372
1373static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1374{
1375 int rc = cifs_writepage_locked(page, wbc);
1376 unlock_page(page);
1377 return rc;
1378}
1379
1454static int cifs_write_end(struct file *file, struct address_space *mapping, 1380static int cifs_write_end(struct file *file, struct address_space *mapping,
1455 loff_t pos, unsigned len, unsigned copied, 1381 loff_t pos, unsigned len, unsigned copied,
1456 struct page *page, void *fsdata) 1382 struct page *page, void *fsdata)
@@ -1519,8 +1445,13 @@ int cifs_strict_fsync(struct file *file, int datasync)
1519 cFYI(1, "Sync file - name: %s datasync: 0x%x", 1445 cFYI(1, "Sync file - name: %s datasync: 0x%x",
1520 file->f_path.dentry->d_name.name, datasync); 1446 file->f_path.dentry->d_name.name, datasync);
1521 1447
1522 if (!CIFS_I(inode)->clientCanCacheRead) 1448 if (!CIFS_I(inode)->clientCanCacheRead) {
1523 cifs_invalidate_mapping(inode); 1449 rc = cifs_invalidate_mapping(inode);
1450 if (rc) {
1451 cFYI(1, "rc: %d during invalidate phase", rc);
1452 rc = 0; /* don't care about it in fsync */
1453 }
1454 }
1524 1455
1525 tcon = tlink_tcon(smbfile->tlink); 1456 tcon = tlink_tcon(smbfile->tlink);
1526 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) 1457 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
@@ -1726,7 +1657,7 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
1726 return total_written; 1657 return total_written;
1727} 1658}
1728 1659
1729static ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, 1660ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
1730 unsigned long nr_segs, loff_t pos) 1661 unsigned long nr_segs, loff_t pos)
1731{ 1662{
1732 ssize_t written; 1663 ssize_t written;
@@ -1849,17 +1780,7 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
1849 return total_read; 1780 return total_read;
1850} 1781}
1851 1782
1852ssize_t cifs_user_read(struct file *file, char __user *read_data, 1783ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
1853 size_t read_size, loff_t *poffset)
1854{
1855 struct iovec iov;
1856 iov.iov_base = read_data;
1857 iov.iov_len = read_size;
1858
1859 return cifs_iovec_read(file, &iov, 1, poffset);
1860}
1861
1862static ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
1863 unsigned long nr_segs, loff_t pos) 1784 unsigned long nr_segs, loff_t pos)
1864{ 1785{
1865 ssize_t read; 1786 ssize_t read;
@@ -1987,8 +1908,11 @@ int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
1987 1908
1988 xid = GetXid(); 1909 xid = GetXid();
1989 1910
1990 if (!CIFS_I(inode)->clientCanCacheRead) 1911 if (!CIFS_I(inode)->clientCanCacheRead) {
1991 cifs_invalidate_mapping(inode); 1912 rc = cifs_invalidate_mapping(inode);
1913 if (rc)
1914 return rc;
1915 }
1992 1916
1993 rc = generic_file_mmap(file, vma); 1917 rc = generic_file_mmap(file, vma);
1994 if (rc == 0) 1918 if (rc == 0)
@@ -2415,6 +2339,27 @@ static void cifs_invalidate_page(struct page *page, unsigned long offset)
2415 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode); 2339 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
2416} 2340}
2417 2341
2342static int cifs_launder_page(struct page *page)
2343{
2344 int rc = 0;
2345 loff_t range_start = page_offset(page);
2346 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
2347 struct writeback_control wbc = {
2348 .sync_mode = WB_SYNC_ALL,
2349 .nr_to_write = 0,
2350 .range_start = range_start,
2351 .range_end = range_end,
2352 };
2353
2354 cFYI(1, "Launder page: %p", page);
2355
2356 if (clear_page_dirty_for_io(page))
2357 rc = cifs_writepage_locked(page, &wbc);
2358
2359 cifs_fscache_invalidate_page(page, page->mapping->host);
2360 return rc;
2361}
2362
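The new .launder_page hook is what makes the switch to invalidate_inode_pages2() in inode.c (further down) safe: when the VFS has to drop a dirty page it calls launder_page first, giving CIFS a chance to push the data to the server, and a non-zero return aborts the invalidation so the caller learns the cached mapping could not be dropped. A condensed, hypothetical rendering of that per-page decision (the real logic lives in mm/truncate.c):

	static int drop_one_page_sketch(struct page *page)
	{
		struct address_space *mapping = page->mapping;

		if (PageDirty(page) && mapping && mapping->a_ops->launder_page)
			return mapping->a_ops->launder_page(page);
			/* cifs_launder_page() above runs here */

		return 0;	/* clean pages can simply be released */
	}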
2418void cifs_oplock_break(struct work_struct *work) 2363void cifs_oplock_break(struct work_struct *work)
2419{ 2364{
2420 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, 2365 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
@@ -2486,7 +2431,7 @@ const struct address_space_operations cifs_addr_ops = {
2486 .set_page_dirty = __set_page_dirty_nobuffers, 2431 .set_page_dirty = __set_page_dirty_nobuffers,
2487 .releasepage = cifs_release_page, 2432 .releasepage = cifs_release_page,
2488 .invalidatepage = cifs_invalidate_page, 2433 .invalidatepage = cifs_invalidate_page,
2489 /* .direct_IO = */ 2434 .launder_page = cifs_launder_page,
2490}; 2435};
2491 2436
2492/* 2437/*
@@ -2503,5 +2448,5 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
2503 .set_page_dirty = __set_page_dirty_nobuffers, 2448 .set_page_dirty = __set_page_dirty_nobuffers,
2504 .releasepage = cifs_release_page, 2449 .releasepage = cifs_release_page,
2505 .invalidatepage = cifs_invalidate_page, 2450 .invalidatepage = cifs_invalidate_page,
2506 /* .direct_IO = */ 2451 .launder_page = cifs_launder_page,
2507}; 2452};
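With cifs_user_write()/cifs_user_read() removed and the iovec-based cifs_user_readv()/cifs_user_writev() made non-static, the uncached I/O paths presumably hang off the aio entry points in cifsfs.c, which is not part of this hunk. A hedged sketch of that wiring (the field choices here are an assumption, not taken from this diff):

	const struct file_operations cifs_file_direct_ops_sketch = {
		.read		= do_sync_read,		/* falls through to ->aio_read */
		.write		= do_sync_write,	/* falls through to ->aio_write */
		.aio_read	= cifs_user_readv,
		.aio_write	= cifs_user_writev,
		/* open, release, llseek, unlocked_ioctl etc. as before */
	};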
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 8852470b4fbb..de02ed5e25c2 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -878,7 +878,7 @@ retry_iget5_locked:
878} 878}
879 879
880/* gets root inode */ 880/* gets root inode */
881struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) 881struct inode *cifs_root_iget(struct super_block *sb)
882{ 882{
883 int xid; 883 int xid;
884 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 884 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
@@ -1683,71 +1683,70 @@ cifs_inode_needs_reval(struct inode *inode)
1683/* 1683/*
1684 * Zap the cache. Called when invalid_mapping flag is set. 1684 * Zap the cache. Called when invalid_mapping flag is set.
1685 */ 1685 */
1686void 1686int
1687cifs_invalidate_mapping(struct inode *inode) 1687cifs_invalidate_mapping(struct inode *inode)
1688{ 1688{
1689 int rc; 1689 int rc = 0;
1690 struct cifsInodeInfo *cifs_i = CIFS_I(inode); 1690 struct cifsInodeInfo *cifs_i = CIFS_I(inode);
1691 1691
1692 cifs_i->invalid_mapping = false; 1692 cifs_i->invalid_mapping = false;
1693 1693
1694 /* write back any cached data */
1695 if (inode->i_mapping && inode->i_mapping->nrpages != 0) { 1694 if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
1696 rc = filemap_write_and_wait(inode->i_mapping); 1695 rc = invalidate_inode_pages2(inode->i_mapping);
1697 mapping_set_error(inode->i_mapping, rc); 1696 if (rc) {
1697 cERROR(1, "%s: could not invalidate inode %p", __func__,
1698 inode);
1699 cifs_i->invalid_mapping = true;
1700 }
1698 } 1701 }
1699 invalidate_remote_inode(inode); 1702
1700 cifs_fscache_reset_inode_cookie(inode); 1703 cifs_fscache_reset_inode_cookie(inode);
1704 return rc;
1701} 1705}
1702 1706
1703int cifs_revalidate_file(struct file *filp) 1707int cifs_revalidate_file_attr(struct file *filp)
1704{ 1708{
1705 int rc = 0; 1709 int rc = 0;
1706 struct inode *inode = filp->f_path.dentry->d_inode; 1710 struct inode *inode = filp->f_path.dentry->d_inode;
1707 struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data; 1711 struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data;
1708 1712
1709 if (!cifs_inode_needs_reval(inode)) 1713 if (!cifs_inode_needs_reval(inode))
1710 goto check_inval; 1714 return rc;
1711 1715
1712 if (tlink_tcon(cfile->tlink)->unix_ext) 1716 if (tlink_tcon(cfile->tlink)->unix_ext)
1713 rc = cifs_get_file_info_unix(filp); 1717 rc = cifs_get_file_info_unix(filp);
1714 else 1718 else
1715 rc = cifs_get_file_info(filp); 1719 rc = cifs_get_file_info(filp);
1716 1720
1717check_inval:
1718 if (CIFS_I(inode)->invalid_mapping)
1719 cifs_invalidate_mapping(inode);
1720
1721 return rc; 1721 return rc;
1722} 1722}
1723 1723
1724/* revalidate a dentry's inode attributes */ 1724int cifs_revalidate_dentry_attr(struct dentry *dentry)
1725int cifs_revalidate_dentry(struct dentry *dentry)
1726{ 1725{
1727 int xid; 1726 int xid;
1728 int rc = 0; 1727 int rc = 0;
1729 char *full_path = NULL;
1730 struct inode *inode = dentry->d_inode; 1728 struct inode *inode = dentry->d_inode;
1731 struct super_block *sb = dentry->d_sb; 1729 struct super_block *sb = dentry->d_sb;
1730 char *full_path = NULL;
1732 1731
1733 if (inode == NULL) 1732 if (inode == NULL)
1734 return -ENOENT; 1733 return -ENOENT;
1735 1734
1736 xid = GetXid();
1737
1738 if (!cifs_inode_needs_reval(inode)) 1735 if (!cifs_inode_needs_reval(inode))
1739 goto check_inval; 1736 return rc;
1737
1738 xid = GetXid();
1740 1739
1741 /* can not safely grab the rename sem here if rename calls revalidate 1740 /* can not safely grab the rename sem here if rename calls revalidate
1742 since that would deadlock */ 1741 since that would deadlock */
1743 full_path = build_path_from_dentry(dentry); 1742 full_path = build_path_from_dentry(dentry);
1744 if (full_path == NULL) { 1743 if (full_path == NULL) {
1745 rc = -ENOMEM; 1744 rc = -ENOMEM;
1746 goto check_inval; 1745 goto out;
1747 } 1746 }
1748 1747
1749 cFYI(1, "Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld " 1748 cFYI(1, "Update attributes: %s inode 0x%p count %d dentry: 0x%p d_time "
1750 "jiffies %ld", full_path, inode, inode->i_count.counter, 1749 "%ld jiffies %ld", full_path, inode, inode->i_count.counter,
1751 dentry, dentry->d_time, jiffies); 1750 dentry, dentry->d_time, jiffies);
1752 1751
1753 if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext) 1752 if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext)
@@ -1756,41 +1755,83 @@ int cifs_revalidate_dentry(struct dentry *dentry)
1756 rc = cifs_get_inode_info(&inode, full_path, NULL, sb, 1755 rc = cifs_get_inode_info(&inode, full_path, NULL, sb,
1757 xid, NULL); 1756 xid, NULL);
1758 1757
1759check_inval: 1758out:
1760 if (CIFS_I(inode)->invalid_mapping)
1761 cifs_invalidate_mapping(inode);
1762
1763 kfree(full_path); 1759 kfree(full_path);
1764 FreeXid(xid); 1760 FreeXid(xid);
1765 return rc; 1761 return rc;
1766} 1762}
1767 1763
1764int cifs_revalidate_file(struct file *filp)
1765{
1766 int rc;
1767 struct inode *inode = filp->f_path.dentry->d_inode;
1768
1769 rc = cifs_revalidate_file_attr(filp);
1770 if (rc)
1771 return rc;
1772
1773 if (CIFS_I(inode)->invalid_mapping)
1774 rc = cifs_invalidate_mapping(inode);
1775 return rc;
1776}
1777
1778/* revalidate a dentry's inode attributes */
1779int cifs_revalidate_dentry(struct dentry *dentry)
1780{
1781 int rc;
1782 struct inode *inode = dentry->d_inode;
1783
1784 rc = cifs_revalidate_dentry_attr(dentry);
1785 if (rc)
1786 return rc;
1787
1788 if (CIFS_I(inode)->invalid_mapping)
1789 rc = cifs_invalidate_mapping(inode);
1790 return rc;
1791}
1792
1768int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry, 1793int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1769 struct kstat *stat) 1794 struct kstat *stat)
1770{ 1795{
1771 struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb); 1796 struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb);
1772 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); 1797 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
1773 int err = cifs_revalidate_dentry(dentry); 1798 struct inode *inode = dentry->d_inode;
1774 1799 int rc;
1775 if (!err) {
1776 generic_fillattr(dentry->d_inode, stat);
1777 stat->blksize = CIFS_MAX_MSGSIZE;
1778 stat->ino = CIFS_I(dentry->d_inode)->uniqueid;
1779 1800
1780 /* 1801 /*
1781 * If on a multiuser mount without unix extensions, and the 1802 * We need to be sure that all dirty pages are written and the server
1782 * admin hasn't overridden them, set the ownership to the 1803 * has actual ctime, mtime and file length.
1783 * fsuid/fsgid of the current process. 1804 */
1784 */ 1805 if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping &&
1785 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) && 1806 inode->i_mapping->nrpages != 0) {
1786 !tcon->unix_ext) { 1807 rc = filemap_fdatawait(inode->i_mapping);
1787 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)) 1808 if (rc) {
1788 stat->uid = current_fsuid(); 1809 mapping_set_error(inode->i_mapping, rc);
1789 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)) 1810 return rc;
1790 stat->gid = current_fsgid();
1791 } 1811 }
1792 } 1812 }
1793 return err; 1813
1814 rc = cifs_revalidate_dentry_attr(dentry);
1815 if (rc)
1816 return rc;
1817
1818 generic_fillattr(inode, stat);
1819 stat->blksize = CIFS_MAX_MSGSIZE;
1820 stat->ino = CIFS_I(inode)->uniqueid;
1821
1822 /*
1823 * If on a multiuser mount without unix extensions, and the admin hasn't
1824 * overridden them, set the ownership to the fsuid/fsgid of the current
1825 * process.
1826 */
1827 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) &&
1828 !tcon->unix_ext) {
1829 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID))
1830 stat->uid = current_fsuid();
1831 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID))
1832 stat->gid = current_fsgid();
1833 }
1834 return rc;
1794} 1835}
1795 1836
1796static int cifs_truncate_page(struct address_space *mapping, loff_t from) 1837static int cifs_truncate_page(struct address_space *mapping, loff_t from)
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 0c684ae4c071..907531ac5888 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -304,12 +304,10 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
304 304
305 memset(temp, 0, 256); /* bigger than MAX_CIFS_HDR_SIZE */ 305 memset(temp, 0, 256); /* bigger than MAX_CIFS_HDR_SIZE */
306 306
307 buffer->smb_buf_length = 307 buffer->smb_buf_length = cpu_to_be32(
308 (2 * word_count) + sizeof(struct smb_hdr) - 308 (2 * word_count) + sizeof(struct smb_hdr) -
309 4 /* RFC 1001 length field does not count */ + 309 4 /* RFC 1001 length field does not count */ +
310 2 /* for bcc field itself */ ; 310 2 /* for bcc field itself */) ;
311 /* Note that this is the only network field that has to be converted
312 to big endian and it is done just before we send it */
313 311
314 buffer->Protocol[0] = 0xFF; 312 buffer->Protocol[0] = 0xFF;
315 buffer->Protocol[1] = 'S'; 313 buffer->Protocol[1] = 'S';
@@ -424,7 +422,7 @@ check_smb_hdr(struct smb_hdr *smb, __u16 mid)
 int
 checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
 {
-	__u32 len = smb->smb_buf_length;
+	__u32 len = be32_to_cpu(smb->smb_buf_length);
 	__u32 clc_len;  /* calculated length */
 	cFYI(0, "checkSMB Length: 0x%x, smb_buf_length: 0x%x", length, len);
 
@@ -464,7 +462,7 @@ checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length)
 
 	if (check_smb_hdr(smb, mid))
 		return 1;
-	clc_len = smbCalcSize_LE(smb);
+	clc_len = smbCalcSize(smb);
 
 	if (4 + len != length) {
 		cERROR(1, "Length read does not match RFC1001 length %d",
@@ -521,7 +519,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
 			(struct smb_com_transaction_change_notify_rsp *)buf;
 		struct file_notify_information *pnotify;
 		__u32 data_offset = 0;
-		if (get_bcc_le(buf) > sizeof(struct file_notify_information)) {
+		if (get_bcc(buf) > sizeof(struct file_notify_information)) {
 			data_offset = le32_to_cpu(pSMBr->DataOffset);
 
 			pnotify = (struct file_notify_information *)
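
The header_assemble() and checkSMB() changes above move the big-endian conversion of smb_buf_length to the point where the header is built, so the RFC 1001 length prefix is held in wire order for its whole lifetime and every reader decodes it with be32_to_cpu(). A small userspace sketch of what cpu_to_be32()/be32_to_cpu() do for this 4-byte field (helper names here are made up for illustration):

#include <stdint.h>
#include <stdio.h>

static void put_be32(uint8_t *buf, uint32_t val)
{
	buf[0] = (uint8_t)(val >> 24);	/* most significant byte first */
	buf[1] = (uint8_t)(val >> 16);
	buf[2] = (uint8_t)(val >> 8);
	buf[3] = (uint8_t)val;
}

static uint32_t get_be32(const uint8_t *buf)
{
	return ((uint32_t)buf[0] << 24) | ((uint32_t)buf[1] << 16) |
	       ((uint32_t)buf[2] << 8) | (uint32_t)buf[3];
}

int main(void)
{
	uint8_t hdr[4];

	put_be32(hdr, 0x123456);	/* store the length in big-endian wire order */
	printf("decoded length: 0x%x\n", get_be32(hdr));	/* prints 0x123456 */
	return 0;
}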
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 79f641eeda30..79b71c2c7c9d 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -919,13 +919,6 @@ smbCalcSize(struct smb_hdr *ptr)
 		2 /* size of the bcc field */ + get_bcc(ptr));
 }
 
-unsigned int
-smbCalcSize_LE(struct smb_hdr *ptr)
-{
-	return (sizeof(struct smb_hdr) + (2 * ptr->WordCount) +
-		2 /* size of the bcc field */ + get_bcc_le(ptr));
-}
-
 /* The following are taken from fs/ntfs/util.c */
 
 #define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000)
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 645114ad0a10..7dd462100378 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -621,7 +621,7 @@ ssetup_ntlmssp_authenticate:
 	   and rest of bcc area. This allows us to avoid
 	   a large buffer 17K allocation */
 	iov[0].iov_base = (char *)pSMB;
-	iov[0].iov_len = smb_buf->smb_buf_length + 4;
+	iov[0].iov_len = be32_to_cpu(smb_buf->smb_buf_length) + 4;
 
 	/* setting this here allows the code at the end of the function
 	   to free the request buffer if there's an error */
@@ -656,7 +656,7 @@ ssetup_ntlmssp_authenticate:
 	 * to use challenge/response method (i.e. Password bit is 1).
 	 */
 
-	calc_lanman_hash(ses->password, ses->server->cryptkey,
+	rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
 			 ses->server->secMode & SECMODE_PW_ENCRYPT ?
 			 true : false, lnm_session_key);
 
@@ -859,9 +859,10 @@ ssetup_ntlmssp_authenticate:
 	iov[2].iov_len = (long) bcc_ptr - (long) str_area;
 
 	count = iov[1].iov_len + iov[2].iov_len;
-	smb_buf->smb_buf_length += count;
+	smb_buf->smb_buf_length =
+		cpu_to_be32(be32_to_cpu(smb_buf->smb_buf_length) + count);
 
-	put_bcc_le(count, smb_buf);
+	put_bcc(count, smb_buf);
 
 	rc = SendReceive2(xid, ses, iov, 3 /* num_iovecs */, &resp_buf_type,
 			  CIFS_LOG_ERROR);
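
Because smb_buf_length is now stored in wire byte order, the sess.c hunk above has to decode, add, and re-encode when it appends the security blob. A standalone userspace sketch of that update pattern, using htonl()/ntohl() as stand-ins for cpu_to_be32()/be32_to_cpu(); the lengths are arbitrary example values:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t be32_add(uint32_t be_val, uint32_t count)
{
	return htonl(ntohl(be_val) + count);	/* decode, adjust, re-encode */
}

int main(void)
{
	uint32_t wire_len = htonl(100);		/* length already in wire order */

	wire_len = be32_add(wire_len, 28);	/* append 28 bytes of data */
	printf("new length: %u\n", (unsigned)ntohl(wire_len));	/* prints 128 */
	return 0;
}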
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
deleted file mode 100644
index 04721485925d..000000000000
--- a/fs/cifs/smbdes.c
+++ /dev/null
@@ -1,418 +0,0 @@
1/*
2 Unix SMB/Netbios implementation.
3 Version 1.9.
4
5 a partial implementation of DES designed for use in the
6 SMB authentication protocol
7
8 Copyright (C) Andrew Tridgell 1998
9 Modified by Steve French (sfrench@us.ibm.com) 2002,2004
10
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24*/
25
26/* NOTES:
27
28 This code makes no attempt to be fast! In fact, it is a very
29 slow implementation
30
31 This code is NOT a complete DES implementation. It implements only
32 the minimum necessary for SMB authentication, as used by all SMB
33 products (including every copy of Microsoft Windows95 ever sold)
34
35 In particular, it can only do a unchained forward DES pass. This
36 means it is not possible to use this code for encryption/decryption
37 of data, instead it is only useful as a "hash" algorithm.
38
39 There is no entry point into this code that allows normal DES operation.
40
41 I believe this means that this code does not come under ITAR
42 regulations but this is NOT a legal opinion. If you are concerned
43 about the applicability of ITAR regulations to this code then you
44 should confirm it for yourself (and maybe let me know if you come
45 up with a different answer to the one above)
46*/
47#include <linux/slab.h>
48#define uchar unsigned char
49
50static uchar perm1[56] = { 57, 49, 41, 33, 25, 17, 9,
51 1, 58, 50, 42, 34, 26, 18,
52 10, 2, 59, 51, 43, 35, 27,
53 19, 11, 3, 60, 52, 44, 36,
54 63, 55, 47, 39, 31, 23, 15,
55 7, 62, 54, 46, 38, 30, 22,
56 14, 6, 61, 53, 45, 37, 29,
57 21, 13, 5, 28, 20, 12, 4
58};
59
60static uchar perm2[48] = { 14, 17, 11, 24, 1, 5,
61 3, 28, 15, 6, 21, 10,
62 23, 19, 12, 4, 26, 8,
63 16, 7, 27, 20, 13, 2,
64 41, 52, 31, 37, 47, 55,
65 30, 40, 51, 45, 33, 48,
66 44, 49, 39, 56, 34, 53,
67 46, 42, 50, 36, 29, 32
68};
69
70static uchar perm3[64] = { 58, 50, 42, 34, 26, 18, 10, 2,
71 60, 52, 44, 36, 28, 20, 12, 4,
72 62, 54, 46, 38, 30, 22, 14, 6,
73 64, 56, 48, 40, 32, 24, 16, 8,
74 57, 49, 41, 33, 25, 17, 9, 1,
75 59, 51, 43, 35, 27, 19, 11, 3,
76 61, 53, 45, 37, 29, 21, 13, 5,
77 63, 55, 47, 39, 31, 23, 15, 7
78};
79
80static uchar perm4[48] = { 32, 1, 2, 3, 4, 5,
81 4, 5, 6, 7, 8, 9,
82 8, 9, 10, 11, 12, 13,
83 12, 13, 14, 15, 16, 17,
84 16, 17, 18, 19, 20, 21,
85 20, 21, 22, 23, 24, 25,
86 24, 25, 26, 27, 28, 29,
87 28, 29, 30, 31, 32, 1
88};
89
90static uchar perm5[32] = { 16, 7, 20, 21,
91 29, 12, 28, 17,
92 1, 15, 23, 26,
93 5, 18, 31, 10,
94 2, 8, 24, 14,
95 32, 27, 3, 9,
96 19, 13, 30, 6,
97 22, 11, 4, 25
98};
99
100static uchar perm6[64] = { 40, 8, 48, 16, 56, 24, 64, 32,
101 39, 7, 47, 15, 55, 23, 63, 31,
102 38, 6, 46, 14, 54, 22, 62, 30,
103 37, 5, 45, 13, 53, 21, 61, 29,
104 36, 4, 44, 12, 52, 20, 60, 28,
105 35, 3, 43, 11, 51, 19, 59, 27,
106 34, 2, 42, 10, 50, 18, 58, 26,
107 33, 1, 41, 9, 49, 17, 57, 25
108};
109
110static uchar sc[16] = { 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1 };
111
112static uchar sbox[8][4][16] = {
113 {{14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7},
114 {0, 15, 7, 4, 14, 2, 13, 1, 10, 6, 12, 11, 9, 5, 3, 8},
115 {4, 1, 14, 8, 13, 6, 2, 11, 15, 12, 9, 7, 3, 10, 5, 0},
116 {15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13} },
117
118 {{15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10},
119 {3, 13, 4, 7, 15, 2, 8, 14, 12, 0, 1, 10, 6, 9, 11, 5},
120 {0, 14, 7, 11, 10, 4, 13, 1, 5, 8, 12, 6, 9, 3, 2, 15},
121 {13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9} },
122
123 {{10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8},
124 {13, 7, 0, 9, 3, 4, 6, 10, 2, 8, 5, 14, 12, 11, 15, 1},
125 {13, 6, 4, 9, 8, 15, 3, 0, 11, 1, 2, 12, 5, 10, 14, 7},
126 {1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12} },
127
128 {{7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15},
129 {13, 8, 11, 5, 6, 15, 0, 3, 4, 7, 2, 12, 1, 10, 14, 9},
130 {10, 6, 9, 0, 12, 11, 7, 13, 15, 1, 3, 14, 5, 2, 8, 4},
131 {3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14} },
132
133 {{2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9},
134 {14, 11, 2, 12, 4, 7, 13, 1, 5, 0, 15, 10, 3, 9, 8, 6},
135 {4, 2, 1, 11, 10, 13, 7, 8, 15, 9, 12, 5, 6, 3, 0, 14},
136 {11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3} },
137
138 {{12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11},
139 {10, 15, 4, 2, 7, 12, 9, 5, 6, 1, 13, 14, 0, 11, 3, 8},
140 {9, 14, 15, 5, 2, 8, 12, 3, 7, 0, 4, 10, 1, 13, 11, 6},
141 {4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13} },
142
143 {{4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1},
144 {13, 0, 11, 7, 4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6},
145 {1, 4, 11, 13, 12, 3, 7, 14, 10, 15, 6, 8, 0, 5, 9, 2},
146 {6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12} },
147
148 {{13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7},
149 {1, 15, 13, 8, 10, 3, 7, 4, 12, 5, 6, 11, 0, 14, 9, 2},
150 {7, 11, 4, 1, 9, 12, 14, 2, 0, 6, 10, 13, 15, 3, 5, 8},
151 {2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11} }
152};
153
154static void
155permute(char *out, char *in, uchar *p, int n)
156{
157 int i;
158 for (i = 0; i < n; i++)
159 out[i] = in[p[i] - 1];
160}
161
162static void
163lshift(char *d, int count, int n)
164{
165 char out[64];
166 int i;
167 for (i = 0; i < n; i++)
168 out[i] = d[(i + count) % n];
169 for (i = 0; i < n; i++)
170 d[i] = out[i];
171}
172
173static void
174concat(char *out, char *in1, char *in2, int l1, int l2)
175{
176 while (l1--)
177 *out++ = *in1++;
178 while (l2--)
179 *out++ = *in2++;
180}
181
182static void
183xor(char *out, char *in1, char *in2, int n)
184{
185 int i;
186 for (i = 0; i < n; i++)
187 out[i] = in1[i] ^ in2[i];
188}
189
190static void
191dohash(char *out, char *in, char *key, int forw)
192{
193 int i, j, k;
194 char *pk1;
195 char c[28];
196 char d[28];
197 char *cd;
198 char (*ki)[48];
199 char *pd1;
200 char l[32], r[32];
201 char *rl;
202
203 /* Have to reduce stack usage */
204 pk1 = kmalloc(56+56+64+64, GFP_KERNEL);
205 if (pk1 == NULL)
206 return;
207
208 ki = kmalloc(16*48, GFP_KERNEL);
209 if (ki == NULL) {
210 kfree(pk1);
211 return;
212 }
213
214 cd = pk1 + 56;
215 pd1 = cd + 56;
216 rl = pd1 + 64;
217
218 permute(pk1, key, perm1, 56);
219
220 for (i = 0; i < 28; i++)
221 c[i] = pk1[i];
222 for (i = 0; i < 28; i++)
223 d[i] = pk1[i + 28];
224
225 for (i = 0; i < 16; i++) {
226 lshift(c, sc[i], 28);
227 lshift(d, sc[i], 28);
228
229 concat(cd, c, d, 28, 28);
230 permute(ki[i], cd, perm2, 48);
231 }
232
233 permute(pd1, in, perm3, 64);
234
235 for (j = 0; j < 32; j++) {
236 l[j] = pd1[j];
237 r[j] = pd1[j + 32];
238 }
239
240 for (i = 0; i < 16; i++) {
241 char *er; /* er[48] */
242 char *erk; /* erk[48] */
243 char b[8][6];
244 char *cb; /* cb[32] */
245 char *pcb; /* pcb[32] */
246 char *r2; /* r2[32] */
247
248 er = kmalloc(48+48+32+32+32, GFP_KERNEL);
249 if (er == NULL) {
250 kfree(pk1);
251 kfree(ki);
252 return;
253 }
254 erk = er+48;
255 cb = erk+48;
256 pcb = cb+32;
257 r2 = pcb+32;
258
259 permute(er, r, perm4, 48);
260
261 xor(erk, er, ki[forw ? i : 15 - i], 48);
262
263 for (j = 0; j < 8; j++)
264 for (k = 0; k < 6; k++)
265 b[j][k] = erk[j * 6 + k];
266
267 for (j = 0; j < 8; j++) {
268 int m, n;
269 m = (b[j][0] << 1) | b[j][5];
270
271 n = (b[j][1] << 3) | (b[j][2] << 2) | (b[j][3] <<
272 1) | b[j][4];
273
274 for (k = 0; k < 4; k++)
275 b[j][k] =
276 (sbox[j][m][n] & (1 << (3 - k))) ? 1 : 0;
277 }
278
279 for (j = 0; j < 8; j++)
280 for (k = 0; k < 4; k++)
281 cb[j * 4 + k] = b[j][k];
282 permute(pcb, cb, perm5, 32);
283
284 xor(r2, l, pcb, 32);
285
286 for (j = 0; j < 32; j++)
287 l[j] = r[j];
288
289 for (j = 0; j < 32; j++)
290 r[j] = r2[j];
291
292 kfree(er);
293 }
294
295 concat(rl, r, l, 32, 32);
296
297 permute(out, rl, perm6, 64);
298 kfree(pk1);
299 kfree(ki);
300}
301
302static void
303str_to_key(unsigned char *str, unsigned char *key)
304{
305 int i;
306
307 key[0] = str[0] >> 1;
308 key[1] = ((str[0] & 0x01) << 6) | (str[1] >> 2);
309 key[2] = ((str[1] & 0x03) << 5) | (str[2] >> 3);
310 key[3] = ((str[2] & 0x07) << 4) | (str[3] >> 4);
311 key[4] = ((str[3] & 0x0F) << 3) | (str[4] >> 5);
312 key[5] = ((str[4] & 0x1F) << 2) | (str[5] >> 6);
313 key[6] = ((str[5] & 0x3F) << 1) | (str[6] >> 7);
314 key[7] = str[6] & 0x7F;
315 for (i = 0; i < 8; i++)
316 key[i] = (key[i] << 1);
317}
318
319static void
320smbhash(unsigned char *out, const unsigned char *in, unsigned char *key,
321 int forw)
322{
323 int i;
324 char *outb; /* outb[64] */
325 char *inb; /* inb[64] */
326 char *keyb; /* keyb[64] */
327 unsigned char key2[8];
328
329 outb = kmalloc(64 * 3, GFP_KERNEL);
330 if (outb == NULL)
331 return;
332
333 inb = outb + 64;
334 keyb = inb + 64;
335
336 str_to_key(key, key2);
337
338 for (i = 0; i < 64; i++) {
339 inb[i] = (in[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
340 keyb[i] = (key2[i / 8] & (1 << (7 - (i % 8)))) ? 1 : 0;
341 outb[i] = 0;
342 }
343
344 dohash(outb, inb, keyb, forw);
345
346 for (i = 0; i < 8; i++)
347 out[i] = 0;
348
349 for (i = 0; i < 64; i++) {
350 if (outb[i])
351 out[i / 8] |= (1 << (7 - (i % 8)));
352 }
353 kfree(outb);
354}
355
356void
357E_P16(unsigned char *p14, unsigned char *p16)
358{
359 unsigned char sp8[8] =
360 { 0x4b, 0x47, 0x53, 0x21, 0x40, 0x23, 0x24, 0x25 };
361 smbhash(p16, sp8, p14, 1);
362 smbhash(p16 + 8, sp8, p14 + 7, 1);
363}
364
365void
366E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
367{
368 smbhash(p24, c8, p21, 1);
369 smbhash(p24 + 8, c8, p21 + 7, 1);
370 smbhash(p24 + 16, c8, p21 + 14, 1);
371}
372
373#if 0 /* currently unused */
374static void
375D_P16(unsigned char *p14, unsigned char *in, unsigned char *out)
376{
377 smbhash(out, in, p14, 0);
378 smbhash(out + 8, in + 8, p14 + 7, 0);
379}
380
381static void
382E_old_pw_hash(unsigned char *p14, unsigned char *in, unsigned char *out)
383{
384 smbhash(out, in, p14, 1);
385 smbhash(out + 8, in + 8, p14 + 7, 1);
386}
387/* these routines are currently unneeded, but may be
388 needed later */
389void
390cred_hash1(unsigned char *out, unsigned char *in, unsigned char *key)
391{
392 unsigned char buf[8];
393
394 smbhash(buf, in, key, 1);
395 smbhash(out, buf, key + 9, 1);
396}
397
398void
399cred_hash2(unsigned char *out, unsigned char *in, unsigned char *key)
400{
401 unsigned char buf[8];
402 static unsigned char key2[8];
403
404 smbhash(buf, in, key, 1);
405 key2[0] = key[7];
406 smbhash(out, buf, key2, 1);
407}
408
409void
410cred_hash3(unsigned char *out, unsigned char *in, unsigned char *key, int forw)
411{
412 static unsigned char key2[8];
413
414 smbhash(out, in, key, forw);
415 key2[0] = key[7];
416 smbhash(out + 8, in + 8, key2, forw);
417}
418#endif /* unneeded routines */
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index b5041c849981..1525d5e662b6 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -47,6 +47,88 @@
 #define SSVALX(buf,pos,val) (CVAL(buf,pos)=(val)&0xFF,CVAL(buf,pos+1)=(val)>>8)
 #define SSVAL(buf,pos,val) SSVALX((buf),(pos),((__u16)(val)))
 
+static void
+str_to_key(unsigned char *str, unsigned char *key)
+{
+	int i;
+
+	key[0] = str[0] >> 1;
+	key[1] = ((str[0] & 0x01) << 6) | (str[1] >> 2);
+	key[2] = ((str[1] & 0x03) << 5) | (str[2] >> 3);
+	key[3] = ((str[2] & 0x07) << 4) | (str[3] >> 4);
+	key[4] = ((str[3] & 0x0F) << 3) | (str[4] >> 5);
+	key[5] = ((str[4] & 0x1F) << 2) | (str[5] >> 6);
+	key[6] = ((str[5] & 0x3F) << 1) | (str[6] >> 7);
+	key[7] = str[6] & 0x7F;
+	for (i = 0; i < 8; i++)
+		key[i] = (key[i] << 1);
+}
+
+static int
+smbhash(unsigned char *out, const unsigned char *in, unsigned char *key)
+{
+	int rc;
+	unsigned char key2[8];
+	struct crypto_blkcipher *tfm_des;
+	struct scatterlist sgin, sgout;
+	struct blkcipher_desc desc;
+
+	str_to_key(key, key2);
+
+	tfm_des = crypto_alloc_blkcipher("ecb(des)", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(tfm_des)) {
+		rc = PTR_ERR(tfm_des);
+		cERROR(1, "could not allocate des crypto API\n");
+		goto smbhash_err;
+	}
+
+	desc.tfm = tfm_des;
+
+	crypto_blkcipher_setkey(tfm_des, key2, 8);
+
+	sg_init_one(&sgin, in, 8);
+	sg_init_one(&sgout, out, 8);
+
+	rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, 8);
+	if (rc) {
+		cERROR(1, "could not encrypt crypt key rc: %d\n", rc);
+		crypto_free_blkcipher(tfm_des);
+		goto smbhash_err;
+	}
+
+smbhash_err:
+	return rc;
+}
+
+static int
+E_P16(unsigned char *p14, unsigned char *p16)
+{
+	int rc;
+	unsigned char sp8[8] =
+	    { 0x4b, 0x47, 0x53, 0x21, 0x40, 0x23, 0x24, 0x25 };
+
+	rc = smbhash(p16, sp8, p14);
+	if (rc)
+		return rc;
+	rc = smbhash(p16 + 8, sp8, p14 + 7);
+	return rc;
+}
+
+static int
+E_P24(unsigned char *p21, const unsigned char *c8, unsigned char *p24)
+{
+	int rc;
+
+	rc = smbhash(p24, c8, p21);
+	if (rc)
+		return rc;
+	rc = smbhash(p24 + 8, c8, p21 + 7);
+	if (rc)
+		return rc;
+	rc = smbhash(p24 + 16, c8, p21 + 14);
+	return rc;
+}
+
 /* produce a md4 message digest from data of length n bytes */
 int
 mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len)
@@ -87,40 +169,30 @@ mdfour_err:
 	return rc;
 }
 
-/* Does the des encryption from the NT or LM MD4 hash. */
-static void
-SMBOWFencrypt(unsigned char passwd[16], const unsigned char *c8,
-	      unsigned char p24[24])
-{
-	unsigned char p21[21];
-
-	memset(p21, '\0', 21);
-
-	memcpy(p21, passwd, 16);
-	E_P24(p21, c8, p24);
-}
-
 /*
    This implements the X/Open SMB password encryption
    It takes a password, a 8 byte "crypt key" and puts 24 bytes of
    encrypted password into p24 */
 /* Note that password must be uppercased and null terminated */
-void
+int
 SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24)
 {
-	unsigned char p14[15], p21[21];
+	int rc;
+	unsigned char p14[14], p16[16], p21[21];
 
-	memset(p21, '\0', 21);
 	memset(p14, '\0', 14);
-	strncpy((char *) p14, (char *) passwd, 14);
+	memset(p16, '\0', 16);
+	memset(p21, '\0', 21);
 
-/*	strupper((char *)p14); *//* BB at least uppercase the easy range */
-	E_P16(p14, p21);
+	memcpy(p14, passwd, 14);
+	rc = E_P16(p14, p16);
+	if (rc)
+		return rc;
 
-	SMBOWFencrypt(p21, c8, p24);
+	memcpy(p21, p16, 16);
+	rc = E_P24(p21, c8, p24);
 
-	memset(p14, 0, 15);
-	memset(p21, 0, 21);
+	return rc;
 }
 
 /* Routines for Windows NT MD4 Hash functions. */
@@ -279,16 +351,18 @@ int
 SMBNTencrypt(unsigned char *passwd, unsigned char *c8, unsigned char *p24)
 {
 	int rc;
-	unsigned char p21[21];
+	unsigned char p16[16], p21[21];
 
+	memset(p16, '\0', 16);
 	memset(p21, '\0', 21);
 
-	rc = E_md4hash(passwd, p21);
+	rc = E_md4hash(passwd, p16);
 	if (rc) {
 		cFYI(1, "%s Can't generate NT hash, error: %d", __func__, rc);
 		return rc;
 	}
-	SMBOWFencrypt(p21, c8, p24);
+	memcpy(p21, p16, 16);
+	rc = E_P24(p21, c8, p24);
 	return rc;
 }
 
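
The deleted smbdes.c and the new smbencrypt.c helpers share one preparation step before DES is applied (now via the kernel crypto API's "ecb(des)" transform instead of the hand-rolled cipher): a 7-byte, 56-bit key fragment is spread across 8 bytes so each DES key byte carries 7 key bits in its high bits, leaving the low (parity) bit clear. A standalone userspace rendering of that str_to_key() step, with a made-up input purely to show the expansion:

#include <stdio.h>

static void str_to_key(const unsigned char *str, unsigned char *key)
{
	int i;

	key[0] = str[0] >> 1;
	key[1] = ((str[0] & 0x01) << 6) | (str[1] >> 2);
	key[2] = ((str[1] & 0x03) << 5) | (str[2] >> 3);
	key[3] = ((str[2] & 0x07) << 4) | (str[3] >> 4);
	key[4] = ((str[3] & 0x0F) << 3) | (str[4] >> 5);
	key[5] = ((str[4] & 0x1F) << 2) | (str[5] >> 6);
	key[6] = ((str[5] & 0x3F) << 1) | (str[6] >> 7);
	key[7] = str[6] & 0x7F;
	for (i = 0; i < 8; i++)
		key[i] = (unsigned char)(key[i] << 1);	/* shift 7 bits up, parity bit stays 0 */
}

int main(void)
{
	const unsigned char in[7] = { 'K', 'G', 'S', '!', '@', '#', '$' };	/* example input */
	unsigned char key[8];
	int i;

	str_to_key(in, key);
	for (i = 0; i < 8; i++)
		printf("%02x ", key[i]);	/* 8 expanded key bytes */
	printf("\n");
	return 0;
}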
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 46d8756f2b24..f2513fb8c391 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -129,7 +129,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
 	unsigned int len = iov[0].iov_len;
 	unsigned int total_len;
 	int first_vec = 0;
-	unsigned int smb_buf_length = smb_buffer->smb_buf_length;
+	unsigned int smb_buf_length = be32_to_cpu(smb_buffer->smb_buf_length);
 	struct socket *ssocket = server->ssocket;
 
 	if (ssocket == NULL)
@@ -144,17 +144,10 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
 	else
 		smb_msg.msg_flags = MSG_NOSIGNAL;
 
-	/* smb header is converted in header_assemble. bcc and rest of SMB word
-	   area, and byte area if necessary, is converted to littleendian in
-	   cifssmb.c and RFC1001 len is converted to bigendian in smb_send
-	   Flags2 is converted in SendReceive */
-
-
 	total_len = 0;
 	for (i = 0; i < n_vec; i++)
 		total_len += iov[i].iov_len;
 
-	smb_buffer->smb_buf_length = cpu_to_be32(smb_buffer->smb_buf_length);
 	cFYI(1, "Sending smb: total_len %d", total_len);
 	dump_smb(smb_buffer, len);
 
@@ -243,7 +236,7 @@ smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
 
 	/* Don't want to modify the buffer as a
 	   side effect of this call. */
-	smb_buffer->smb_buf_length = smb_buf_length;
+	smb_buffer->smb_buf_length = cpu_to_be32(smb_buf_length);
 
 	return rc;
 }
@@ -387,7 +380,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
 #ifdef CONFIG_CIFS_STATS2
 	atomic_inc(&server->inSend);
 #endif
-	rc = smb_send(server, in_buf, in_buf->smb_buf_length);
+	rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
 #ifdef CONFIG_CIFS_STATS2
 	atomic_dec(&server->inSend);
 	mid->when_sent = jiffies;
@@ -422,7 +415,7 @@ SendReceiveNoRsp(const unsigned int xid, struct cifsSesInfo *ses,
 	int resp_buf_type;
 
 	iov[0].iov_base = (char *)in_buf;
-	iov[0].iov_len = in_buf->smb_buf_length + 4;
+	iov[0].iov_len = be32_to_cpu(in_buf->smb_buf_length) + 4;
 	flags |= CIFS_NO_RESP;
 	rc = SendReceive2(xid, ses, iov, 1, &resp_buf_type, flags);
 	cFYI(DBG2, "SendRcvNoRsp flags %d rc %d", flags, rc);
@@ -488,10 +481,10 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
 	int rc = 0;
 
 	/* -4 for RFC1001 length and +2 for BCC field */
-	in_buf->smb_buf_length = sizeof(struct smb_hdr) - 4 + 2;
+	in_buf->smb_buf_length = cpu_to_be32(sizeof(struct smb_hdr) - 4 + 2);
 	in_buf->Command = SMB_COM_NT_CANCEL;
 	in_buf->WordCount = 0;
-	put_bcc_le(0, in_buf);
+	put_bcc(0, in_buf);
 
 	mutex_lock(&server->srv_mutex);
 	rc = cifs_sign_smb(in_buf, server, &mid->sequence_number);
@@ -499,7 +492,7 @@ send_nt_cancel(struct TCP_Server_Info *server, struct smb_hdr *in_buf,
 		mutex_unlock(&server->srv_mutex);
 		return rc;
 	}
-	rc = smb_send(server, in_buf, in_buf->smb_buf_length);
+	rc = smb_send(server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
 	mutex_unlock(&server->srv_mutex);
 
 	cFYI(1, "issued NT_CANCEL for mid %u, rc = %d",
@@ -612,7 +605,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
 		return rc;
 	}
 
-	receive_len = midQ->resp_buf->smb_buf_length;
+	receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
 
 	if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
 		cERROR(1, "Frame too large received. Length: %d Xid: %d",
@@ -651,11 +644,6 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
 		rc = map_smb_to_linux_error(midQ->resp_buf,
 					    flags & CIFS_LOG_ERROR);
 
-		/* convert ByteCount if necessary */
-		if (receive_len >= sizeof(struct smb_hdr) - 4
-		    /* do not count RFC1001 header */ +
-		    (2 * midQ->resp_buf->WordCount) + 2 /* bcc */ )
-			put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf);
 		if ((flags & CIFS_NO_RESP) == 0)
 			midQ->resp_buf = NULL; /* mark it so buf will
 						  not be freed by
@@ -698,9 +686,10 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
 	   to the same server. We may make this configurable later or
 	   use ses->maxReq */
 
-	if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
+	if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize +
+			MAX_CIFS_HDR_SIZE - 4) {
 		cERROR(1, "Illegal length, greater than maximum frame, %d",
-		       in_buf->smb_buf_length);
+		       be32_to_cpu(in_buf->smb_buf_length));
 		return -EIO;
 	}
 
@@ -733,7 +722,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
 #ifdef CONFIG_CIFS_STATS2
 	atomic_inc(&ses->server->inSend);
 #endif
-	rc = smb_send(ses->server, in_buf, in_buf->smb_buf_length);
+	rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
 #ifdef CONFIG_CIFS_STATS2
 	atomic_dec(&ses->server->inSend);
 	midQ->when_sent = jiffies;
@@ -768,7 +757,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
 		return rc;
 	}
 
-	receive_len = midQ->resp_buf->smb_buf_length;
+	receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
 
 	if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
 		cERROR(1, "Frame too large received. Length: %d Xid: %d",
@@ -781,7 +770,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
 
 	if (midQ->resp_buf && out_buf
 	    && (midQ->midState == MID_RESPONSE_RECEIVED)) {
-		out_buf->smb_buf_length = receive_len;
+		out_buf->smb_buf_length = cpu_to_be32(receive_len);
 		memcpy((char *)out_buf + 4,
 		       (char *)midQ->resp_buf + 4,
 		       receive_len);
@@ -800,16 +789,10 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
 			}
 		}
 
-		*pbytes_returned = out_buf->smb_buf_length;
+		*pbytes_returned = be32_to_cpu(out_buf->smb_buf_length);
 
 		/* BB special case reconnect tid and uid here? */
 		rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
-
-		/* convert ByteCount if necessary */
-		if (receive_len >= sizeof(struct smb_hdr) - 4
-		    /* do not count RFC1001 header */ +
-		    (2 * out_buf->WordCount) + 2 /* bcc */ )
-			put_bcc(get_bcc_le(midQ->resp_buf), midQ->resp_buf);
 	} else {
 		rc = -EIO;
 		cERROR(1, "Bad MID state?");
@@ -877,9 +860,10 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
 	   to the same server. We may make this configurable later or
 	   use ses->maxReq */
 
-	if (in_buf->smb_buf_length > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4) {
+	if (be32_to_cpu(in_buf->smb_buf_length) > CIFSMaxBufSize +
+			MAX_CIFS_HDR_SIZE - 4) {
 		cERROR(1, "Illegal length, greater than maximum frame, %d",
-		       in_buf->smb_buf_length);
+		       be32_to_cpu(in_buf->smb_buf_length));
 		return -EIO;
 	}
 
@@ -910,7 +894,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
 #ifdef CONFIG_CIFS_STATS2
 	atomic_inc(&ses->server->inSend);
 #endif
-	rc = smb_send(ses->server, in_buf, in_buf->smb_buf_length);
+	rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length));
 #ifdef CONFIG_CIFS_STATS2
 	atomic_dec(&ses->server->inSend);
 	midQ->when_sent = jiffies;
@@ -977,7 +961,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
 	if (rc != 0)
 		return rc;
 
-	receive_len = midQ->resp_buf->smb_buf_length;
+	receive_len = be32_to_cpu(midQ->resp_buf->smb_buf_length);
 	if (receive_len > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE) {
 		cERROR(1, "Frame too large received. Length: %d Xid: %d",
 			receive_len, xid);
@@ -993,7 +977,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
 		goto out;
 	}
 
-	out_buf->smb_buf_length = receive_len;
+	out_buf->smb_buf_length = cpu_to_be32(receive_len);
 	memcpy((char *)out_buf + 4,
 	       (char *)midQ->resp_buf + 4,
 	       receive_len);
@@ -1012,17 +996,11 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
 		}
 	}
 
-	*pbytes_returned = out_buf->smb_buf_length;
+	*pbytes_returned = be32_to_cpu(out_buf->smb_buf_length);
 
 	/* BB special case reconnect tid and uid here? */
 	rc = map_smb_to_linux_error(out_buf, 0 /* no log */ );
-
-	/* convert ByteCount if necessary */
-	if (receive_len >= sizeof(struct smb_hdr) - 4
-	    /* do not count RFC1001 header */ +
-	    (2 * out_buf->WordCount) + 2 /* bcc */ )
-		put_bcc(get_bcc_le(out_buf), out_buf);
-
 out:
 	delete_mid(midQ);
 	if (rstart && rc == -EACCES)
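
The receive paths in SendReceive(), SendReceive2() and SendReceiveBlockingLock() above all follow the same pattern once the length prefix lives in wire order: decode it with be32_to_cpu() and reject frames larger than the negotiated maximum. A userspace sketch of that check, with ntohl() standing in for be32_to_cpu() and an arbitrary cap value:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAX_FRAME_LEN (16 * 1024)	/* stand-in for CIFSMaxBufSize + MAX_CIFS_HDR_SIZE */

static int check_frame(const unsigned char *hdr)
{
	uint32_t receive_len;

	memcpy(&receive_len, hdr, 4);		/* 4-byte big-endian length prefix */
	receive_len = ntohl(receive_len);	/* like be32_to_cpu() */
	if (receive_len > MAX_FRAME_LEN) {
		fprintf(stderr, "Frame too large received. Length: %u\n", receive_len);
		return -1;
	}
	return 0;
}

int main(void)
{
	unsigned char good[4] = { 0x00, 0x00, 0x01, 0x00 };	/* 256 bytes */
	unsigned char bad[4]  = { 0x01, 0x00, 0x00, 0x00 };	/* 16 MB */

	printf("good frame: %d, bad frame: %d\n", check_frame(good), check_frame(bad));
	return 0;
}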
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index eae2a1491608..912995e013ec 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -112,6 +112,7 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
 	struct cifsTconInfo *pTcon;
 	struct super_block *sb;
 	char *full_path;
+	struct cifs_ntsd *pacl;
 
 	if (direntry == NULL)
 		return -EIO;
@@ -166,6 +167,25 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
 		rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value,
 			(__u16)value_size, cifs_sb->local_nls,
 			cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+	} else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL,
+			strlen(CIFS_XATTR_CIFS_ACL)) == 0) {
+		pacl = kmalloc(value_size, GFP_KERNEL);
+		if (!pacl) {
+			cFYI(1, "%s: Can't allocate memory for ACL",
+					__func__);
+			rc = -ENOMEM;
+		} else {
+#ifdef CONFIG_CIFS_ACL
+			memcpy(pacl, ea_value, value_size);
+			rc = set_cifs_acl(pacl, value_size,
+				direntry->d_inode, full_path);
+			if (rc == 0) /* force revalidate of the inode */
+				CIFS_I(direntry->d_inode)->time = 0;
+			kfree(pacl);
+#else
+			cFYI(1, "Set CIFS ACL not supported yet");
+#endif /* CONFIG_CIFS_ACL */
+		}
 	} else {
 		int temp;
 		temp = strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
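
The new CIFS ACL branch above copies the caller-supplied security descriptor into its own buffer before handing it to set_cifs_acl(). A userspace sketch of that defensive-copy pattern; the function names and the placeholder apply_acl() are invented for illustration and malloc() stands in for kmalloc():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int apply_acl(const void *acl, size_t len)
{
	/* placeholder for set_cifs_acl(): just report the size */
	printf("applying %zu-byte ACL\n", len);
	return 0;
}

static int set_acl_xattr(const void *value, size_t value_size)
{
	void *copy = malloc(value_size);
	int rc;

	if (!copy)
		return -1;		/* -ENOMEM in the kernel version */
	memcpy(copy, value, value_size);	/* private copy of the caller's blob */
	rc = apply_acl(copy, value_size);
	free(copy);
	return rc;
}

int main(void)
{
	unsigned char blob[16] = { 0x01, 0x00 };	/* fake descriptor */

	return set_acl_xattr(blob, sizeof(blob));
}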
diff --git a/fs/compat.c b/fs/compat.c
index 72fe6cda9108..0ea00832de23 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1306,241 +1306,6 @@ compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, int
 	return do_sys_open(dfd, filename, flags, mode);
 }
 
1309/*
1310 * compat_count() counts the number of arguments/envelopes. It is basically
1311 * a copy of count() from fs/exec.c, except that it works with 32 bit argv
1312 * and envp pointers.
1313 */
1314static int compat_count(compat_uptr_t __user *argv, int max)
1315{
1316 int i = 0;
1317
1318 if (argv != NULL) {
1319 for (;;) {
1320 compat_uptr_t p;
1321
1322 if (get_user(p, argv))
1323 return -EFAULT;
1324 if (!p)
1325 break;
1326 argv++;
1327 if (i++ >= max)
1328 return -E2BIG;
1329
1330 if (fatal_signal_pending(current))
1331 return -ERESTARTNOHAND;
1332 cond_resched();
1333 }
1334 }
1335 return i;
1336}
1337
1338/*
1339 * compat_copy_strings() is basically a copy of copy_strings() from fs/exec.c
1340 * except that it works with 32 bit argv and envp pointers.
1341 */
1342static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
1343 struct linux_binprm *bprm)
1344{
1345 struct page *kmapped_page = NULL;
1346 char *kaddr = NULL;
1347 unsigned long kpos = 0;
1348 int ret;
1349
1350 while (argc-- > 0) {
1351 compat_uptr_t str;
1352 int len;
1353 unsigned long pos;
1354
1355 if (get_user(str, argv+argc) ||
1356 !(len = strnlen_user(compat_ptr(str), MAX_ARG_STRLEN))) {
1357 ret = -EFAULT;
1358 goto out;
1359 }
1360
1361 if (len > MAX_ARG_STRLEN) {
1362 ret = -E2BIG;
1363 goto out;
1364 }
1365
1366 /* We're going to work our way backwords. */
1367 pos = bprm->p;
1368 str += len;
1369 bprm->p -= len;
1370
1371 while (len > 0) {
1372 int offset, bytes_to_copy;
1373
1374 if (fatal_signal_pending(current)) {
1375 ret = -ERESTARTNOHAND;
1376 goto out;
1377 }
1378 cond_resched();
1379
1380 offset = pos % PAGE_SIZE;
1381 if (offset == 0)
1382 offset = PAGE_SIZE;
1383
1384 bytes_to_copy = offset;
1385 if (bytes_to_copy > len)
1386 bytes_to_copy = len;
1387
1388 offset -= bytes_to_copy;
1389 pos -= bytes_to_copy;
1390 str -= bytes_to_copy;
1391 len -= bytes_to_copy;
1392
1393 if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
1394 struct page *page;
1395
1396 page = get_arg_page(bprm, pos, 1);
1397 if (!page) {
1398 ret = -E2BIG;
1399 goto out;
1400 }
1401
1402 if (kmapped_page) {
1403 flush_kernel_dcache_page(kmapped_page);
1404 kunmap(kmapped_page);
1405 put_page(kmapped_page);
1406 }
1407 kmapped_page = page;
1408 kaddr = kmap(kmapped_page);
1409 kpos = pos & PAGE_MASK;
1410 flush_cache_page(bprm->vma, kpos,
1411 page_to_pfn(kmapped_page));
1412 }
1413 if (copy_from_user(kaddr+offset, compat_ptr(str),
1414 bytes_to_copy)) {
1415 ret = -EFAULT;
1416 goto out;
1417 }
1418 }
1419 }
1420 ret = 0;
1421out:
1422 if (kmapped_page) {
1423 flush_kernel_dcache_page(kmapped_page);
1424 kunmap(kmapped_page);
1425 put_page(kmapped_page);
1426 }
1427 return ret;
1428}
1429
1430/*
1431 * compat_do_execve() is mostly a copy of do_execve(), with the exception
1432 * that it processes 32 bit argv and envp pointers.
1433 */
1434int compat_do_execve(char * filename,
1435 compat_uptr_t __user *argv,
1436 compat_uptr_t __user *envp,
1437 struct pt_regs * regs)
1438{
1439 struct linux_binprm *bprm;
1440 struct file *file;
1441 struct files_struct *displaced;
1442 bool clear_in_exec;
1443 int retval;
1444
1445 retval = unshare_files(&displaced);
1446 if (retval)
1447 goto out_ret;
1448
1449 retval = -ENOMEM;
1450 bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
1451 if (!bprm)
1452 goto out_files;
1453
1454 retval = prepare_bprm_creds(bprm);
1455 if (retval)
1456 goto out_free;
1457
1458 retval = check_unsafe_exec(bprm);
1459 if (retval < 0)
1460 goto out_free;
1461 clear_in_exec = retval;
1462 current->in_execve = 1;
1463
1464 file = open_exec(filename);
1465 retval = PTR_ERR(file);
1466 if (IS_ERR(file))
1467 goto out_unmark;
1468
1469 sched_exec();
1470
1471 bprm->file = file;
1472 bprm->filename = filename;
1473 bprm->interp = filename;
1474
1475 retval = bprm_mm_init(bprm);
1476 if (retval)
1477 goto out_file;
1478
1479 bprm->argc = compat_count(argv, MAX_ARG_STRINGS);
1480 if ((retval = bprm->argc) < 0)
1481 goto out;
1482
1483 bprm->envc = compat_count(envp, MAX_ARG_STRINGS);
1484 if ((retval = bprm->envc) < 0)
1485 goto out;
1486
1487 retval = prepare_binprm(bprm);
1488 if (retval < 0)
1489 goto out;
1490
1491 retval = copy_strings_kernel(1, &bprm->filename, bprm);
1492 if (retval < 0)
1493 goto out;
1494
1495 bprm->exec = bprm->p;
1496 retval = compat_copy_strings(bprm->envc, envp, bprm);
1497 if (retval < 0)
1498 goto out;
1499
1500 retval = compat_copy_strings(bprm->argc, argv, bprm);
1501 if (retval < 0)
1502 goto out;
1503
1504 retval = search_binary_handler(bprm, regs);
1505 if (retval < 0)
1506 goto out;
1507
1508 /* execve succeeded */
1509 current->fs->in_exec = 0;
1510 current->in_execve = 0;
1511 acct_update_integrals(current);
1512 free_bprm(bprm);
1513 if (displaced)
1514 put_files_struct(displaced);
1515 return retval;
1516
1517out:
1518 if (bprm->mm) {
1519 acct_arg_size(bprm, 0);
1520 mmput(bprm->mm);
1521 }
1522
1523out_file:
1524 if (bprm->file) {
1525 allow_write_access(bprm->file);
1526 fput(bprm->file);
1527 }
1528
1529out_unmark:
1530 if (clear_in_exec)
1531 current->fs->in_exec = 0;
1532 current->in_execve = 0;
1533
1534out_free:
1535 free_bprm(bprm);
1536
1537out_files:
1538 if (displaced)
1539 reset_files_struct(displaced);
1540out_ret:
1541 return retval;
1542}
1543
 #define __COMPAT_NFDBITS       (8 * sizeof(compat_ulong_t))
 
 static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
diff --git a/fs/dcache.c b/fs/dcache.c
index 22a0ef41bad1..18b2a1f10ed8 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -35,6 +35,7 @@
 #include <linux/hardirq.h>
 #include <linux/bit_spinlock.h>
 #include <linux/rculist_bl.h>
+#include <linux/prefetch.h>
 #include "internal.h"
 
 /*
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 89d394d8fe24..90f76575c056 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -428,26 +428,17 @@ static ssize_t write_file_bool(struct file *file, const char __user *user_buf,
 			      size_t count, loff_t *ppos)
 {
 	char buf[32];
-	int buf_size;
+	size_t buf_size;
+	bool bv;
 	u32 *val = file->private_data;
 
 	buf_size = min(count, (sizeof(buf)-1));
 	if (copy_from_user(buf, user_buf, buf_size))
 		return -EFAULT;
 
-	switch (buf[0]) {
-	case 'y':
-	case 'Y':
-	case '1':
-		*val = 1;
-		break;
-	case 'n':
-	case 'N':
-	case '0':
-		*val = 0;
-		break;
-	}
-
+	if (strtobool(buf, &bv) == 0)
+		*val = bv;
+
 	return count;
 }
 
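
write_file_bool() now defers to strtobool(), which accepts the same inputs the removed switch statement did: "y"/"Y"/"1" for true and "n"/"N"/"0" for false, rejecting everything else. A userspace equivalent of that parsing, for illustration only:

#include <stdbool.h>
#include <stdio.h>

static int parse_bool(const char *s, bool *res)
{
	switch (s[0]) {
	case 'y': case 'Y': case '1':
		*res = true;
		return 0;
	case 'n': case 'N': case '0':
		*res = false;
		return 0;
	default:
		return -1;	/* -EINVAL in the kernel helper */
	}
}

int main(void)
{
	bool v;

	if (parse_bool("Y\n", &v) == 0)
		printf("parsed: %d\n", v);	/* prints 1 */
	if (parse_bool("maybe", &v) != 0)
		printf("rejected invalid input\n");
	return 0;
}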
diff --git a/fs/exec.c b/fs/exec.c
index 5e62d26a4fec..c1cf372f17a7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,6 +55,7 @@
 #include <linux/fs_struct.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/oom.h>
+#include <linux/compat.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -166,8 +167,13 @@ out:
 }
 
 #ifdef CONFIG_MMU
-
-void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+/*
+ * The nascent bprm->mm is not visible until exec_mmap() but it can
+ * use a lot of memory, account these pages in current->mm temporary
+ * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we
+ * change the counter back via acct_arg_size(0).
+ */
+static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
 {
 	struct mm_struct *mm = current->mm;
 	long diff = (long)(pages - bprm->vma_pages);
@@ -186,7 +192,7 @@ void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
 #endif
 }
 
-struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 		int write)
 {
 	struct page *page;
@@ -305,11 +311,11 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
 
 #else
 
-void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
 {
 }
 
-struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 		int write)
 {
 	struct page *page;
@@ -398,22 +404,56 @@ err:
 	return err;
 }
 
+struct user_arg_ptr {
+#ifdef CONFIG_COMPAT
+	bool is_compat;
+#endif
+	union {
+		const char __user *const __user *native;
+#ifdef CONFIG_COMPAT
+		compat_uptr_t __user *compat;
+#endif
+	} ptr;
+};
+
+static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
+{
+	const char __user *native;
+
+#ifdef CONFIG_COMPAT
+	if (unlikely(argv.is_compat)) {
+		compat_uptr_t compat;
+
+		if (get_user(compat, argv.ptr.compat + nr))
+			return ERR_PTR(-EFAULT);
+
+		return compat_ptr(compat);
+	}
+#endif
+
+	if (get_user(native, argv.ptr.native + nr))
+		return ERR_PTR(-EFAULT);
+
+	return native;
+}
+
 /*
  * count() counts the number of strings in array ARGV.
  */
-static int count(const char __user * const __user * argv, int max)
+static int count(struct user_arg_ptr argv, int max)
 {
 	int i = 0;
 
-	if (argv != NULL) {
+	if (argv.ptr.native != NULL) {
 		for (;;) {
-			const char __user * p;
+			const char __user *p = get_user_arg_ptr(argv, i);
 
-			if (get_user(p, argv))
-				return -EFAULT;
 			if (!p)
 				break;
-			argv++;
+
+			if (IS_ERR(p))
+				return -EFAULT;
+
 			if (i++ >= max)
 				return -E2BIG;
 
@@ -430,7 +470,7 @@ static int count(const char __user * const __user * argv, int max)
 * processes's memory to the new process's stack. The call to get_user_pages()
 * ensures the destination page is created and not swapped out.
 */
-static int copy_strings(int argc, const char __user *const __user *argv,
+static int copy_strings(int argc, struct user_arg_ptr argv,
 			struct linux_binprm *bprm)
 {
 	struct page *kmapped_page = NULL;
@@ -443,16 +483,18 @@ static int copy_strings(int argc, const char __user *const __user *argv,
 		int len;
 		unsigned long pos;
 
-		if (get_user(str, argv+argc) ||
-		    !(len = strnlen_user(str, MAX_ARG_STRLEN))) {
-			ret = -EFAULT;
-			goto out;
-		}
+		ret = -EFAULT;
+		str = get_user_arg_ptr(argv, argc);
+		if (IS_ERR(str))
+			goto out;
 
-		if (!valid_arg_len(bprm, len)) {
-			ret = -E2BIG;
+		len = strnlen_user(str, MAX_ARG_STRLEN);
+		if (!len)
+			goto out;
+
+		ret = -E2BIG;
+		if (!valid_arg_len(bprm, len))
 			goto out;
-		}
 
 		/* We're going to work our way backwords. */
 		pos = bprm->p;
@@ -519,14 +561,19 @@ out:
 /*
  * Like copy_strings, but get argv and its values from kernel memory.
  */
-int copy_strings_kernel(int argc, const char *const *argv,
+int copy_strings_kernel(int argc, const char *const *__argv,
 			struct linux_binprm *bprm)
 {
 	int r;
 	mm_segment_t oldfs = get_fs();
+	struct user_arg_ptr argv = {
+		.ptr.native = (const char __user *const __user *)__argv,
+	};
+
 	set_fs(KERNEL_DS);
-	r = copy_strings(argc, (const char __user *const __user *)argv, bprm);
+	r = copy_strings(argc, argv, bprm);
 	set_fs(oldfs);
+
 	return r;
 }
 EXPORT_SYMBOL(copy_strings_kernel);
@@ -1004,6 +1051,7 @@ char *get_task_comm(char *buf, struct task_struct *tsk)
 	task_unlock(tsk);
 	return buf;
 }
+EXPORT_SYMBOL_GPL(get_task_comm);
 
 void set_task_comm(struct task_struct *tsk, char *buf)
 {
@@ -1379,10 +1427,10 @@ EXPORT_SYMBOL(search_binary_handler);
 /*
  * sys_execve() executes a new program.
  */
-int do_execve(const char * filename,
-	const char __user *const __user *argv,
-	const char __user *const __user *envp,
-	struct pt_regs * regs)
+static int do_execve_common(const char *filename,
+				struct user_arg_ptr argv,
+				struct user_arg_ptr envp,
+				struct pt_regs *regs)
 {
 	struct linux_binprm *bprm;
 	struct file *file;
@@ -1489,6 +1537,34 @@ out_ret:
 	return retval;
 }
 
+int do_execve(const char *filename,
+	const char __user *const __user *__argv,
+	const char __user *const __user *__envp,
+	struct pt_regs *regs)
+{
+	struct user_arg_ptr argv = { .ptr.native = __argv };
+	struct user_arg_ptr envp = { .ptr.native = __envp };
+	return do_execve_common(filename, argv, envp, regs);
+}
+
+#ifdef CONFIG_COMPAT
+int compat_do_execve(char *filename,
+	compat_uptr_t __user *__argv,
+	compat_uptr_t __user *__envp,
+	struct pt_regs *regs)
+{
+	struct user_arg_ptr argv = {
+		.is_compat = true,
+		.ptr.compat = __argv,
+	};
+	struct user_arg_ptr envp = {
+		.is_compat = true,
+		.ptr.compat = __envp,
+	};
+	return do_execve_common(filename, argv, envp, regs);
+}
+#endif
+
 void set_binfmt(struct linux_binfmt *new)
 {
 	struct mm_struct *mm = current->mm;
@@ -1659,6 +1735,7 @@ static int zap_process(struct task_struct *start, int exit_code)
 
 	t = start;
 	do {
+		task_clear_group_stop_pending(t);
 		if (t != current && t->mm) {
 			sigaddset(&t->pending.signal, SIGKILL);
 			signal_wake_up(t, 1);
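
The fs/exec.c changes above replace the duplicated compat_do_execve() in fs/compat.c with one do_execve_common() that reads argv/envp through a small tagged union, user_arg_ptr. A userspace sketch of that idea, with a single accessor hiding whether the array holds native pointers or 32-bit handles; the 32-bit side is faked with table indexes purely for illustration, whereas the kernel version uses get_user() and compat_ptr():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct arg_ptr {
	bool is_compat;
	union {
		const char *const *native;
		const uint32_t *compat;	/* 32-bit "pointers" (table indexes here) */
	} ptr;
};

static const char *get_arg(struct arg_ptr argv, const char *const *pool, int nr)
{
	if (argv.is_compat)
		return pool[argv.ptr.compat[nr]];	/* widen the 32-bit handle */
	return argv.ptr.native[nr];
}

int main(void)
{
	const char *const strings[] = { "ls", "-l", NULL };
	const uint32_t handles[] = { 0, 1 };
	struct arg_ptr native = { .is_compat = false, .ptr.native = strings };
	struct arg_ptr compat = { .is_compat = true,  .ptr.compat = handles };

	printf("%s %s\n", get_arg(native, strings, 0), get_arg(compat, strings, 1));
	return 0;
}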
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index ae8200f84e39..1cc7038e273d 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -151,6 +151,13 @@ static void fat_cache_add(struct inode *inode, struct fat_cache_id *new)
 	spin_unlock(&MSDOS_I(inode)->cache_lru_lock);
 
 	tmp = fat_cache_alloc(inode);
+	if (!tmp) {
+		spin_lock(&MSDOS_I(inode)->cache_lru_lock);
+		MSDOS_I(inode)->nr_caches--;
+		spin_unlock(&MSDOS_I(inode)->cache_lru_lock);
+		return;
+	}
+
 	spin_lock(&MSDOS_I(inode)->cache_lru_lock);
 	cache = fat_cache_merge(inode, new);
 	if (cache != NULL) {
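
The fat_cache_add() fix above handles an allocation failure by retaking the LRU lock and undoing the nr_caches bump made earlier in the function. A userspace sketch of that reserve-then-roll-back pattern, with a pthread mutex standing in for the kernel spinlock and an invented cache structure; build with -pthread:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;
static int nr_caches;

static void cache_add(void)
{
	void *tmp;

	pthread_mutex_lock(&lru_lock);
	nr_caches++;			/* reserve a slot */
	pthread_mutex_unlock(&lru_lock);

	tmp = malloc(64);		/* may fail, like fat_cache_alloc() */
	if (!tmp) {
		pthread_mutex_lock(&lru_lock);
		nr_caches--;		/* roll the reservation back */
		pthread_mutex_unlock(&lru_lock);
		return;
	}

	/* ...normally tmp would be linked into the cache here... */
	free(tmp);			/* demo cleanup only */
}

int main(void)
{
	cache_add();
	printf("nr_caches = %d\n", nr_caches);
	return 0;
}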
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index ee42b9e0b16a..4ad64732cbce 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -98,7 +98,7 @@ next:
 
 	*bh = sb_bread(sb, phys);
 	if (*bh == NULL) {
-		printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n",
+		fat_msg(sb, KERN_ERR, "Directory bread(block %llu) failed",
 			(llu)phys);
 		/* skip this block */
 		*pos = (iblock + 1) << sb->s_blocksize_bits;
@@ -136,9 +136,10 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos,
 * but ignore that right now.
 * Ahem... Stack smashing in ring 0 isn't fun. Fixed.
 */
-static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len,
-		       int uni_xlate, struct nls_table *nls)
+static int uni16_to_x8(struct super_block *sb, unsigned char *ascii,
+		       const wchar_t *uni, int len, struct nls_table *nls)
 {
+	int uni_xlate = MSDOS_SB(sb)->options.unicode_xlate;
 	const wchar_t *ip;
 	wchar_t ec;
 	unsigned char *op;
@@ -166,23 +167,23 @@ static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len,
 	}
 
 	if (unlikely(*ip)) {
-		printk(KERN_WARNING "FAT: filename was truncated while "
+		fat_msg(sb, KERN_WARNING, "filename was truncated while "
 		       "converting.");
 	}
 
 	*op = 0;
 	return (op - ascii);
 }
 
-static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni,
+static inline int fat_uni_to_x8(struct super_block *sb, const wchar_t *uni,
 				unsigned char *buf, int size)
 {
+	struct msdos_sb_info *sbi = MSDOS_SB(sb);
 	if (sbi->options.utf8)
 		return utf16s_to_utf8s(uni, FAT_MAX_UNI_CHARS,
 				UTF16_HOST_ENDIAN, buf, size);
 	else
-		return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate,
-				   sbi->nls_io);
+		return uni16_to_x8(sb, buf, uni, size, sbi->nls_io);
 }
 
 static inline int
@@ -419,7 +420,7 @@ parse_record:
 
 		/* Compare shortname */
 		bufuname[last_u] = 0x0000;
-		len = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
+		len = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname));
 		if (fat_name_match(sbi, name, name_len, bufname, len))
 			goto found;
 
@@ -428,7 +429,7 @@ parse_record:
 			int size = PATH_MAX - FAT_MAX_UNI_SIZE;
 
 			/* Compare longname */
-			len = fat_uni_to_x8(sbi, unicode, longname, size);
+			len = fat_uni_to_x8(sb, unicode, longname, size);
 			if (fat_name_match(sbi, name, name_len, longname, len))
 				goto found;
 		}
@@ -545,7 +546,7 @@ parse_record:
 		if (nr_slots) {
 			void *longname = unicode + FAT_MAX_UNI_CHARS;
 			int size = PATH_MAX - FAT_MAX_UNI_SIZE;
-			int len = fat_uni_to_x8(sbi, unicode, longname, size);
+			int len = fat_uni_to_x8(sb, unicode, longname, size);
 
 			fill_name = longname;
 			fill_len = len;
@@ -621,7 +622,7 @@ parse_record:
 
 	if (isvfat) {
 		bufuname[j] = 0x0000;
-		i = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
+		i = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname));
 	}
 	if (nr_slots) {
 		/* hack for fat_ioctl_filldir() */
@@ -979,6 +980,7 @@ static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots)
 
 int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
 {
+	struct super_block *sb = dir->i_sb;
 	struct msdos_dir_entry *de;
 	struct buffer_head *bh;
 	int err = 0, nr_slots;
@@ -1013,8 +1015,8 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
 		 */
 		err = __fat_remove_entries(dir, sinfo->slot_off, nr_slots);
 		if (err) {
-			printk(KERN_WARNING
-			       "FAT: Couldn't remove the long name slots\n");
+			fat_msg(sb, KERN_WARNING,
+			       "Couldn't remove the long name slots");
 		}
 	}
 
@@ -1265,7 +1267,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
 		if (sbi->fat_bits != 32)
 			goto error;
 	} else if (MSDOS_I(dir)->i_start == 0) {
-		printk(KERN_ERR "FAT: Corrupted directory (i_pos %lld)\n",
+		fat_msg(sb, KERN_ERR, "Corrupted directory (i_pos %lld)",
 		       MSDOS_I(dir)->i_pos);
 		err = -EIO;
 		goto error;
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index f50408901f7e..8276cc282dec 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -319,19 +319,20 @@ extern struct inode *fat_build_inode(struct super_block *sb,
319 struct msdos_dir_entry *de, loff_t i_pos); 319 struct msdos_dir_entry *de, loff_t i_pos);
320extern int fat_sync_inode(struct inode *inode); 320extern int fat_sync_inode(struct inode *inode);
321extern int fat_fill_super(struct super_block *sb, void *data, int silent, 321extern int fat_fill_super(struct super_block *sb, void *data, int silent,
322 const struct inode_operations *fs_dir_inode_ops, 322 int isvfat, void (*setup)(struct super_block *));
323 int isvfat, void (*setup)(struct super_block *));
324 323
325extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, 324extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
326 struct inode *i2); 325 struct inode *i2);
327/* fat/misc.c */ 326/* fat/misc.c */
328extern void 327extern void
329__fat_fs_error(struct super_block *s, int report, const char *fmt, ...) 328__fat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
329 __attribute__ ((format (printf, 3, 4))) __cold;
330#define fat_fs_error(sb, fmt, args...) \
331 __fat_fs_error(sb, 1, fmt , ## args)
332#define fat_fs_error_ratelimit(sb, fmt, args...) \
333 __fat_fs_error(sb, __ratelimit(&MSDOS_SB(sb)->ratelimit), fmt , ## args)
334void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...)
330 __attribute__ ((format (printf, 3, 4))) __cold; 335 __attribute__ ((format (printf, 3, 4))) __cold;
331#define fat_fs_error(s, fmt, args...) \
332 __fat_fs_error(s, 1, fmt , ## args)
333#define fat_fs_error_ratelimit(s, fmt, args...) \
334 __fat_fs_error(s, __ratelimit(&MSDOS_SB(s)->ratelimit), fmt , ## args)
335extern int fat_clusters_flush(struct super_block *sb); 336extern int fat_clusters_flush(struct super_block *sb);
336extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); 337extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster);
337extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, 338extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
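For orientation, a minimal sketch of how a call site changes under the helpers declared above; the message is the one converted in the fs/fat/fatent.c hunk further down, and the sketch itself is illustrative rather than part of the patch:

	/* before: the "FAT: " prefix and the trailing newline are open-coded */
	printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", (llu)blocknr);

	/* after: fat_msg() adds the "FAT-fs (%s): " prefix and the newline itself,
	 * so the call site carries only the message body and its arguments */
	fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr);

The fat_fs_error()/fat_fs_error_ratelimit() macros keep their role for corruption reports, which may remount read-only or panic depending on the errors= option; everything else is expected to go through fat_msg().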
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index b47d2c9f4fa1..2e81ac0df7e2 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -95,7 +95,7 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent,
95err_brelse: 95err_brelse:
96 brelse(bhs[0]); 96 brelse(bhs[0]);
97err: 97err:
98 printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", (llu)blocknr); 98 fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr);
99 return -EIO; 99 return -EIO;
100} 100}
101 101
@@ -108,7 +108,7 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent,
108 fatent->fat_inode = MSDOS_SB(sb)->fat_inode; 108 fatent->fat_inode = MSDOS_SB(sb)->fat_inode;
109 fatent->bhs[0] = sb_bread(sb, blocknr); 109 fatent->bhs[0] = sb_bread(sb, blocknr);
110 if (!fatent->bhs[0]) { 110 if (!fatent->bhs[0]) {
111 printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", 111 fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)",
112 (llu)blocknr); 112 (llu)blocknr);
113 return -EIO; 113 return -EIO;
114 } 114 }
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 8d68690bdcf1..cb8d8391ac0b 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -581,7 +581,8 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
581 buf->f_bavail = sbi->free_clusters; 581 buf->f_bavail = sbi->free_clusters;
582 buf->f_fsid.val[0] = (u32)id; 582 buf->f_fsid.val[0] = (u32)id;
583 buf->f_fsid.val[1] = (u32)(id >> 32); 583 buf->f_fsid.val[1] = (u32)(id >> 32);
584 buf->f_namelen = sbi->options.isvfat ? FAT_LFN_LEN : 12; 584 buf->f_namelen =
585 (sbi->options.isvfat ? FAT_LFN_LEN : 12) * NLS_MAX_CHARSET_SIZE;
585 586
586 return 0; 587 return 0;
587} 588}
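The statfs change above reports the name limit in bytes rather than in on-disk characters, since NLS conversion can expand every character on its way to userspace. As a worked sketch, assuming the usual values of these constants at this point in the tree (FAT_LFN_LEN == 255, NLS_MAX_CHARSET_SIZE == 6):

	/* vfat:  255 * 6 = 1530 bytes;   msdos:  12 * 6 = 72 bytes */
	buf->f_namelen =
		(sbi->options.isvfat ? FAT_LFN_LEN : 12) * NLS_MAX_CHARSET_SIZE;

so the advertised maximum filename length becomes a worst-case encoded byte count instead of a character count.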
@@ -619,8 +620,8 @@ retry:
619 620
620 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); 621 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
621 if (!bh) { 622 if (!bh) {
622 printk(KERN_ERR "FAT: unable to read inode block " 623 fat_msg(sb, KERN_ERR, "unable to read inode block "
623 "for updating (i_pos %lld)\n", i_pos); 624 "for updating (i_pos %lld)", i_pos);
624 return -EIO; 625 return -EIO;
625 } 626 }
626 spin_lock(&sbi->inode_hash_lock); 627 spin_lock(&sbi->inode_hash_lock);
@@ -976,8 +977,8 @@ static const match_table_t vfat_tokens = {
976 {Opt_err, NULL} 977 {Opt_err, NULL}
977}; 978};
978 979
979static int parse_options(char *options, int is_vfat, int silent, int *debug, 980static int parse_options(struct super_block *sb, char *options, int is_vfat,
980 struct fat_mount_options *opts) 981 int silent, int *debug, struct fat_mount_options *opts)
981{ 982{
982 char *p; 983 char *p;
983 substring_t args[MAX_OPT_ARGS]; 984 substring_t args[MAX_OPT_ARGS];
@@ -1168,15 +1169,15 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1168 1169
1169 /* obsolete mount options */ 1170 /* obsolete mount options */
1170 case Opt_obsolate: 1171 case Opt_obsolate:
1171 printk(KERN_INFO "FAT: \"%s\" option is obsolete, " 1172 fat_msg(sb, KERN_INFO, "\"%s\" option is obsolete, "
1172 "not supported now\n", p); 1173 "not supported now", p);
1173 break; 1174 break;
1174 /* unknown option */ 1175 /* unknown option */
1175 default: 1176 default:
1176 if (!silent) { 1177 if (!silent) {
1177 printk(KERN_ERR 1178 fat_msg(sb, KERN_ERR,
1178 "FAT: Unrecognized mount option \"%s\" " 1179 "Unrecognized mount option \"%s\" "
1179 "or missing value\n", p); 1180 "or missing value", p);
1180 } 1181 }
1181 return -EINVAL; 1182 return -EINVAL;
1182 } 1183 }
@@ -1185,7 +1186,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1185out: 1186out:
1186 /* UTF-8 doesn't provide FAT semantics */ 1187 /* UTF-8 doesn't provide FAT semantics */
1187 if (!strcmp(opts->iocharset, "utf8")) { 1188 if (!strcmp(opts->iocharset, "utf8")) {
1188 printk(KERN_ERR "FAT: utf8 is not a recommended IO charset" 1189 fat_msg(sb, KERN_ERR, "utf8 is not a recommended IO charset"
1189 " for FAT filesystems, filesystem will be " 1190 " for FAT filesystems, filesystem will be "
1190 "case sensitive!\n"); 1191 "case sensitive!\n");
1191 } 1192 }
@@ -1238,8 +1239,7 @@ static int fat_read_root(struct inode *inode)
1238/* 1239/*
1239 * Read the super block of an MS-DOS FS. 1240 * Read the super block of an MS-DOS FS.
1240 */ 1241 */
1241int fat_fill_super(struct super_block *sb, void *data, int silent, 1242int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
1242 const struct inode_operations *fs_dir_inode_ops, int isvfat,
1243 void (*setup)(struct super_block *)) 1243 void (*setup)(struct super_block *))
1244{ 1244{
1245 struct inode *root_inode = NULL, *fat_inode = NULL; 1245 struct inode *root_inode = NULL, *fat_inode = NULL;
@@ -1268,11 +1268,10 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1268 sb->s_magic = MSDOS_SUPER_MAGIC; 1268 sb->s_magic = MSDOS_SUPER_MAGIC;
1269 sb->s_op = &fat_sops; 1269 sb->s_op = &fat_sops;
1270 sb->s_export_op = &fat_export_ops; 1270 sb->s_export_op = &fat_export_ops;
1271 sbi->dir_ops = fs_dir_inode_ops;
1272 ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL, 1271 ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL,
1273 DEFAULT_RATELIMIT_BURST); 1272 DEFAULT_RATELIMIT_BURST);
1274 1273
1275 error = parse_options(data, isvfat, silent, &debug, &sbi->options); 1274 error = parse_options(sb, data, isvfat, silent, &debug, &sbi->options);
1276 if (error) 1275 if (error)
1277 goto out_fail; 1276 goto out_fail;
1278 1277
@@ -1282,20 +1281,20 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1282 sb_min_blocksize(sb, 512); 1281 sb_min_blocksize(sb, 512);
1283 bh = sb_bread(sb, 0); 1282 bh = sb_bread(sb, 0);
1284 if (bh == NULL) { 1283 if (bh == NULL) {
1285 printk(KERN_ERR "FAT: unable to read boot sector\n"); 1284 fat_msg(sb, KERN_ERR, "unable to read boot sector");
1286 goto out_fail; 1285 goto out_fail;
1287 } 1286 }
1288 1287
1289 b = (struct fat_boot_sector *) bh->b_data; 1288 b = (struct fat_boot_sector *) bh->b_data;
1290 if (!b->reserved) { 1289 if (!b->reserved) {
1291 if (!silent) 1290 if (!silent)
1292 printk(KERN_ERR "FAT: bogus number of reserved sectors\n"); 1291 fat_msg(sb, KERN_ERR, "bogus number of reserved sectors");
1293 brelse(bh); 1292 brelse(bh);
1294 goto out_invalid; 1293 goto out_invalid;
1295 } 1294 }
1296 if (!b->fats) { 1295 if (!b->fats) {
1297 if (!silent) 1296 if (!silent)
1298 printk(KERN_ERR "FAT: bogus number of FAT structure\n"); 1297 fat_msg(sb, KERN_ERR, "bogus number of FAT structure");
1299 brelse(bh); 1298 brelse(bh);
1300 goto out_invalid; 1299 goto out_invalid;
1301 } 1300 }
@@ -1308,7 +1307,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1308 media = b->media; 1307 media = b->media;
1309 if (!fat_valid_media(media)) { 1308 if (!fat_valid_media(media)) {
1310 if (!silent) 1309 if (!silent)
1311 printk(KERN_ERR "FAT: invalid media value (0x%02x)\n", 1310 fat_msg(sb, KERN_ERR, "invalid media value (0x%02x)",
1312 media); 1311 media);
1313 brelse(bh); 1312 brelse(bh);
1314 goto out_invalid; 1313 goto out_invalid;
@@ -1318,7 +1317,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1318 || (logical_sector_size < 512) 1317 || (logical_sector_size < 512)
1319 || (logical_sector_size > 4096)) { 1318 || (logical_sector_size > 4096)) {
1320 if (!silent) 1319 if (!silent)
1321 printk(KERN_ERR "FAT: bogus logical sector size %u\n", 1320 fat_msg(sb, KERN_ERR, "bogus logical sector size %u",
1322 logical_sector_size); 1321 logical_sector_size);
1323 brelse(bh); 1322 brelse(bh);
1324 goto out_invalid; 1323 goto out_invalid;
@@ -1326,15 +1325,15 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1326 sbi->sec_per_clus = b->sec_per_clus; 1325 sbi->sec_per_clus = b->sec_per_clus;
1327 if (!is_power_of_2(sbi->sec_per_clus)) { 1326 if (!is_power_of_2(sbi->sec_per_clus)) {
1328 if (!silent) 1327 if (!silent)
1329 printk(KERN_ERR "FAT: bogus sectors per cluster %u\n", 1328 fat_msg(sb, KERN_ERR, "bogus sectors per cluster %u",
1330 sbi->sec_per_clus); 1329 sbi->sec_per_clus);
1331 brelse(bh); 1330 brelse(bh);
1332 goto out_invalid; 1331 goto out_invalid;
1333 } 1332 }
1334 1333
1335 if (logical_sector_size < sb->s_blocksize) { 1334 if (logical_sector_size < sb->s_blocksize) {
1336 printk(KERN_ERR "FAT: logical sector size too small for device" 1335 fat_msg(sb, KERN_ERR, "logical sector size too small for device"
1337 " (logical sector size = %u)\n", logical_sector_size); 1336 " (logical sector size = %u)", logical_sector_size);
1338 brelse(bh); 1337 brelse(bh);
1339 goto out_fail; 1338 goto out_fail;
1340 } 1339 }
@@ -1342,14 +1341,14 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1342 brelse(bh); 1341 brelse(bh);
1343 1342
1344 if (!sb_set_blocksize(sb, logical_sector_size)) { 1343 if (!sb_set_blocksize(sb, logical_sector_size)) {
1345 printk(KERN_ERR "FAT: unable to set blocksize %u\n", 1344 fat_msg(sb, KERN_ERR, "unable to set blocksize %u",
1346 logical_sector_size); 1345 logical_sector_size);
1347 goto out_fail; 1346 goto out_fail;
1348 } 1347 }
1349 bh = sb_bread(sb, 0); 1348 bh = sb_bread(sb, 0);
1350 if (bh == NULL) { 1349 if (bh == NULL) {
1351 printk(KERN_ERR "FAT: unable to read boot sector" 1350 fat_msg(sb, KERN_ERR, "unable to read boot sector"
1352 " (logical sector size = %lu)\n", 1351 " (logical sector size = %lu)",
1353 sb->s_blocksize); 1352 sb->s_blocksize);
1354 goto out_fail; 1353 goto out_fail;
1355 } 1354 }
@@ -1385,16 +1384,16 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1385 1384
1386 fsinfo_bh = sb_bread(sb, sbi->fsinfo_sector); 1385 fsinfo_bh = sb_bread(sb, sbi->fsinfo_sector);
1387 if (fsinfo_bh == NULL) { 1386 if (fsinfo_bh == NULL) {
1388 printk(KERN_ERR "FAT: bread failed, FSINFO block" 1387 fat_msg(sb, KERN_ERR, "bread failed, FSINFO block"
1389 " (sector = %lu)\n", sbi->fsinfo_sector); 1388 " (sector = %lu)", sbi->fsinfo_sector);
1390 brelse(bh); 1389 brelse(bh);
1391 goto out_fail; 1390 goto out_fail;
1392 } 1391 }
1393 1392
1394 fsinfo = (struct fat_boot_fsinfo *)fsinfo_bh->b_data; 1393 fsinfo = (struct fat_boot_fsinfo *)fsinfo_bh->b_data;
1395 if (!IS_FSINFO(fsinfo)) { 1394 if (!IS_FSINFO(fsinfo)) {
1396 printk(KERN_WARNING "FAT: Invalid FSINFO signature: " 1395 fat_msg(sb, KERN_WARNING, "Invalid FSINFO signature: "
1397 "0x%08x, 0x%08x (sector = %lu)\n", 1396 "0x%08x, 0x%08x (sector = %lu)",
1398 le32_to_cpu(fsinfo->signature1), 1397 le32_to_cpu(fsinfo->signature1),
1399 le32_to_cpu(fsinfo->signature2), 1398 le32_to_cpu(fsinfo->signature2),
1400 sbi->fsinfo_sector); 1399 sbi->fsinfo_sector);
@@ -1415,8 +1414,8 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1415 sbi->dir_entries = get_unaligned_le16(&b->dir_entries); 1414 sbi->dir_entries = get_unaligned_le16(&b->dir_entries);
1416 if (sbi->dir_entries & (sbi->dir_per_block - 1)) { 1415 if (sbi->dir_entries & (sbi->dir_per_block - 1)) {
1417 if (!silent) 1416 if (!silent)
1418 printk(KERN_ERR "FAT: bogus directory-entries per block" 1417 fat_msg(sb, KERN_ERR, "bogus directory-entries per block"
1419 " (%u)\n", sbi->dir_entries); 1418 " (%u)", sbi->dir_entries);
1420 brelse(bh); 1419 brelse(bh);
1421 goto out_invalid; 1420 goto out_invalid;
1422 } 1421 }
@@ -1438,7 +1437,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1438 total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT); 1437 total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT);
1439 if (total_clusters > MAX_FAT(sb)) { 1438 if (total_clusters > MAX_FAT(sb)) {
1440 if (!silent) 1439 if (!silent)
1441 printk(KERN_ERR "FAT: count of clusters too big (%u)\n", 1440 fat_msg(sb, KERN_ERR, "count of clusters too big (%u)",
1442 total_clusters); 1441 total_clusters);
1443 brelse(bh); 1442 brelse(bh);
1444 goto out_invalid; 1443 goto out_invalid;
@@ -1471,7 +1470,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1471 sprintf(buf, "cp%d", sbi->options.codepage); 1470 sprintf(buf, "cp%d", sbi->options.codepage);
1472 sbi->nls_disk = load_nls(buf); 1471 sbi->nls_disk = load_nls(buf);
1473 if (!sbi->nls_disk) { 1472 if (!sbi->nls_disk) {
1474 printk(KERN_ERR "FAT: codepage %s not found\n", buf); 1473 fat_msg(sb, KERN_ERR, "codepage %s not found", buf);
1475 goto out_fail; 1474 goto out_fail;
1476 } 1475 }
1477 1476
@@ -1479,7 +1478,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1479 if (sbi->options.isvfat) { 1478 if (sbi->options.isvfat) {
1480 sbi->nls_io = load_nls(sbi->options.iocharset); 1479 sbi->nls_io = load_nls(sbi->options.iocharset);
1481 if (!sbi->nls_io) { 1480 if (!sbi->nls_io) {
1482 printk(KERN_ERR "FAT: IO charset %s not found\n", 1481 fat_msg(sb, KERN_ERR, "IO charset %s not found",
1483 sbi->options.iocharset); 1482 sbi->options.iocharset);
1484 goto out_fail; 1483 goto out_fail;
1485 } 1484 }
@@ -1503,7 +1502,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1503 insert_inode_hash(root_inode); 1502 insert_inode_hash(root_inode);
1504 sb->s_root = d_alloc_root(root_inode); 1503 sb->s_root = d_alloc_root(root_inode);
1505 if (!sb->s_root) { 1504 if (!sb->s_root) {
1506 printk(KERN_ERR "FAT: get root inode failed\n"); 1505 fat_msg(sb, KERN_ERR, "get root inode failed");
1507 goto out_fail; 1506 goto out_fail;
1508 } 1507 }
1509 1508
@@ -1512,8 +1511,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
1512out_invalid: 1511out_invalid:
1513 error = -EINVAL; 1512 error = -EINVAL;
1514 if (!silent) 1513 if (!silent)
1515 printk(KERN_INFO "VFS: Can't find a valid FAT filesystem" 1514 fat_msg(sb, KERN_INFO, "Can't find a valid FAT filesystem");
1516 " on dev %s.\n", sb->s_id);
1517 1515
1518out_fail: 1516out_fail:
1519 if (fat_inode) 1517 if (fat_inode)
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 970e682ea754..6d93360ca0cc 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -20,30 +20,46 @@
20 * In case the file system is remounted read-only, it can be made writable 20 * In case the file system is remounted read-only, it can be made writable
21 * again by remounting it. 21 * again by remounting it.
22 */ 22 */
23void __fat_fs_error(struct super_block *s, int report, const char *fmt, ...) 23void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...)
24{ 24{
25 struct fat_mount_options *opts = &MSDOS_SB(s)->options; 25 struct fat_mount_options *opts = &MSDOS_SB(sb)->options;
26 va_list args; 26 va_list args;
27 struct va_format vaf;
27 28
28 if (report) { 29 if (report) {
29 printk(KERN_ERR "FAT: Filesystem error (dev %s)\n", s->s_id);
30
31 printk(KERN_ERR " ");
32 va_start(args, fmt); 30 va_start(args, fmt);
33 vprintk(fmt, args); 31 vaf.fmt = fmt;
32 vaf.va = &args;
33 printk(KERN_ERR "FAT-fs (%s): error, %pV\n", sb->s_id, &vaf);
34 va_end(args); 34 va_end(args);
35 printk("\n");
36 } 35 }
37 36
38 if (opts->errors == FAT_ERRORS_PANIC) 37 if (opts->errors == FAT_ERRORS_PANIC)
39 panic("FAT: fs panic from previous error\n"); 38 panic("FAT-fs (%s): fs panic from previous error\n", sb->s_id);
40 else if (opts->errors == FAT_ERRORS_RO && !(s->s_flags & MS_RDONLY)) { 39 else if (opts->errors == FAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) {
41 s->s_flags |= MS_RDONLY; 40 sb->s_flags |= MS_RDONLY;
42 printk(KERN_ERR "FAT: Filesystem has been set read-only\n"); 41 printk(KERN_ERR "FAT-fs (%s): Filesystem has been "
42 "set read-only\n", sb->s_id);
43 } 43 }
44} 44}
45EXPORT_SYMBOL_GPL(__fat_fs_error); 45EXPORT_SYMBOL_GPL(__fat_fs_error);
46 46
47/**
48 * fat_msg() - print preformatted FAT-specific messages. Everything that is
49 * not fat_fs_error() should be fat_msg().
50 */
51void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...)
52{
53 struct va_format vaf;
54 va_list args;
55
56 va_start(args, fmt);
57 vaf.fmt = fmt;
58 vaf.va = &args;
59 printk("%sFAT-fs (%s): %pV\n", level, sb->s_id, &vaf);
60 va_end(args);
61}
62
47/* Flushes the number of free clusters on FAT32 */ 63/* Flushes the number of free clusters on FAT32 */
48/* XXX: Need to write one per FSINFO block. Currently only writes 1 */ 64/* XXX: Need to write one per FSINFO block. Currently only writes 1 */
49int fat_clusters_flush(struct super_block *sb) 65int fat_clusters_flush(struct super_block *sb)
@@ -57,15 +73,15 @@ int fat_clusters_flush(struct super_block *sb)
57 73
58 bh = sb_bread(sb, sbi->fsinfo_sector); 74 bh = sb_bread(sb, sbi->fsinfo_sector);
59 if (bh == NULL) { 75 if (bh == NULL) {
60 printk(KERN_ERR "FAT: bread failed in fat_clusters_flush\n"); 76 fat_msg(sb, KERN_ERR, "bread failed in fat_clusters_flush");
61 return -EIO; 77 return -EIO;
62 } 78 }
63 79
64 fsinfo = (struct fat_boot_fsinfo *)bh->b_data; 80 fsinfo = (struct fat_boot_fsinfo *)bh->b_data;
65 /* Sanity check */ 81 /* Sanity check */
66 if (!IS_FSINFO(fsinfo)) { 82 if (!IS_FSINFO(fsinfo)) {
67 printk(KERN_ERR "FAT: Invalid FSINFO signature: " 83 fat_msg(sb, KERN_ERR, "Invalid FSINFO signature: "
68 "0x%08x, 0x%08x (sector = %lu)\n", 84 "0x%08x, 0x%08x (sector = %lu)",
69 le32_to_cpu(fsinfo->signature1), 85 le32_to_cpu(fsinfo->signature1),
70 le32_to_cpu(fsinfo->signature2), 86 le32_to_cpu(fsinfo->signature2),
71 sbi->fsinfo_sector); 87 sbi->fsinfo_sector);
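Both fat_msg() and the reworked __fat_fs_error() route the caller's varargs through a struct va_format and printk's %pV extension, so the level, the "FAT-fs (%s): " prefix and the message end up in a single printk() call instead of the three the old __fat_fs_error() used. The same shape as a self-contained userspace sketch, with vfprintf() standing in for %pV (none of this is kernel code):

	#include <stdarg.h>
	#include <stdio.h>

	/* one helper owns the "FAT-fs (<dev>): " prefix and the newline */
	__attribute__((format(printf, 2, 3)))
	static void fs_msg(const char *dev, const char *fmt, ...)
	{
		va_list args;

		va_start(args, fmt);
		fprintf(stderr, "FAT-fs (%s): ", dev);
		vfprintf(stderr, fmt, args);	/* forwards the caller's arguments */
		fputc('\n', stderr);
		va_end(args);
	}

	int main(void)
	{
		fs_msg("sda1", "bogus logical sector size %u", 1024);
		return 0;
	}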
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 711499040eb6..3b222dafd15b 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -659,14 +659,14 @@ static const struct inode_operations msdos_dir_inode_operations = {
659 659
660static void setup(struct super_block *sb) 660static void setup(struct super_block *sb)
661{ 661{
662 MSDOS_SB(sb)->dir_ops = &msdos_dir_inode_operations;
662 sb->s_d_op = &msdos_dentry_operations; 663 sb->s_d_op = &msdos_dentry_operations;
663 sb->s_flags |= MS_NOATIME; 664 sb->s_flags |= MS_NOATIME;
664} 665}
665 666
666static int msdos_fill_super(struct super_block *sb, void *data, int silent) 667static int msdos_fill_super(struct super_block *sb, void *data, int silent)
667{ 668{
668 return fat_fill_super(sb, data, silent, &msdos_dir_inode_operations, 669 return fat_fill_super(sb, data, silent, 0, setup);
669 0, setup);
670} 670}
671 671
672static struct dentry *msdos_mount(struct file_system_type *fs_type, 672static struct dentry *msdos_mount(struct file_system_type *fs_type,
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index adae3fb7451a..20b4ea53fdc4 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -1065,6 +1065,7 @@ static const struct inode_operations vfat_dir_inode_operations = {
1065 1065
1066static void setup(struct super_block *sb) 1066static void setup(struct super_block *sb)
1067{ 1067{
1068 MSDOS_SB(sb)->dir_ops = &vfat_dir_inode_operations;
1068 if (MSDOS_SB(sb)->options.name_check != 's') 1069 if (MSDOS_SB(sb)->options.name_check != 's')
1069 sb->s_d_op = &vfat_ci_dentry_ops; 1070 sb->s_d_op = &vfat_ci_dentry_ops;
1070 else 1071 else
@@ -1073,8 +1074,7 @@ static void setup(struct super_block *sb)
1073 1074
1074static int vfat_fill_super(struct super_block *sb, void *data, int silent) 1075static int vfat_fill_super(struct super_block *sb, void *data, int silent)
1075{ 1076{
1076 return fat_fill_super(sb, data, silent, &vfat_dir_inode_operations, 1077 return fat_fill_super(sb, data, silent, 1, setup);
1077 1, setup);
1078} 1078}
1079 1079
1080static struct dentry *vfat_mount(struct file_system_type *fs_type, 1080static struct dentry *vfat_mount(struct file_system_type *fs_type,
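With each variant's setup() callback now installing its own dir_ops, fat_fill_super() no longer takes an inode_operations argument at all. Purely as a hypothetical illustration (no such variant exists; all names below are made up), a further FAT flavour would plug in the same way:

	static void myfat_setup(struct super_block *sb)
	{
		MSDOS_SB(sb)->dir_ops = &myfat_dir_inode_operations;	/* hypothetical ops table */
		sb->s_d_op = &myfat_dentry_operations;			/* hypothetical dentry ops */
	}

	static int myfat_fill_super(struct super_block *sb, void *data, int silent)
	{
		return fat_fill_super(sb, data, silent, 1 /* isvfat */, myfat_setup);
	}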
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 2ba6719ac612..1a4311437a8b 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -272,7 +272,7 @@ vxfs_get_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip)
272 * *ip: VFS inode 272 * *ip: VFS inode
273 * 273 *
274 * Description: 274 * Description:
275 * vxfs_put_fake_inode frees all data asssociated with @ip. 275 * vxfs_put_fake_inode frees all data associated with @ip.
276 */ 276 */
277void 277void
278vxfs_put_fake_inode(struct inode *ip) 278vxfs_put_fake_inode(struct inode *ip)
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index f3d23ef4e876..86128202384f 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,9 +1,9 @@
1ccflags-y := -I$(src) 1ccflags-y := -I$(src)
2obj-$(CONFIG_GFS2_FS) += gfs2.o 2obj-$(CONFIG_GFS2_FS) += gfs2.o
3gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \ 3gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \
4 glops.o inode.o log.o lops.o main.o meta_io.o \ 4 glops.o log.o lops.o main.o meta_io.o \
5 aops.o dentry.o export.o file.o \ 5 aops.o dentry.o export.o file.o \
6 ops_fstype.o ops_inode.o quota.o \ 6 ops_fstype.o inode.o quota.o \
7 recovery.o rgrp.o super.o sys.o trans.o util.o 7 recovery.o rgrp.o super.o sys.o trans.o util.o
8 8
9gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o 9gfs2-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 0f5c4f9d5d62..802ac5eeba28 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1076,8 +1076,8 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
1076 bd = bh->b_private; 1076 bd = bh->b_private;
1077 if (bd && bd->bd_ail) 1077 if (bd && bd->bd_ail)
1078 goto cannot_release; 1078 goto cannot_release;
1079 gfs2_assert_warn(sdp, !buffer_pinned(bh)); 1079 if (buffer_pinned(bh) || buffer_dirty(bh))
1080 gfs2_assert_warn(sdp, !buffer_dirty(bh)); 1080 goto not_possible;
1081 bh = bh->b_this_page; 1081 bh = bh->b_this_page;
1082 } while(bh != head); 1082 } while(bh != head);
1083 gfs2_log_unlock(sdp); 1083 gfs2_log_unlock(sdp);
@@ -1107,6 +1107,10 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
1107 } while (bh != head); 1107 } while (bh != head);
1108 1108
1109 return try_to_free_buffers(page); 1109 return try_to_free_buffers(page);
1110
1111not_possible: /* Should never happen */
1112 WARN_ON(buffer_dirty(bh));
1113 WARN_ON(buffer_pinned(bh));
1110cannot_release: 1114cannot_release:
1111 gfs2_log_unlock(sdp); 1115 gfs2_log_unlock(sdp);
1112 return 0; 1116 return 0;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 74add2ddcc3f..e65493a8ac00 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -780,6 +780,8 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
780 metadata = (height != ip->i_height - 1); 780 metadata = (height != ip->i_height - 1);
781 if (metadata) 781 if (metadata)
782 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; 782 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
783 else if (ip->i_depth)
784 revokes = sdp->sd_inptrs;
783 785
784 if (ip != GFS2_I(sdp->sd_rindex)) 786 if (ip != GFS2_I(sdp->sd_rindex))
785 error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); 787 error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index f789c5732b7c..091ee4779538 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -82,12 +82,9 @@
82struct qstr gfs2_qdot __read_mostly; 82struct qstr gfs2_qdot __read_mostly;
83struct qstr gfs2_qdotdot __read_mostly; 83struct qstr gfs2_qdotdot __read_mostly;
84 84
85typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len,
86 u64 leaf_no, void *data);
87typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, 85typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent,
88 const struct qstr *name, void *opaque); 86 const struct qstr *name, void *opaque);
89 87
90
91int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, 88int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
92 struct buffer_head **bhp) 89 struct buffer_head **bhp)
93{ 90{
@@ -1600,7 +1597,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
1600 */ 1597 */
1601 1598
1602int gfs2_dir_add(struct inode *inode, const struct qstr *name, 1599int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1603 const struct gfs2_inode *nip, unsigned type) 1600 const struct gfs2_inode *nip)
1604{ 1601{
1605 struct gfs2_inode *ip = GFS2_I(inode); 1602 struct gfs2_inode *ip = GFS2_I(inode);
1606 struct buffer_head *bh; 1603 struct buffer_head *bh;
@@ -1616,7 +1613,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1616 return PTR_ERR(dent); 1613 return PTR_ERR(dent);
1617 dent = gfs2_init_dirent(inode, dent, name, bh); 1614 dent = gfs2_init_dirent(inode, dent, name, bh);
1618 gfs2_inum_out(nip, dent); 1615 gfs2_inum_out(nip, dent);
1619 dent->de_type = cpu_to_be16(type); 1616 dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode));
1620 if (ip->i_diskflags & GFS2_DIF_EXHASH) { 1617 if (ip->i_diskflags & GFS2_DIF_EXHASH) {
1621 leaf = (struct gfs2_leaf *)bh->b_data; 1618 leaf = (struct gfs2_leaf *)bh->b_data;
1622 be16_add_cpu(&leaf->lf_entries, 1); 1619 be16_add_cpu(&leaf->lf_entries, 1);
@@ -1628,6 +1625,8 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1628 gfs2_trans_add_bh(ip->i_gl, bh, 1); 1625 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1629 ip->i_entries++; 1626 ip->i_entries++;
1630 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1627 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1628 if (S_ISDIR(nip->i_inode.i_mode))
1629 inc_nlink(&ip->i_inode);
1631 gfs2_dinode_out(ip, bh->b_data); 1630 gfs2_dinode_out(ip, bh->b_data);
1632 brelse(bh); 1631 brelse(bh);
1633 error = 0; 1632 error = 0;
@@ -1672,8 +1671,9 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
1672 * Returns: 0 on success, error code on failure 1671 * Returns: 0 on success, error code on failure
1673 */ 1672 */
1674 1673
1675int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name) 1674int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
1676{ 1675{
1676 const struct qstr *name = &dentry->d_name;
1677 struct gfs2_dirent *dent, *prev = NULL; 1677 struct gfs2_dirent *dent, *prev = NULL;
1678 struct buffer_head *bh; 1678 struct buffer_head *bh;
1679 int error; 1679 int error;
@@ -1714,6 +1714,8 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
1714 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1714 gfs2_trans_add_bh(dip->i_gl, bh, 1);
1715 dip->i_entries--; 1715 dip->i_entries--;
1716 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME; 1716 dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
1717 if (S_ISDIR(dentry->d_inode->i_mode))
1718 drop_nlink(&dip->i_inode);
1717 gfs2_dinode_out(dip, bh->b_data); 1719 gfs2_dinode_out(dip, bh->b_data);
1718 brelse(bh); 1720 brelse(bh);
1719 mark_inode_dirty(&dip->i_inode); 1721 mark_inode_dirty(&dip->i_inode);
@@ -1768,94 +1770,20 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
1768} 1770}
1769 1771
1770/** 1772/**
1771 * foreach_leaf - call a function for each leaf in a directory
1772 * @dip: the directory
1773 * @lc: the function to call for each each
1774 * @data: private data to pass to it
1775 *
1776 * Returns: errno
1777 */
1778
1779static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
1780{
1781 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1782 struct buffer_head *bh;
1783 struct gfs2_leaf *leaf;
1784 u32 hsize, len;
1785 u32 ht_offset, lp_offset, ht_offset_cur = -1;
1786 u32 index = 0;
1787 __be64 *lp;
1788 u64 leaf_no;
1789 int error = 0;
1790
1791 hsize = 1 << dip->i_depth;
1792 if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
1793 gfs2_consist_inode(dip);
1794 return -EIO;
1795 }
1796
1797 lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
1798 if (!lp)
1799 return -ENOMEM;
1800
1801 while (index < hsize) {
1802 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1803 ht_offset = index - lp_offset;
1804
1805 if (ht_offset_cur != ht_offset) {
1806 error = gfs2_dir_read_data(dip, (char *)lp,
1807 ht_offset * sizeof(__be64),
1808 sdp->sd_hash_bsize, 1);
1809 if (error != sdp->sd_hash_bsize) {
1810 if (error >= 0)
1811 error = -EIO;
1812 goto out;
1813 }
1814 ht_offset_cur = ht_offset;
1815 }
1816
1817 leaf_no = be64_to_cpu(lp[lp_offset]);
1818 if (leaf_no) {
1819 error = get_leaf(dip, leaf_no, &bh);
1820 if (error)
1821 goto out;
1822 leaf = (struct gfs2_leaf *)bh->b_data;
1823 len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
1824 brelse(bh);
1825
1826 error = lc(dip, index, len, leaf_no, data);
1827 if (error)
1828 goto out;
1829
1830 index = (index & ~(len - 1)) + len;
1831 } else
1832 index++;
1833 }
1834
1835 if (index != hsize) {
1836 gfs2_consist_inode(dip);
1837 error = -EIO;
1838 }
1839
1840out:
1841 kfree(lp);
1842
1843 return error;
1844}
1845
1846/**
1847 * leaf_dealloc - Deallocate a directory leaf 1773 * leaf_dealloc - Deallocate a directory leaf
1848 * @dip: the directory 1774 * @dip: the directory
1849 * @index: the hash table offset in the directory 1775 * @index: the hash table offset in the directory
1850 * @len: the number of pointers to this leaf 1776 * @len: the number of pointers to this leaf
1851 * @leaf_no: the leaf number 1777 * @leaf_no: the leaf number
1852 * @data: not used 1778 * @leaf_bh: buffer_head for the starting leaf
1779 * last_dealloc: 1 if this is the final dealloc for the leaf, else 0
1853 * 1780 *
1854 * Returns: errno 1781 * Returns: errno
1855 */ 1782 */
1856 1783
1857static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, 1784static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1858 u64 leaf_no, void *data) 1785 u64 leaf_no, struct buffer_head *leaf_bh,
1786 int last_dealloc)
1859{ 1787{
1860 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 1788 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1861 struct gfs2_leaf *tmp_leaf; 1789 struct gfs2_leaf *tmp_leaf;
@@ -1887,14 +1815,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1887 goto out_qs; 1815 goto out_qs;
1888 1816
1889 /* Count the number of leaves */ 1817 /* Count the number of leaves */
1818 bh = leaf_bh;
1890 1819
1891 for (blk = leaf_no; blk; blk = nblk) { 1820 for (blk = leaf_no; blk; blk = nblk) {
1892 error = get_leaf(dip, blk, &bh); 1821 if (blk != leaf_no) {
1893 if (error) 1822 error = get_leaf(dip, blk, &bh);
1894 goto out_rlist; 1823 if (error)
1824 goto out_rlist;
1825 }
1895 tmp_leaf = (struct gfs2_leaf *)bh->b_data; 1826 tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1896 nblk = be64_to_cpu(tmp_leaf->lf_next); 1827 nblk = be64_to_cpu(tmp_leaf->lf_next);
1897 brelse(bh); 1828 if (blk != leaf_no)
1829 brelse(bh);
1898 1830
1899 gfs2_rlist_add(sdp, &rlist, blk); 1831 gfs2_rlist_add(sdp, &rlist, blk);
1900 l_blocks++; 1832 l_blocks++;
@@ -1918,13 +1850,18 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1918 if (error) 1850 if (error)
1919 goto out_rg_gunlock; 1851 goto out_rg_gunlock;
1920 1852
1853 bh = leaf_bh;
1854
1921 for (blk = leaf_no; blk; blk = nblk) { 1855 for (blk = leaf_no; blk; blk = nblk) {
1922 error = get_leaf(dip, blk, &bh); 1856 if (blk != leaf_no) {
1923 if (error) 1857 error = get_leaf(dip, blk, &bh);
1924 goto out_end_trans; 1858 if (error)
1859 goto out_end_trans;
1860 }
1925 tmp_leaf = (struct gfs2_leaf *)bh->b_data; 1861 tmp_leaf = (struct gfs2_leaf *)bh->b_data;
1926 nblk = be64_to_cpu(tmp_leaf->lf_next); 1862 nblk = be64_to_cpu(tmp_leaf->lf_next);
1927 brelse(bh); 1863 if (blk != leaf_no)
1864 brelse(bh);
1928 1865
1929 gfs2_free_meta(dip, blk, 1); 1866 gfs2_free_meta(dip, blk, 1);
1930 gfs2_add_inode_blocks(&dip->i_inode, -1); 1867 gfs2_add_inode_blocks(&dip->i_inode, -1);
@@ -1942,6 +1879,10 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1942 goto out_end_trans; 1879 goto out_end_trans;
1943 1880
1944 gfs2_trans_add_bh(dip->i_gl, dibh, 1); 1881 gfs2_trans_add_bh(dip->i_gl, dibh, 1);
1882 /* On the last dealloc, make this a regular file in case we crash.
1883 (We don't want to free these blocks a second time.) */
1884 if (last_dealloc)
1885 dip->i_inode.i_mode = S_IFREG;
1945 gfs2_dinode_out(dip, dibh->b_data); 1886 gfs2_dinode_out(dip, dibh->b_data);
1946 brelse(dibh); 1887 brelse(dibh);
1947 1888
@@ -1975,29 +1916,67 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
1975{ 1916{
1976 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 1917 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1977 struct buffer_head *bh; 1918 struct buffer_head *bh;
1978 int error; 1919 struct gfs2_leaf *leaf;
1920 u32 hsize, len;
1921 u32 ht_offset, lp_offset, ht_offset_cur = -1;
1922 u32 index = 0, next_index;
1923 __be64 *lp;
1924 u64 leaf_no;
1925 int error = 0, last;
1979 1926
1980 /* Dealloc on-disk leaves to FREEMETA state */ 1927 hsize = 1 << dip->i_depth;
1981 error = foreach_leaf(dip, leaf_dealloc, NULL); 1928 if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
1982 if (error) 1929 gfs2_consist_inode(dip);
1983 return error; 1930 return -EIO;
1931 }
1984 1932
1985 /* Make this a regular file in case we crash. 1933 lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
1986 (We don't want to free these blocks a second time.) */ 1934 if (!lp)
1935 return -ENOMEM;
1987 1936
1988 error = gfs2_trans_begin(sdp, RES_DINODE, 0); 1937 while (index < hsize) {
1989 if (error) 1938 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1990 return error; 1939 ht_offset = index - lp_offset;
1991 1940
1992 error = gfs2_meta_inode_buffer(dip, &bh); 1941 if (ht_offset_cur != ht_offset) {
1993 if (!error) { 1942 error = gfs2_dir_read_data(dip, (char *)lp,
1994 gfs2_trans_add_bh(dip->i_gl, bh, 1); 1943 ht_offset * sizeof(__be64),
1995 ((struct gfs2_dinode *)bh->b_data)->di_mode = 1944 sdp->sd_hash_bsize, 1);
1996 cpu_to_be32(S_IFREG); 1945 if (error != sdp->sd_hash_bsize) {
1997 brelse(bh); 1946 if (error >= 0)
1947 error = -EIO;
1948 goto out;
1949 }
1950 ht_offset_cur = ht_offset;
1951 }
1952
1953 leaf_no = be64_to_cpu(lp[lp_offset]);
1954 if (leaf_no) {
1955 error = get_leaf(dip, leaf_no, &bh);
1956 if (error)
1957 goto out;
1958 leaf = (struct gfs2_leaf *)bh->b_data;
1959 len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
1960
1961 next_index = (index & ~(len - 1)) + len;
1962 last = ((next_index >= hsize) ? 1 : 0);
1963 error = leaf_dealloc(dip, index, len, leaf_no, bh,
1964 last);
1965 brelse(bh);
1966 if (error)
1967 goto out;
1968 index = next_index;
1969 } else
1970 index++;
1998 } 1971 }
1999 1972
2000 gfs2_trans_end(sdp); 1973 if (index != hsize) {
1974 gfs2_consist_inode(dip);
1975 error = -EIO;
1976 }
1977
1978out:
1979 kfree(lp);
2001 1980
2002 return error; 1981 return error;
2003} 1982}
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index a98f644bd3df..e686af11becd 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -22,8 +22,8 @@ extern struct inode *gfs2_dir_search(struct inode *dir,
22extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, 22extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename,
23 const struct gfs2_inode *ip); 23 const struct gfs2_inode *ip);
24extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, 24extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
25 const struct gfs2_inode *ip, unsigned int type); 25 const struct gfs2_inode *ip);
26extern int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); 26extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry);
27extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, 27extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
28 filldir_t filldir); 28 filldir_t filldir);
29extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, 29extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index b5a5e60df0d5..fe9945f2ff72 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -139,7 +139,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
139 struct gfs2_sbd *sdp = sb->s_fs_info; 139 struct gfs2_sbd *sdp = sb->s_fs_info;
140 struct inode *inode; 140 struct inode *inode;
141 141
142 inode = gfs2_ilookup(sb, inum->no_addr); 142 inode = gfs2_ilookup(sb, inum->no_addr, 0);
143 if (inode) { 143 if (inode) {
144 if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { 144 if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
145 iput(inode); 145 iput(inode);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index e48310885c48..a9f5cbe45cd9 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -545,18 +545,10 @@ static int gfs2_close(struct inode *inode, struct file *file)
545/** 545/**
546 * gfs2_fsync - sync the dirty data for a file (across the cluster) 546 * gfs2_fsync - sync the dirty data for a file (across the cluster)
547 * @file: the file that points to the dentry (we ignore this) 547 * @file: the file that points to the dentry (we ignore this)
548 * @dentry: the dentry that points to the inode to sync 548 * @datasync: set if we can ignore timestamp changes
549 * 549 *
550 * The VFS will flush "normal" data for us. We only need to worry 550 * The VFS will flush data for us. We only need to worry
551 * about metadata here. For journaled data, we just do a log flush 551 * about metadata here.
552 * as we can't avoid it. Otherwise we can just bale out if datasync
553 * is set. For stuffed inodes we must flush the log in order to
554 * ensure that all data is on disk.
555 *
556 * The call to write_inode_now() is there to write back metadata and
557 * the inode itself. It does also try and write the data, but thats
558 * (hopefully) a no-op due to the VFS having already called filemap_fdatawrite()
559 * for us.
560 * 552 *
561 * Returns: errno 553 * Returns: errno
562 */ 554 */
@@ -565,22 +557,20 @@ static int gfs2_fsync(struct file *file, int datasync)
565{ 557{
566 struct inode *inode = file->f_mapping->host; 558 struct inode *inode = file->f_mapping->host;
567 int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); 559 int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
568 int ret = 0; 560 struct gfs2_inode *ip = GFS2_I(inode);
569 561 int ret;
570 if (gfs2_is_jdata(GFS2_I(inode))) {
571 gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
572 return 0;
573 }
574 562
575 if (sync_state != 0) { 563 if (datasync)
576 if (!datasync) 564 sync_state &= ~I_DIRTY_SYNC;
577 ret = write_inode_now(inode, 0);
578 565
579 if (gfs2_is_stuffed(GFS2_I(inode))) 566 if (sync_state) {
580 gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl); 567 ret = sync_inode_metadata(inode, 1);
568 if (ret)
569 return ret;
570 gfs2_ail_flush(ip->i_gl);
581 } 571 }
582 572
583 return ret; 573 return 0;
584} 574}
585 575
586/** 576/**
@@ -826,6 +816,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
826 loff_t bytes, max_bytes; 816 loff_t bytes, max_bytes;
827 struct gfs2_alloc *al; 817 struct gfs2_alloc *al;
828 int error; 818 int error;
819 loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
829 loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; 820 loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
830 next = (next + 1) << sdp->sd_sb.sb_bsize_shift; 821 next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
831 822
@@ -833,13 +824,15 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
833 if (mode & ~FALLOC_FL_KEEP_SIZE) 824 if (mode & ~FALLOC_FL_KEEP_SIZE)
834 return -EOPNOTSUPP; 825 return -EOPNOTSUPP;
835 826
836 offset = (offset >> sdp->sd_sb.sb_bsize_shift) << 827 offset &= bsize_mask;
837 sdp->sd_sb.sb_bsize_shift;
838 828
839 len = next - offset; 829 len = next - offset;
840 bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; 830 bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
841 if (!bytes) 831 if (!bytes)
842 bytes = UINT_MAX; 832 bytes = UINT_MAX;
833 bytes &= bsize_mask;
834 if (bytes == 0)
835 bytes = sdp->sd_sb.sb_bsize;
843 836
844 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); 837 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
845 error = gfs2_glock_nq(&ip->i_gh); 838 error = gfs2_glock_nq(&ip->i_gh);
@@ -870,6 +863,9 @@ retry:
870 if (error) { 863 if (error) {
871 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { 864 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
872 bytes >>= 1; 865 bytes >>= 1;
866 bytes &= bsize_mask;
867 if (bytes == 0)
868 bytes = sdp->sd_sb.sb_bsize;
873 goto retry; 869 goto retry;
874 } 870 }
875 goto out_qunlock; 871 goto out_qunlock;
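The fallocate changes above keep both the starting offset and the per-pass byte count aligned to the filesystem block size by masking with ~(sb_bsize - 1), which works because sb_bsize is a power of two; the bytes == 0 fallback then prevents a zero-length pass after the mask or after the ENOSPC halving. A standalone sketch of that arithmetic with illustrative values:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t bsize = 4096;			/* power-of-two block size */
		uint64_t bsize_mask = ~(bsize - 1);	/* clears the low 12 bits */
		uint64_t offset = 10000, bytes = 6000;

		offset &= bsize_mask;			/* 10000 -> 8192 */
		bytes &= bsize_mask;			/*  6000 -> 4096 */
		if (bytes == 0)				/* e.g. after repeated halving */
			bytes = bsize;

		printf("offset=%llu bytes=%llu\n",
		       (unsigned long long)offset, (unsigned long long)bytes);
		return 0;
	}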
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 7a4fb630a320..a2a6abbccc07 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -143,14 +143,9 @@ static int demote_ok(const struct gfs2_glock *gl)
143{ 143{
144 const struct gfs2_glock_operations *glops = gl->gl_ops; 144 const struct gfs2_glock_operations *glops = gl->gl_ops;
145 145
146 /* assert_spin_locked(&gl->gl_spin); */
147
148 if (gl->gl_state == LM_ST_UNLOCKED) 146 if (gl->gl_state == LM_ST_UNLOCKED)
149 return 0; 147 return 0;
150 if (test_bit(GLF_LFLUSH, &gl->gl_flags)) 148 if (!list_empty(&gl->gl_holders))
151 return 0;
152 if ((gl->gl_name.ln_type != LM_TYPE_INODE) &&
153 !list_empty(&gl->gl_holders))
154 return 0; 149 return 0;
155 if (glops->go_demote_ok) 150 if (glops->go_demote_ok)
156 return glops->go_demote_ok(gl); 151 return glops->go_demote_ok(gl);
@@ -158,6 +153,31 @@ static int demote_ok(const struct gfs2_glock *gl)
158} 153}
159 154
160 155
156void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
157{
158 spin_lock(&lru_lock);
159
160 if (!list_empty(&gl->gl_lru))
161 list_del_init(&gl->gl_lru);
162 else
163 atomic_inc(&lru_count);
164
165 list_add_tail(&gl->gl_lru, &lru_list);
166 set_bit(GLF_LRU, &gl->gl_flags);
167 spin_unlock(&lru_lock);
168}
169
170static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
171{
172 spin_lock(&lru_lock);
173 if (!list_empty(&gl->gl_lru)) {
174 list_del_init(&gl->gl_lru);
175 atomic_dec(&lru_count);
176 clear_bit(GLF_LRU, &gl->gl_flags);
177 }
178 spin_unlock(&lru_lock);
179}
180
161/** 181/**
162 * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list 182 * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
163 * @gl: the glock 183 * @gl: the glock
@@ -168,24 +188,8 @@ static int demote_ok(const struct gfs2_glock *gl)
168 188
169static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) 189static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
170{ 190{
171 if (demote_ok(gl)) { 191 if (demote_ok(gl))
172 spin_lock(&lru_lock); 192 gfs2_glock_add_to_lru(gl);
173
174 if (!list_empty(&gl->gl_lru))
175 list_del_init(&gl->gl_lru);
176 else
177 atomic_inc(&lru_count);
178
179 list_add_tail(&gl->gl_lru, &lru_list);
180 spin_unlock(&lru_lock);
181 }
182}
183
184void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
185{
186 spin_lock(&gl->gl_spin);
187 __gfs2_glock_schedule_for_reclaim(gl);
188 spin_unlock(&gl->gl_spin);
189} 193}
190 194
191/** 195/**
@@ -217,12 +221,7 @@ void gfs2_glock_put(struct gfs2_glock *gl)
217 spin_lock_bucket(gl->gl_hash); 221 spin_lock_bucket(gl->gl_hash);
218 hlist_bl_del_rcu(&gl->gl_list); 222 hlist_bl_del_rcu(&gl->gl_list);
219 spin_unlock_bucket(gl->gl_hash); 223 spin_unlock_bucket(gl->gl_hash);
220 spin_lock(&lru_lock); 224 gfs2_glock_remove_from_lru(gl);
221 if (!list_empty(&gl->gl_lru)) {
222 list_del_init(&gl->gl_lru);
223 atomic_dec(&lru_count);
224 }
225 spin_unlock(&lru_lock);
226 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); 225 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
227 GLOCK_BUG_ON(gl, mapping && mapping->nrpages); 226 GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
228 trace_gfs2_glock_put(gl); 227 trace_gfs2_glock_put(gl);
@@ -542,11 +541,6 @@ __acquires(&gl->gl_spin)
542 clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); 541 clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
543 542
544 gfs2_glock_hold(gl); 543 gfs2_glock_hold(gl);
545 if (target != LM_ST_UNLOCKED && (gl->gl_state == LM_ST_SHARED ||
546 gl->gl_state == LM_ST_DEFERRED) &&
547 !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
548 lck_flags |= LM_FLAG_TRY_1CB;
549
550 if (sdp->sd_lockstruct.ls_ops->lm_lock) { 544 if (sdp->sd_lockstruct.ls_ops->lm_lock) {
551 /* lock_dlm */ 545 /* lock_dlm */
552 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); 546 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
@@ -648,7 +642,7 @@ static void delete_work_func(struct work_struct *work)
648 /* Note: Unsafe to dereference ip as we don't hold right refs/locks */ 642 /* Note: Unsafe to dereference ip as we don't hold right refs/locks */
649 643
650 if (ip) 644 if (ip)
651 inode = gfs2_ilookup(sdp->sd_vfs, no_addr); 645 inode = gfs2_ilookup(sdp->sd_vfs, no_addr, 1);
652 else 646 else
653 inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED); 647 inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
654 if (inode && !IS_ERR(inode)) { 648 if (inode && !IS_ERR(inode)) {
@@ -1025,6 +1019,9 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
1025 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 1019 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
1026 return -EIO; 1020 return -EIO;
1027 1021
1022 if (test_bit(GLF_LRU, &gl->gl_flags))
1023 gfs2_glock_remove_from_lru(gl);
1024
1028 spin_lock(&gl->gl_spin); 1025 spin_lock(&gl->gl_spin);
1029 add_to_queue(gh); 1026 add_to_queue(gh);
1030 if ((LM_FLAG_NOEXP & gh->gh_flags) && 1027 if ((LM_FLAG_NOEXP & gh->gh_flags) &&
@@ -1082,7 +1079,8 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1082 !test_bit(GLF_DEMOTE, &gl->gl_flags)) 1079 !test_bit(GLF_DEMOTE, &gl->gl_flags))
1083 fast_path = 1; 1080 fast_path = 1;
1084 } 1081 }
1085 __gfs2_glock_schedule_for_reclaim(gl); 1082 if (!test_bit(GLF_LFLUSH, &gl->gl_flags))
1083 __gfs2_glock_schedule_for_reclaim(gl);
1086 trace_gfs2_glock_queue(gh, 0); 1084 trace_gfs2_glock_queue(gh, 0);
1087 spin_unlock(&gl->gl_spin); 1085 spin_unlock(&gl->gl_spin);
1088 if (likely(fast_path)) 1086 if (likely(fast_path))
@@ -1365,6 +1363,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m
1365 while(nr && !list_empty(&lru_list)) { 1363 while(nr && !list_empty(&lru_list)) {
1366 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru); 1364 gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
1367 list_del_init(&gl->gl_lru); 1365 list_del_init(&gl->gl_lru);
1366 clear_bit(GLF_LRU, &gl->gl_flags);
1368 atomic_dec(&lru_count); 1367 atomic_dec(&lru_count);
1369 1368
1370 /* Test for being demotable */ 1369 /* Test for being demotable */
@@ -1387,6 +1386,7 @@ static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_m
1387 } 1386 }
1388 nr_skipped++; 1387 nr_skipped++;
1389 list_add(&gl->gl_lru, &skipped); 1388 list_add(&gl->gl_lru, &skipped);
1389 set_bit(GLF_LRU, &gl->gl_flags);
1390 } 1390 }
1391 list_splice(&skipped, &lru_list); 1391 list_splice(&skipped, &lru_list);
1392 atomic_add(nr_skipped, &lru_count); 1392 atomic_add(nr_skipped, &lru_count);
@@ -1459,12 +1459,7 @@ static void thaw_glock(struct gfs2_glock *gl)
1459 1459
1460static void clear_glock(struct gfs2_glock *gl) 1460static void clear_glock(struct gfs2_glock *gl)
1461{ 1461{
1462 spin_lock(&lru_lock); 1462 gfs2_glock_remove_from_lru(gl);
1463 if (!list_empty(&gl->gl_lru)) {
1464 list_del_init(&gl->gl_lru);
1465 atomic_dec(&lru_count);
1466 }
1467 spin_unlock(&lru_lock);
1468 1463
1469 spin_lock(&gl->gl_spin); 1464 spin_lock(&gl->gl_spin);
1470 if (gl->gl_state != LM_ST_UNLOCKED) 1465 if (gl->gl_state != LM_ST_UNLOCKED)
@@ -1599,9 +1594,11 @@ static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
1599 return 0; 1594 return 0;
1600} 1595}
1601 1596
1602static const char *gflags2str(char *buf, const unsigned long *gflags) 1597static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
1603{ 1598{
1599 const unsigned long *gflags = &gl->gl_flags;
1604 char *p = buf; 1600 char *p = buf;
1601
1605 if (test_bit(GLF_LOCK, gflags)) 1602 if (test_bit(GLF_LOCK, gflags))
1606 *p++ = 'l'; 1603 *p++ = 'l';
1607 if (test_bit(GLF_DEMOTE, gflags)) 1604 if (test_bit(GLF_DEMOTE, gflags))
@@ -1624,6 +1621,10 @@ static const char *gflags2str(char *buf, const unsigned long *gflags)
1624 *p++ = 'F'; 1621 *p++ = 'F';
1625 if (test_bit(GLF_QUEUED, gflags)) 1622 if (test_bit(GLF_QUEUED, gflags))
1626 *p++ = 'q'; 1623 *p++ = 'q';
1624 if (test_bit(GLF_LRU, gflags))
1625 *p++ = 'L';
1626 if (gl->gl_object)
1627 *p++ = 'o';
1627 *p = 0; 1628 *p = 0;
1628 return buf; 1629 return buf;
1629} 1630}
@@ -1658,14 +1659,15 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
1658 dtime *= 1000000/HZ; /* demote time in uSec */ 1659 dtime *= 1000000/HZ; /* demote time in uSec */
1659 if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) 1660 if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
1660 dtime = 0; 1661 dtime = 0;
1661 gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d r:%d\n", 1662 gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d\n",
1662 state2str(gl->gl_state), 1663 state2str(gl->gl_state),
1663 gl->gl_name.ln_type, 1664 gl->gl_name.ln_type,
1664 (unsigned long long)gl->gl_name.ln_number, 1665 (unsigned long long)gl->gl_name.ln_number,
1665 gflags2str(gflags_buf, &gl->gl_flags), 1666 gflags2str(gflags_buf, gl),
1666 state2str(gl->gl_target), 1667 state2str(gl->gl_target),
1667 state2str(gl->gl_demote_state), dtime, 1668 state2str(gl->gl_demote_state), dtime,
1668 atomic_read(&gl->gl_ail_count), 1669 atomic_read(&gl->gl_ail_count),
1670 atomic_read(&gl->gl_revokes),
1669 atomic_read(&gl->gl_ref)); 1671 atomic_read(&gl->gl_ref));
1670 1672
1671 list_for_each_entry(gh, &gl->gl_holders, gh_list) { 1673 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index aea160690e94..6b2f757b9281 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -225,11 +225,10 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
225 225
226extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); 226extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
227extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret); 227extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
228extern void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
229extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); 228extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
230extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip); 229extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
231extern void gfs2_glock_thaw(struct gfs2_sbd *sdp); 230extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
232extern void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl); 231extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl);
233extern void gfs2_glock_free(struct gfs2_glock *gl); 232extern void gfs2_glock_free(struct gfs2_glock *gl);
234 233
235extern int __init gfs2_glock_init(void); 234extern int __init gfs2_glock_init(void);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 25eeb2bcee47..8ef70f464731 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -28,33 +28,18 @@
28#include "trans.h" 28#include "trans.h"
29 29
30/** 30/**
31 * ail_empty_gl - remove all buffers for a given lock from the AIL 31 * __gfs2_ail_flush - remove all buffers for a given lock from the AIL
32 * @gl: the glock 32 * @gl: the glock
33 * 33 *
34 * None of the buffers should be dirty, locked, or pinned. 34 * None of the buffers should be dirty, locked, or pinned.
35 */ 35 */
36 36
37static void gfs2_ail_empty_gl(struct gfs2_glock *gl) 37static void __gfs2_ail_flush(struct gfs2_glock *gl)
38{ 38{
39 struct gfs2_sbd *sdp = gl->gl_sbd; 39 struct gfs2_sbd *sdp = gl->gl_sbd;
40 struct list_head *head = &gl->gl_ail_list; 40 struct list_head *head = &gl->gl_ail_list;
41 struct gfs2_bufdata *bd; 41 struct gfs2_bufdata *bd;
42 struct buffer_head *bh; 42 struct buffer_head *bh;
43 struct gfs2_trans tr;
44
45 memset(&tr, 0, sizeof(tr));
46 tr.tr_revokes = atomic_read(&gl->gl_ail_count);
47
48 if (!tr.tr_revokes)
49 return;
50
51 /* A shortened, inline version of gfs2_trans_begin() */
52 tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
53 tr.tr_ip = (unsigned long)__builtin_return_address(0);
54 INIT_LIST_HEAD(&tr.tr_list_buf);
55 gfs2_log_reserve(sdp, tr.tr_reserved);
56 BUG_ON(current->journal_info);
57 current->journal_info = &tr;
58 43
59 spin_lock(&sdp->sd_ail_lock); 44 spin_lock(&sdp->sd_ail_lock);
60 while (!list_empty(head)) { 45 while (!list_empty(head)) {
@@ -76,7 +61,47 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
76 } 61 }
77 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); 62 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
78 spin_unlock(&sdp->sd_ail_lock); 63 spin_unlock(&sdp->sd_ail_lock);
64}
65
66
67static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
68{
69 struct gfs2_sbd *sdp = gl->gl_sbd;
70 struct gfs2_trans tr;
71
72 memset(&tr, 0, sizeof(tr));
73 tr.tr_revokes = atomic_read(&gl->gl_ail_count);
74
75 if (!tr.tr_revokes)
76 return;
77
78 /* A shortened, inline version of gfs2_trans_begin() */
79 tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64));
80 tr.tr_ip = (unsigned long)__builtin_return_address(0);
81 INIT_LIST_HEAD(&tr.tr_list_buf);
82 gfs2_log_reserve(sdp, tr.tr_reserved);
83 BUG_ON(current->journal_info);
84 current->journal_info = &tr;
85
86 __gfs2_ail_flush(gl);
87
88 gfs2_trans_end(sdp);
89 gfs2_log_flush(sdp, NULL);
90}
91
92void gfs2_ail_flush(struct gfs2_glock *gl)
93{
94 struct gfs2_sbd *sdp = gl->gl_sbd;
95 unsigned int revokes = atomic_read(&gl->gl_ail_count);
96 int ret;
97
98 if (!revokes)
99 return;
79 100
101 ret = gfs2_trans_begin(sdp, 0, revokes);
102 if (ret)
103 return;
104 __gfs2_ail_flush(gl);
80 gfs2_trans_end(sdp); 105 gfs2_trans_end(sdp);
81 gfs2_log_flush(sdp, NULL); 106 gfs2_log_flush(sdp, NULL);
82} 107}
@@ -227,6 +252,119 @@ static int inode_go_demote_ok(const struct gfs2_glock *gl)
227} 252}
228 253
229/** 254/**
255 * gfs2_set_nlink - Set the inode's link count based on on-disk info
256 * @inode: The inode in question
257 * @nlink: The link count
258 *
259 * If the link count has hit zero, it must never be raised, whatever the
260 * on-disk inode might say. When new struct inodes are created the link
261 * count is set to 1, so that we can safely use this test even when reading
262 * in on disk information for the first time.
263 */
264
265static void gfs2_set_nlink(struct inode *inode, u32 nlink)
266{
267 /*
268 * We will need to review setting the nlink count here in the
269 * light of the forthcoming ro bind mount work. This is a reminder
270 * to do that.
271 */
272 if ((inode->i_nlink != nlink) && (inode->i_nlink != 0)) {
273 if (nlink == 0)
274 clear_nlink(inode);
275 else
276 inode->i_nlink = nlink;
277 }
278}
279
280static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
281{
282 const struct gfs2_dinode *str = buf;
283 struct timespec atime;
284 u16 height, depth;
285
286 if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
287 goto corrupt;
288 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
289 ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
290 ip->i_inode.i_rdev = 0;
291 switch (ip->i_inode.i_mode & S_IFMT) {
292 case S_IFBLK:
293 case S_IFCHR:
294 ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
295 be32_to_cpu(str->di_minor));
296 break;
297 };
298
299 ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
300 ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
301 gfs2_set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
302 i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
303 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
304 atime.tv_sec = be64_to_cpu(str->di_atime);
305 atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
306 if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
307 ip->i_inode.i_atime = atime;
308 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
309 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
310 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
311 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
312
313 ip->i_goal = be64_to_cpu(str->di_goal_meta);
314 ip->i_generation = be64_to_cpu(str->di_generation);
315
316 ip->i_diskflags = be32_to_cpu(str->di_flags);
317 gfs2_set_inode_flags(&ip->i_inode);
318 height = be16_to_cpu(str->di_height);
319 if (unlikely(height > GFS2_MAX_META_HEIGHT))
320 goto corrupt;
321 ip->i_height = (u8)height;
322
323 depth = be16_to_cpu(str->di_depth);
324 if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
325 goto corrupt;
326 ip->i_depth = (u8)depth;
327 ip->i_entries = be32_to_cpu(str->di_entries);
328
329 ip->i_eattr = be64_to_cpu(str->di_eattr);
330 if (S_ISREG(ip->i_inode.i_mode))
331 gfs2_set_aops(&ip->i_inode);
332
333 return 0;
334corrupt:
335 gfs2_consist_inode(ip);
336 return -EIO;
337}
338
339/**
340 * gfs2_inode_refresh - Refresh the incore copy of the dinode
341 * @ip: The GFS2 inode
342 *
343 * Returns: errno
344 */
345
346int gfs2_inode_refresh(struct gfs2_inode *ip)
347{
348 struct buffer_head *dibh;
349 int error;
350
351 error = gfs2_meta_inode_buffer(ip, &dibh);
352 if (error)
353 return error;
354
355 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
356 brelse(dibh);
357 return -EIO;
358 }
359
360 error = gfs2_dinode_in(ip, dibh->b_data);
361 brelse(dibh);
362 clear_bit(GIF_INVALID, &ip->i_flags);
363
364 return error;
365}
366
367/**
230 * inode_go_lock - operation done after an inode lock is locked by a process 368 * inode_go_lock - operation done after an inode lock is locked by a process
231 * @gl: the glock 369 * @gl: the glock
232 * @flags: 370 * @flags:
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
index b3aa2e3210fd..6fce409b5a50 100644
--- a/fs/gfs2/glops.h
+++ b/fs/gfs2/glops.h
@@ -23,4 +23,6 @@ extern const struct gfs2_glock_operations gfs2_quota_glops;
23extern const struct gfs2_glock_operations gfs2_journal_glops; 23extern const struct gfs2_glock_operations gfs2_journal_glops;
24extern const struct gfs2_glock_operations *gfs2_glops_list[]; 24extern const struct gfs2_glock_operations *gfs2_glops_list[];
25 25
26extern void gfs2_ail_flush(struct gfs2_glock *gl);
27
26#endif /* __GLOPS_DOT_H__ */ 28#endif /* __GLOPS_DOT_H__ */
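With gfs2_ail_flush() exported here, glock code outside glops.c can force out any AIL buffers still attached to a glock. A minimal caller sketch, assuming only the symbols visible in the hunks above; the surrounding callback name is hypothetical:

/* Hypothetical caller of the newly exported helper -- illustrative only. */
static void example_go_sync(struct gfs2_glock *gl)
{
	if (atomic_read(&gl->gl_ail_count))	/* redundant: gfs2_ail_flush() also checks */
		gfs2_ail_flush(gl);		/* reserves revokes via gfs2_trans_begin() */
}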
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 870a89d6d4dc..0a064e91ac70 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -20,7 +20,6 @@
20 20
21#define DIO_WAIT 0x00000010 21#define DIO_WAIT 0x00000010
22#define DIO_METADATA 0x00000020 22#define DIO_METADATA 0x00000020
23#define DIO_ALL 0x00000100
24 23
25struct gfs2_log_operations; 24struct gfs2_log_operations;
26struct gfs2_log_element; 25struct gfs2_log_element;
@@ -200,6 +199,8 @@ enum {
200 GLF_INITIAL = 10, 199 GLF_INITIAL = 10,
201 GLF_FROZEN = 11, 200 GLF_FROZEN = 11,
202 GLF_QUEUED = 12, 201 GLF_QUEUED = 12,
202 GLF_LRU = 13,
203 GLF_OBJECT = 14, /* Used only for tracing */
203}; 204};
204 205
205struct gfs2_glock { 206struct gfs2_glock {
@@ -234,6 +235,7 @@ struct gfs2_glock {
234 235
235 struct list_head gl_ail_list; 236 struct list_head gl_ail_list;
236 atomic_t gl_ail_count; 237 atomic_t gl_ail_count;
238 atomic_t gl_revokes;
237 struct delayed_work gl_work; 239 struct delayed_work gl_work;
238 struct work_struct gl_delete; 240 struct work_struct gl_delete;
239 struct rcu_head gl_rcu; 241 struct rcu_head gl_rcu;
@@ -374,8 +376,6 @@ struct gfs2_ail {
374 unsigned int ai_first; 376 unsigned int ai_first;
375 struct list_head ai_ail1_list; 377 struct list_head ai_ail1_list;
376 struct list_head ai_ail2_list; 378 struct list_head ai_ail2_list;
377
378 u64 ai_sync_gen;
379}; 379};
380 380
381struct gfs2_journal_extent { 381struct gfs2_journal_extent {
@@ -488,7 +488,6 @@ struct gfs2_sb_host {
488 488
489 char sb_lockproto[GFS2_LOCKNAME_LEN]; 489 char sb_lockproto[GFS2_LOCKNAME_LEN];
490 char sb_locktable[GFS2_LOCKNAME_LEN]; 490 char sb_locktable[GFS2_LOCKNAME_LEN];
491 u8 sb_uuid[16];
492}; 491};
493 492
494/* 493/*
@@ -654,7 +653,6 @@ struct gfs2_sbd {
654 spinlock_t sd_ail_lock; 653 spinlock_t sd_ail_lock;
655 struct list_head sd_ail1_list; 654 struct list_head sd_ail1_list;
656 struct list_head sd_ail2_list; 655 struct list_head sd_ail2_list;
657 u64 sd_ail_sync_gen;
658 656
659 /* Replay stuff */ 657 /* Replay stuff */
660 658
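The incore.h changes add a per-glock revoke counter (gl_revokes) and two new flag bits (GLF_LRU, GLF_OBJECT) whose users are outside this part of the diff. Presumably the counter is incremented when a revoke is queued for the glock and decremented once it has been written, so the log code can tell when a glock has no revokes outstanding; a hedged sketch of that bookkeeping (function names hypothetical):

/* Hypothetical bookkeeping -- the real call sites live in the log/lops code. */
static void example_add_revoke(struct gfs2_glock *gl)
{
	atomic_inc(&gl->gl_revokes);	/* one more revoke outstanding for this glock */
}

static void example_remove_revoke(struct gfs2_glock *gl)
{
	atomic_dec(&gl->gl_revokes);	/* written to the journal, no longer outstanding */
}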
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 9134dcb89479..03e0c529063e 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1,23 +1,25 @@
1/* 1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 3 * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
4 * 4 *
5 * This copyrighted material is made available to anyone wishing to use, 5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions 6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2. 7 * of the GNU General Public License version 2.
8 */ 8 */
9 9
10#include <linux/sched.h>
11#include <linux/slab.h> 10#include <linux/slab.h>
12#include <linux/spinlock.h> 11#include <linux/spinlock.h>
13#include <linux/completion.h> 12#include <linux/completion.h>
14#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
14#include <linux/namei.h>
15#include <linux/mm.h>
16#include <linux/xattr.h>
15#include <linux/posix_acl.h> 17#include <linux/posix_acl.h>
16#include <linux/sort.h>
17#include <linux/gfs2_ondisk.h> 18#include <linux/gfs2_ondisk.h>
18#include <linux/crc32.h> 19#include <linux/crc32.h>
20#include <linux/fiemap.h>
19#include <linux/security.h> 21#include <linux/security.h>
20#include <linux/time.h> 22#include <asm/uaccess.h>
21 23
22#include "gfs2.h" 24#include "gfs2.h"
23#include "incore.h" 25#include "incore.h"
@@ -26,19 +28,14 @@
26#include "dir.h" 28#include "dir.h"
27#include "xattr.h" 29#include "xattr.h"
28#include "glock.h" 30#include "glock.h"
29#include "glops.h"
30#include "inode.h" 31#include "inode.h"
31#include "log.h"
32#include "meta_io.h" 32#include "meta_io.h"
33#include "quota.h" 33#include "quota.h"
34#include "rgrp.h" 34#include "rgrp.h"
35#include "trans.h" 35#include "trans.h"
36#include "util.h" 36#include "util.h"
37 37#include "super.h"
38struct gfs2_inum_range_host { 38#include "glops.h"
39 u64 ir_start;
40 u64 ir_length;
41};
42 39
43struct gfs2_skip_data { 40struct gfs2_skip_data {
44 u64 no_addr; 41 u64 no_addr;
@@ -74,14 +71,14 @@ static int iget_set(struct inode *inode, void *opaque)
74 return 0; 71 return 0;
75} 72}
76 73
77struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) 74struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
78{ 75{
79 unsigned long hash = (unsigned long)no_addr; 76 unsigned long hash = (unsigned long)no_addr;
80 struct gfs2_skip_data data; 77 struct gfs2_skip_data data;
81 78
82 data.no_addr = no_addr; 79 data.no_addr = no_addr;
83 data.skipped = 0; 80 data.skipped = 0;
84 data.non_block = 0; 81 data.non_block = non_block;
85 return ilookup5(sb, hash, iget_test, &data); 82 return ilookup5(sb, hash, iget_test, &data);
86} 83}
87 84
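gfs2_ilookup() now takes the non_block flag from its caller instead of hard-coding zero; judging from struct gfs2_skip_data, it presumably controls whether the lookup skips, rather than waits on, an inode that is currently being freed. Existing callers keep the old behaviour by passing 0; a trivial sketch:

/* Hypothetical wrapper preserving the old two-argument behaviour. */
static inline struct inode *gfs2_ilookup_blocking(struct super_block *sb, u64 no_addr)
{
	return gfs2_ilookup(sb, no_addr, 0);	/* 0 == the previously hard-coded non_block */
}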
@@ -248,203 +245,6 @@ fail_iput:
248 goto fail; 245 goto fail;
249} 246}
250 247
251static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
252{
253 const struct gfs2_dinode *str = buf;
254 struct timespec atime;
255 u16 height, depth;
256
257 if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
258 goto corrupt;
259 ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
260 ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
261 ip->i_inode.i_rdev = 0;
262 switch (ip->i_inode.i_mode & S_IFMT) {
263 case S_IFBLK:
264 case S_IFCHR:
265 ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
266 be32_to_cpu(str->di_minor));
267 break;
268 };
269
270 ip->i_inode.i_uid = be32_to_cpu(str->di_uid);
271 ip->i_inode.i_gid = be32_to_cpu(str->di_gid);
272 /*
273 * We will need to review setting the nlink count here in the
274 * light of the forthcoming ro bind mount work. This is a reminder
275 * to do that.
276 */
277 ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink);
278 i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
279 gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
280 atime.tv_sec = be64_to_cpu(str->di_atime);
281 atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
282 if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0)
283 ip->i_inode.i_atime = atime;
284 ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
285 ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
286 ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
287 ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
288
289 ip->i_goal = be64_to_cpu(str->di_goal_meta);
290 ip->i_generation = be64_to_cpu(str->di_generation);
291
292 ip->i_diskflags = be32_to_cpu(str->di_flags);
293 gfs2_set_inode_flags(&ip->i_inode);
294 height = be16_to_cpu(str->di_height);
295 if (unlikely(height > GFS2_MAX_META_HEIGHT))
296 goto corrupt;
297 ip->i_height = (u8)height;
298
299 depth = be16_to_cpu(str->di_depth);
300 if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
301 goto corrupt;
302 ip->i_depth = (u8)depth;
303 ip->i_entries = be32_to_cpu(str->di_entries);
304
305 ip->i_eattr = be64_to_cpu(str->di_eattr);
306 if (S_ISREG(ip->i_inode.i_mode))
307 gfs2_set_aops(&ip->i_inode);
308
309 return 0;
310corrupt:
311 if (gfs2_consist_inode(ip))
312 gfs2_dinode_print(ip);
313 return -EIO;
314}
315
316/**
317 * gfs2_inode_refresh - Refresh the incore copy of the dinode
318 * @ip: The GFS2 inode
319 *
320 * Returns: errno
321 */
322
323int gfs2_inode_refresh(struct gfs2_inode *ip)
324{
325 struct buffer_head *dibh;
326 int error;
327
328 error = gfs2_meta_inode_buffer(ip, &dibh);
329 if (error)
330 return error;
331
332 if (gfs2_metatype_check(GFS2_SB(&ip->i_inode), dibh, GFS2_METATYPE_DI)) {
333 brelse(dibh);
334 return -EIO;
335 }
336
337 error = gfs2_dinode_in(ip, dibh->b_data);
338 brelse(dibh);
339 clear_bit(GIF_INVALID, &ip->i_flags);
340
341 return error;
342}
343
344int gfs2_dinode_dealloc(struct gfs2_inode *ip)
345{
346 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
347 struct gfs2_alloc *al;
348 struct gfs2_rgrpd *rgd;
349 int error;
350
351 if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
352 if (gfs2_consist_inode(ip))
353 gfs2_dinode_print(ip);
354 return -EIO;
355 }
356
357 al = gfs2_alloc_get(ip);
358 if (!al)
359 return -ENOMEM;
360
361 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
362 if (error)
363 goto out;
364
365 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
366 if (error)
367 goto out_qs;
368
369 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
370 if (!rgd) {
371 gfs2_consist_inode(ip);
372 error = -EIO;
373 goto out_rindex_relse;
374 }
375
376 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
377 &al->al_rgd_gh);
378 if (error)
379 goto out_rindex_relse;
380
381 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 1);
382 if (error)
383 goto out_rg_gunlock;
384
385 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
386 set_bit(GLF_LFLUSH, &ip->i_gl->gl_flags);
387
388 gfs2_free_di(rgd, ip);
389
390 gfs2_trans_end(sdp);
391
392out_rg_gunlock:
393 gfs2_glock_dq_uninit(&al->al_rgd_gh);
394out_rindex_relse:
395 gfs2_glock_dq_uninit(&al->al_ri_gh);
396out_qs:
397 gfs2_quota_unhold(ip);
398out:
399 gfs2_alloc_put(ip);
400 return error;
401}
402
403/**
404 * gfs2_change_nlink - Change nlink count on inode
405 * @ip: The GFS2 inode
406 * @diff: The change in the nlink count required
407 *
408 * Returns: errno
409 */
410int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
411{
412 struct buffer_head *dibh;
413 u32 nlink;
414 int error;
415
416 BUG_ON(diff != 1 && diff != -1);
417 nlink = ip->i_inode.i_nlink + diff;
418
419 /* If we are reducing the nlink count, but the new value ends up being
420 bigger than the old one, we must have underflowed. */
421 if (diff < 0 && nlink > ip->i_inode.i_nlink) {
422 if (gfs2_consist_inode(ip))
423 gfs2_dinode_print(ip);
424 return -EIO;
425 }
426
427 error = gfs2_meta_inode_buffer(ip, &dibh);
428 if (error)
429 return error;
430
431 if (diff > 0)
432 inc_nlink(&ip->i_inode);
433 else
434 drop_nlink(&ip->i_inode);
435
436 ip->i_inode.i_ctime = CURRENT_TIME;
437
438 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
439 gfs2_dinode_out(ip, dibh->b_data);
440 brelse(dibh);
441 mark_inode_dirty(&ip->i_inode);
442
443 if (ip->i_inode.i_nlink == 0)
444 gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */
445
446 return error;
447}
448 248
449struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) 249struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
450{ 250{
@@ -543,7 +343,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
543 343
544 /* Don't create entries in an unlinked directory */ 344 /* Don't create entries in an unlinked directory */
545 if (!dip->i_inode.i_nlink) 345 if (!dip->i_inode.i_nlink)
546 return -EPERM; 346 return -ENOENT;
547 347
548 error = gfs2_dir_check(&dip->i_inode, name, NULL); 348 error = gfs2_dir_check(&dip->i_inode, name, NULL);
549 switch (error) { 349 switch (error) {
@@ -613,21 +413,44 @@ out:
613 return error; 413 return error;
614} 414}
615 415
416static void gfs2_init_dir(struct buffer_head *dibh,
417 const struct gfs2_inode *parent)
418{
419 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
420 struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
421
422 gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent);
423 dent->de_inum = di->di_num; /* already GFS2 endian */
424 dent->de_type = cpu_to_be16(DT_DIR);
425
426 dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
427 gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
428 gfs2_inum_out(parent, dent);
429 dent->de_type = cpu_to_be16(DT_DIR);
430
431}
432
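/*
 * Editorial note (not part of the patch): layout produced by gfs2_init_dir()
 * above, inside the new directory's single dinode block:
 *
 *   [ struct gfs2_dinode ][ "." dirent -> the new directory itself ]
 *   [ ".." dirent -> parent, record length padded to the end of the block ]
 *
 * Both entries are written as DT_DIR; the ".." entry is sized as
 * dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode) so it
 * consumes the remainder of the block.
 */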
616/** 433/**
617 * init_dinode - Fill in a new dinode structure 434 * init_dinode - Fill in a new dinode structure
618 * @dip: the directory this inode is being created in 435 * @dip: The directory this inode is being created in
619 * @gl: The glock covering the new inode 436 * @gl: The glock covering the new inode
620 * @inum: the inode number 437 * @inum: The inode number
621 * @mode: the file permissions 438 * @mode: The file permissions
622 * @uid: 439 * @uid: The uid of the new inode
623 * @gid: 440 * @gid: The gid of the new inode
441 * @generation: The generation number of the new inode
442 * @dev: The device number (if a device node)
443 * @symname: The symlink destination (if a symlink)
444 * @size: The inode size (ignored for directories)
445 * @bhp: The buffer head (returned to caller)
624 * 446 *
625 */ 447 */
626 448
627static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 449static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
628 const struct gfs2_inum_host *inum, unsigned int mode, 450 const struct gfs2_inum_host *inum, unsigned int mode,
629 unsigned int uid, unsigned int gid, 451 unsigned int uid, unsigned int gid,
630 const u64 *generation, dev_t dev, struct buffer_head **bhp) 452 const u64 *generation, dev_t dev, const char *symname,
453 unsigned size, struct buffer_head **bhp)
631{ 454{
632 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 455 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
633 struct gfs2_dinode *di; 456 struct gfs2_dinode *di;
@@ -646,7 +469,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
646 di->di_uid = cpu_to_be32(uid); 469 di->di_uid = cpu_to_be32(uid);
647 di->di_gid = cpu_to_be32(gid); 470 di->di_gid = cpu_to_be32(gid);
648 di->di_nlink = 0; 471 di->di_nlink = 0;
649 di->di_size = 0; 472 di->di_size = cpu_to_be64(size);
650 di->di_blocks = cpu_to_be64(1); 473 di->di_blocks = cpu_to_be64(1);
651 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec); 474 di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec);
652 di->di_major = cpu_to_be32(MAJOR(dev)); 475 di->di_major = cpu_to_be32(MAJOR(dev));
@@ -654,16 +477,6 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
654 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr); 477 di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
655 di->di_generation = cpu_to_be64(*generation); 478 di->di_generation = cpu_to_be64(*generation);
656 di->di_flags = 0; 479 di->di_flags = 0;
657
658 if (S_ISREG(mode)) {
659 if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
660 gfs2_tune_get(sdp, gt_new_files_jdata))
661 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
662 } else if (S_ISDIR(mode)) {
663 di->di_flags |= cpu_to_be32(dip->i_diskflags &
664 GFS2_DIF_INHERIT_JDATA);
665 }
666
667 di->__pad1 = 0; 480 di->__pad1 = 0;
668 di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0); 481 di->di_payload_format = cpu_to_be32(S_ISDIR(mode) ? GFS2_FORMAT_DE : 0);
669 di->di_height = 0; 482 di->di_height = 0;
@@ -677,7 +490,26 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
677 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec); 490 di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
678 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); 491 di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
679 memset(&di->di_reserved, 0, sizeof(di->di_reserved)); 492 memset(&di->di_reserved, 0, sizeof(di->di_reserved));
680 493
494 switch(mode & S_IFMT) {
495 case S_IFREG:
496 if ((dip->i_diskflags & GFS2_DIF_INHERIT_JDATA) ||
497 gfs2_tune_get(sdp, gt_new_files_jdata))
498 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
499 break;
500 case S_IFDIR:
501 di->di_flags |= cpu_to_be32(dip->i_diskflags &
502 GFS2_DIF_INHERIT_JDATA);
503 di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
504 di->di_size = cpu_to_be64(sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode));
505 di->di_entries = cpu_to_be32(2);
506 gfs2_init_dir(dibh, dip);
507 break;
508 case S_IFLNK:
509 memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, size);
510 break;
511 }
512
681 set_buffer_uptodate(dibh); 513 set_buffer_uptodate(dibh);
682 514
683 *bhp = dibh; 515 *bhp = dibh;
@@ -685,7 +517,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
685 517
686static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 518static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
687 unsigned int mode, const struct gfs2_inum_host *inum, 519 unsigned int mode, const struct gfs2_inum_host *inum,
688 const u64 *generation, dev_t dev, struct buffer_head **bhp) 520 const u64 *generation, dev_t dev, const char *symname,
521 unsigned int size, struct buffer_head **bhp)
689{ 522{
690 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 523 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
691 unsigned int uid, gid; 524 unsigned int uid, gid;
@@ -707,7 +540,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
707 if (error) 540 if (error)
708 goto out_quota; 541 goto out_quota;
709 542
710 init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp); 543 init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, symname, size, bhp);
711 gfs2_quota_change(dip, +1, uid, gid); 544 gfs2_quota_change(dip, +1, uid, gid);
712 gfs2_trans_end(sdp); 545 gfs2_trans_end(sdp);
713 546
@@ -761,14 +594,16 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
761 goto fail_quota_locks; 594 goto fail_quota_locks;
762 } 595 }
763 596
764 error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode)); 597 error = gfs2_dir_add(&dip->i_inode, name, ip);
765 if (error) 598 if (error)
766 goto fail_end_trans; 599 goto fail_end_trans;
767 600
768 error = gfs2_meta_inode_buffer(ip, &dibh); 601 error = gfs2_meta_inode_buffer(ip, &dibh);
769 if (error) 602 if (error)
770 goto fail_end_trans; 603 goto fail_end_trans;
771 ip->i_inode.i_nlink = 1; 604 inc_nlink(&ip->i_inode);
605 if (S_ISDIR(ip->i_inode.i_mode))
606 inc_nlink(&ip->i_inode);
772 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 607 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
773 gfs2_dinode_out(ip, dibh->b_data); 608 gfs2_dinode_out(ip, dibh->b_data);
774 brelse(dibh); 609 brelse(dibh);
@@ -815,27 +650,25 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
815} 650}
816 651
817/** 652/**
818 * gfs2_createi - Create a new inode 653 * gfs2_create_inode - Create a new inode
819 * @ghs: An array of two holders 654 * @dir: The parent directory
820 * @name: The name of the new file 655 * @dentry: The new dentry
821 * @mode: the permissions on the new inode 656 * @mode: The permissions on the new inode
822 * 657 * @dev: For device nodes, this is the device number
823 * @ghs[0] is an initialized holder for the directory 658 * @symname: For symlinks, this is the link destination
824 * @ghs[1] is the holder for the inode lock 659 * @size: The initial size of the inode (ignored for directories)
825 * 660 *
826 * If the return value is not NULL, the glocks on both the directory and the new 661 * Returns: 0 on success, or error code
827 * file are held. A transaction has been started and an inplace reservation
828 * is held, as well.
829 *
830 * Returns: An inode
831 */ 662 */
832 663
833struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, 664static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
834 unsigned int mode, dev_t dev) 665 unsigned int mode, dev_t dev, const char *symname,
666 unsigned int size)
835{ 667{
668 const struct qstr *name = &dentry->d_name;
669 struct gfs2_holder ghs[2];
836 struct inode *inode = NULL; 670 struct inode *inode = NULL;
837 struct gfs2_inode *dip = ghs->gh_gl->gl_object; 671 struct gfs2_inode *dip = GFS2_I(dir);
838 struct inode *dir = &dip->i_inode;
839 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 672 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
840 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; 673 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
841 int error; 674 int error;
@@ -843,10 +676,9 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
843 struct buffer_head *bh = NULL; 676 struct buffer_head *bh = NULL;
844 677
845 if (!name->len || name->len > GFS2_FNAMESIZE) 678 if (!name->len || name->len > GFS2_FNAMESIZE)
846 return ERR_PTR(-ENAMETOOLONG); 679 return -ENAMETOOLONG;
847 680
848 gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs); 681 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
849 error = gfs2_glock_nq(ghs);
850 if (error) 682 if (error)
851 goto fail; 683 goto fail;
852 684
@@ -864,7 +696,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
864 if (error) 696 if (error)
865 goto fail_gunlock; 697 goto fail_gunlock;
866 698
867 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh); 699 error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, symname, size, &bh);
868 if (error) 700 if (error)
869 goto fail_gunlock2; 701 goto fail_gunlock2;
870 702
@@ -891,18 +723,852 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
891 723
892 if (bh) 724 if (bh)
893 brelse(bh); 725 brelse(bh);
894 return inode; 726
727 gfs2_trans_end(sdp);
728 if (dip->i_alloc->al_rgd)
729 gfs2_inplace_release(dip);
730 gfs2_quota_unlock(dip);
731 gfs2_alloc_put(dip);
732 gfs2_glock_dq_uninit_m(2, ghs);
733 mark_inode_dirty(inode);
734 d_instantiate(dentry, inode);
735 return 0;
895 736
896fail_gunlock2: 737fail_gunlock2:
897 gfs2_glock_dq_uninit(ghs + 1); 738 gfs2_glock_dq_uninit(ghs + 1);
898 if (inode && !IS_ERR(inode)) 739 if (inode && !IS_ERR(inode))
899 iput(inode); 740 iput(inode);
900fail_gunlock: 741fail_gunlock:
901 gfs2_glock_dq(ghs); 742 gfs2_glock_dq_uninit(ghs);
902fail: 743fail:
903 if (bh) 744 if (bh)
904 brelse(bh); 745 brelse(bh);
905 return ERR_PTR(error); 746 return error;
747}
748
749/**
750 * gfs2_create - Create a file
751 * @dir: The directory in which to create the file
752 * @dentry: The dentry of the new file
753 * @mode: The mode of the new file
754 *
755 * Returns: errno
756 */
757
758static int gfs2_create(struct inode *dir, struct dentry *dentry,
759 int mode, struct nameidata *nd)
760{
761 struct inode *inode;
762 int ret;
763
764 for (;;) {
765 ret = gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0);
766 if (ret != -EEXIST || (nd && (nd->flags & LOOKUP_EXCL)))
767 return ret;
768
769 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
770 if (inode) {
771 if (!IS_ERR(inode))
772 break;
773 return PTR_ERR(inode);
774 }
775 }
776
777 d_instantiate(dentry, inode);
778 return 0;
779}
780
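/*
 * Editorial note (not part of the patch): the loop in gfs2_create() above
 * handles the create/lookup race.  If gfs2_create_inode() returns -EEXIST and
 * the open is not exclusive (no LOOKUP_EXCL), the existing inode is looked up
 * and instantiated instead; should that inode disappear again before the
 * lookup completes (gfs2_lookupi() returns NULL), the create is retried.
 */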
781/**
782 * gfs2_lookup - Look up a filename in a directory and return its inode
783 * @dir: The directory inode
784 * @dentry: The dentry of the new inode
785 * @nd: passed from Linux VFS, ignored by us
786 *
787 * Called by the VFS layer. Lock dir and call gfs2_lookupi()
788 *
789 * Returns: errno
790 */
791
792static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
793 struct nameidata *nd)
794{
795 struct inode *inode = NULL;
796
797 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
798 if (inode && IS_ERR(inode))
799 return ERR_CAST(inode);
800
801 if (inode) {
802 struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
803 struct gfs2_holder gh;
804 int error;
805 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
806 if (error) {
807 iput(inode);
808 return ERR_PTR(error);
809 }
810 gfs2_glock_dq_uninit(&gh);
811 return d_splice_alias(inode, dentry);
812 }
813 d_add(dentry, inode);
814
815 return NULL;
816}
817
818/**
819 * gfs2_link - Link to a file
820 * @old_dentry: The inode to link
821 * @dir: Add link to this directory
822 * @dentry: The name of the link
823 *
824 * Link the inode in "old_dentry" into the directory "dir" with the
825 * name in "dentry".
826 *
827 * Returns: errno
828 */
829
830static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
831 struct dentry *dentry)
832{
833 struct gfs2_inode *dip = GFS2_I(dir);
834 struct gfs2_sbd *sdp = GFS2_SB(dir);
835 struct inode *inode = old_dentry->d_inode;
836 struct gfs2_inode *ip = GFS2_I(inode);
837 struct gfs2_holder ghs[2];
838 struct buffer_head *dibh;
839 int alloc_required;
840 int error;
841
842 if (S_ISDIR(inode->i_mode))
843 return -EPERM;
844
845 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
846 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
847
848 error = gfs2_glock_nq(ghs); /* parent */
849 if (error)
850 goto out_parent;
851
852 error = gfs2_glock_nq(ghs + 1); /* child */
853 if (error)
854 goto out_child;
855
856 error = -ENOENT;
857 if (inode->i_nlink == 0)
858 goto out_gunlock;
859
860 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
861 if (error)
862 goto out_gunlock;
863
864 error = gfs2_dir_check(dir, &dentry->d_name, NULL);
865 switch (error) {
866 case -ENOENT:
867 break;
868 case 0:
869 error = -EEXIST;
870 default:
871 goto out_gunlock;
872 }
873
874 error = -EINVAL;
875 if (!dip->i_inode.i_nlink)
876 goto out_gunlock;
877 error = -EFBIG;
878 if (dip->i_entries == (u32)-1)
879 goto out_gunlock;
880 error = -EPERM;
881 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
882 goto out_gunlock;
883 error = -EINVAL;
884 if (!ip->i_inode.i_nlink)
885 goto out_gunlock;
886 error = -EMLINK;
887 if (ip->i_inode.i_nlink == (u32)-1)
888 goto out_gunlock;
889
890 alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
891 if (error < 0)
892 goto out_gunlock;
893 error = 0;
894
895 if (alloc_required) {
896 struct gfs2_alloc *al = gfs2_alloc_get(dip);
897 if (!al) {
898 error = -ENOMEM;
899 goto out_gunlock;
900 }
901
902 error = gfs2_quota_lock_check(dip);
903 if (error)
904 goto out_alloc;
905
906 al->al_requested = sdp->sd_max_dirres;
907
908 error = gfs2_inplace_reserve(dip);
909 if (error)
910 goto out_gunlock_q;
911
912 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
913 gfs2_rg_blocks(al) +
914 2 * RES_DINODE + RES_STATFS +
915 RES_QUOTA, 0);
916 if (error)
917 goto out_ipres;
918 } else {
919 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
920 if (error)
921 goto out_ipres;
922 }
923
924 error = gfs2_meta_inode_buffer(ip, &dibh);
925 if (error)
926 goto out_end_trans;
927
928 error = gfs2_dir_add(dir, &dentry->d_name, ip);
929 if (error)
930 goto out_brelse;
931
932 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
933 inc_nlink(&ip->i_inode);
934 ip->i_inode.i_ctime = CURRENT_TIME;
935 gfs2_dinode_out(ip, dibh->b_data);
936 mark_inode_dirty(&ip->i_inode);
937
938out_brelse:
939 brelse(dibh);
940out_end_trans:
941 gfs2_trans_end(sdp);
942out_ipres:
943 if (alloc_required)
944 gfs2_inplace_release(dip);
945out_gunlock_q:
946 if (alloc_required)
947 gfs2_quota_unlock(dip);
948out_alloc:
949 if (alloc_required)
950 gfs2_alloc_put(dip);
951out_gunlock:
952 gfs2_glock_dq(ghs + 1);
953out_child:
954 gfs2_glock_dq(ghs);
955out_parent:
956 gfs2_holder_uninit(ghs);
957 gfs2_holder_uninit(ghs + 1);
958 if (!error) {
959 ihold(inode);
960 d_instantiate(dentry, inode);
961 mark_inode_dirty(inode);
962 }
963 return error;
964}
965
966/*
 967 * gfs2_unlink_ok - check to see that an inode is still in a directory
968 * @dip: the directory
969 * @name: the name of the file
970 * @ip: the inode
971 *
972 * Assumes that the lock on (at least) @dip is held.
973 *
974 * Returns: 0 if the parent/child relationship is correct, errno if it isn't
975 */
976
977static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
978 const struct gfs2_inode *ip)
979{
980 int error;
981
982 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
983 return -EPERM;
984
985 if ((dip->i_inode.i_mode & S_ISVTX) &&
986 dip->i_inode.i_uid != current_fsuid() &&
987 ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
988 return -EPERM;
989
990 if (IS_APPEND(&dip->i_inode))
991 return -EPERM;
992
993 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
994 if (error)
995 return error;
996
997 error = gfs2_dir_check(&dip->i_inode, name, ip);
998 if (error)
999 return error;
1000
1001 return 0;
1002}
1003
1004/**
1005 * gfs2_unlink_inode - Removes an inode from its parent dir and unlinks it
1006 * @dip: The parent directory
1007 * @dentry: The dentry of the entry in the parent directory; its inode
1008 *          is the one to be removed
1009 * @bh: The inode buffer for the inode to be removed
1010 *
1011 * Called with all the locks and in a transaction. This will only be
1012 * called for a directory after it has been checked to ensure it is empty.
1013 *
1014 * Returns: 0 on success, or an error
1015 */
1016
1017static int gfs2_unlink_inode(struct gfs2_inode *dip,
1018 const struct dentry *dentry,
1019 struct buffer_head *bh)
1020{
1021 struct inode *inode = dentry->d_inode;
1022 struct gfs2_inode *ip = GFS2_I(inode);
1023 int error;
1024
1025 error = gfs2_dir_del(dip, dentry);
1026 if (error)
1027 return error;
1028
1029 ip->i_entries = 0;
1030 inode->i_ctime = CURRENT_TIME;
1031 if (S_ISDIR(inode->i_mode))
1032 clear_nlink(inode);
1033 else
1034 drop_nlink(inode);
1035 gfs2_trans_add_bh(ip->i_gl, bh, 1);
1036 gfs2_dinode_out(ip, bh->b_data);
1037 mark_inode_dirty(inode);
1038 if (inode->i_nlink == 0)
1039 gfs2_unlink_di(inode);
1040 return 0;
1041}
1042
1043
1044/**
1045 * gfs2_unlink - Unlink an inode (this does rmdir as well)
1046 * @dir: The inode of the directory containing the inode to unlink
1047 * @dentry: The file itself
1048 *
1049 * This routine uses the type of the inode as a flag to figure out
1050 * whether this is an unlink or an rmdir.
1051 *
1052 * Returns: errno
1053 */
1054
1055static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
1056{
1057 struct gfs2_inode *dip = GFS2_I(dir);
1058 struct gfs2_sbd *sdp = GFS2_SB(dir);
1059 struct inode *inode = dentry->d_inode;
1060 struct gfs2_inode *ip = GFS2_I(inode);
1061 struct buffer_head *bh;
1062 struct gfs2_holder ghs[3];
1063 struct gfs2_rgrpd *rgd;
1064 struct gfs2_holder ri_gh;
1065 int error;
1066
1067 error = gfs2_rindex_hold(sdp, &ri_gh);
1068 if (error)
1069 return error;
1070
1071 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
1072 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
1073
1074 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
1075 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
1076
1077
1078 error = gfs2_glock_nq(ghs); /* parent */
1079 if (error)
1080 goto out_parent;
1081
1082 error = gfs2_glock_nq(ghs + 1); /* child */
1083 if (error)
1084 goto out_child;
1085
1086 error = -ENOENT;
1087 if (inode->i_nlink == 0)
1088 goto out_rgrp;
1089
1090 if (S_ISDIR(inode->i_mode)) {
1091 error = -ENOTEMPTY;
1092 if (ip->i_entries > 2 || inode->i_nlink > 2)
1093 goto out_rgrp;
1094 }
1095
1096 error = gfs2_glock_nq(ghs + 2); /* rgrp */
1097 if (error)
1098 goto out_rgrp;
1099
1100 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
1101 if (error)
1102 goto out_gunlock;
1103
1104 error = gfs2_trans_begin(sdp, 2*RES_DINODE + 3*RES_LEAF + RES_RG_BIT, 0);
1105 if (error)
1106 goto out_gunlock;
1107
1108 error = gfs2_meta_inode_buffer(ip, &bh);
1109 if (error)
1110 goto out_end_trans;
1111
1112 error = gfs2_unlink_inode(dip, dentry, bh);
1113 brelse(bh);
1114
1115out_end_trans:
1116 gfs2_trans_end(sdp);
1117out_gunlock:
1118 gfs2_glock_dq(ghs + 2);
1119out_rgrp:
1120 gfs2_holder_uninit(ghs + 2);
1121 gfs2_glock_dq(ghs + 1);
1122out_child:
1123 gfs2_holder_uninit(ghs + 1);
1124 gfs2_glock_dq(ghs);
1125out_parent:
1126 gfs2_holder_uninit(ghs);
1127 gfs2_glock_dq_uninit(&ri_gh);
1128 return error;
1129}
1130
1131/**
1132 * gfs2_symlink - Create a symlink
1133 * @dir: The directory to create the symlink in
1134 * @dentry: The dentry to put the symlink in
1135 * @symname: The thing which the link points to
1136 *
1137 * Returns: errno
1138 */
1139
1140static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
1141 const char *symname)
1142{
1143 struct gfs2_sbd *sdp = GFS2_SB(dir);
1144 unsigned int size;
1145
1146 size = strlen(symname);
1147 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
1148 return -ENAMETOOLONG;
1149
1150 return gfs2_create_inode(dir, dentry, S_IFLNK | S_IRWXUGO, 0, symname, size);
1151}
1152
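/*
 * Editorial note (not part of the patch): the length check above keeps the
 * symlink target "stuffed" in the dinode block itself.  For example, with a
 * 4096-byte block the target must fit in
 * sb_bsize - sizeof(struct gfs2_dinode) - 1 bytes (a little under 4 KiB);
 * anything longer fails with -ENAMETOOLONG before gfs2_create_inode() runs.
 */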
1153/**
1154 * gfs2_mkdir - Make a directory
1155 * @dir: The parent directory of the new one
1156 * @dentry: The dentry of the new directory
1157 * @mode: The mode of the new directory
1158 *
1159 * Returns: errno
1160 */
1161
1162static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1163{
1164 return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0);
1165}
1166
1167/**
1168 * gfs2_mknod - Make a special file
1169 * @dir: The directory in which the special file will reside
1170 * @dentry: The dentry of the special file
1171 * @mode: The mode of the special file
1172 * @dev: The device specification of the special file
1173 *
1174 */
1175
1176static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
1177 dev_t dev)
1178{
1179 return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0);
1180}
1181
1182/*
1183 * gfs2_ok_to_move - check if it's ok to move a directory to another directory
1184 * @this: move this
1185 * @to: to here
1186 *
1187 * Follow @to back to the root and make sure we don't encounter @this
1188 * Assumes we already hold the rename lock.
1189 *
1190 * Returns: errno
1191 */
1192
1193static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
1194{
1195 struct inode *dir = &to->i_inode;
1196 struct super_block *sb = dir->i_sb;
1197 struct inode *tmp;
1198 int error = 0;
1199
1200 igrab(dir);
1201
1202 for (;;) {
1203 if (dir == &this->i_inode) {
1204 error = -EINVAL;
1205 break;
1206 }
1207 if (dir == sb->s_root->d_inode) {
1208 error = 0;
1209 break;
1210 }
1211
1212 tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1);
1213 if (IS_ERR(tmp)) {
1214 error = PTR_ERR(tmp);
1215 break;
1216 }
1217
1218 iput(dir);
1219 dir = tmp;
1220 }
1221
1222 iput(dir);
1223
1224 return error;
1225}
1226
1227/**
1228 * gfs2_rename - Rename a file
1229 * @odir: Parent directory of old file name
1230 * @odentry: The old dentry of the file
1231 * @ndir: Parent directory of new file name
1232 * @ndentry: The new dentry of the file
1233 *
1234 * Returns: errno
1235 */
1236
1237static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1238 struct inode *ndir, struct dentry *ndentry)
1239{
1240 struct gfs2_inode *odip = GFS2_I(odir);
1241 struct gfs2_inode *ndip = GFS2_I(ndir);
1242 struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
1243 struct gfs2_inode *nip = NULL;
1244 struct gfs2_sbd *sdp = GFS2_SB(odir);
1245 struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh;
1246 struct gfs2_rgrpd *nrgd;
1247 unsigned int num_gh;
1248 int dir_rename = 0;
1249 int alloc_required = 0;
1250 unsigned int x;
1251 int error;
1252
1253 if (ndentry->d_inode) {
1254 nip = GFS2_I(ndentry->d_inode);
1255 if (ip == nip)
1256 return 0;
1257 }
1258
1259 error = gfs2_rindex_hold(sdp, &ri_gh);
1260 if (error)
1261 return error;
1262
1263 if (odip != ndip) {
1264 error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
1265 0, &r_gh);
1266 if (error)
1267 goto out;
1268
1269 if (S_ISDIR(ip->i_inode.i_mode)) {
1270 dir_rename = 1;
1271 /* don't move a directory into its own subdirectory */
1272 error = gfs2_ok_to_move(ip, ndip);
1273 if (error)
1274 goto out_gunlock_r;
1275 }
1276 }
1277
1278 num_gh = 1;
1279 gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
1280 if (odip != ndip) {
1281 gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
1282 num_gh++;
1283 }
1284 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
1285 num_gh++;
1286
1287 if (nip) {
1288 gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
1289 num_gh++;
1290 /* grab the resource lock for unlink flag twiddling
1291 * this is the case of the target file already existing
1292 * so we unlink before doing the rename
1293 */
1294 nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
1295 if (nrgd)
1296 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
1297 }
1298
1299 for (x = 0; x < num_gh; x++) {
1300 error = gfs2_glock_nq(ghs + x);
1301 if (error)
1302 goto out_gunlock;
1303 }
1304
1305 error = -ENOENT;
1306 if (ip->i_inode.i_nlink == 0)
1307 goto out_gunlock;
1308
1309 /* Check out the old directory */
1310
1311 error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
1312 if (error)
1313 goto out_gunlock;
1314
1315 /* Check out the new directory */
1316
1317 if (nip) {
1318 error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
1319 if (error)
1320 goto out_gunlock;
1321
1322 if (nip->i_inode.i_nlink == 0) {
1323 error = -EAGAIN;
1324 goto out_gunlock;
1325 }
1326
1327 if (S_ISDIR(nip->i_inode.i_mode)) {
1328 if (nip->i_entries < 2) {
1329 gfs2_consist_inode(nip);
1330 error = -EIO;
1331 goto out_gunlock;
1332 }
1333 if (nip->i_entries > 2) {
1334 error = -ENOTEMPTY;
1335 goto out_gunlock;
1336 }
1337 }
1338 } else {
1339 error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
1340 if (error)
1341 goto out_gunlock;
1342
1343 error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
1344 switch (error) {
1345 case -ENOENT:
1346 error = 0;
1347 break;
1348 case 0:
1349 error = -EEXIST;
1350 default:
1351 goto out_gunlock;
1352 };
1353
1354 if (odip != ndip) {
1355 if (!ndip->i_inode.i_nlink) {
1356 error = -ENOENT;
1357 goto out_gunlock;
1358 }
1359 if (ndip->i_entries == (u32)-1) {
1360 error = -EFBIG;
1361 goto out_gunlock;
1362 }
1363 if (S_ISDIR(ip->i_inode.i_mode) &&
1364 ndip->i_inode.i_nlink == (u32)-1) {
1365 error = -EMLINK;
1366 goto out_gunlock;
1367 }
1368 }
1369 }
1370
1371 /* Check out the dir to be renamed */
1372
1373 if (dir_rename) {
1374 error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
1375 if (error)
1376 goto out_gunlock;
1377 }
1378
1379 if (nip == NULL)
1380 alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
1381 error = alloc_required;
1382 if (error < 0)
1383 goto out_gunlock;
1384 error = 0;
1385
1386 if (alloc_required) {
1387 struct gfs2_alloc *al = gfs2_alloc_get(ndip);
1388 if (!al) {
1389 error = -ENOMEM;
1390 goto out_gunlock;
1391 }
1392
1393 error = gfs2_quota_lock_check(ndip);
1394 if (error)
1395 goto out_alloc;
1396
1397 al->al_requested = sdp->sd_max_dirres;
1398
1399 error = gfs2_inplace_reserve_ri(ndip);
1400 if (error)
1401 goto out_gunlock_q;
1402
1403 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
1404 gfs2_rg_blocks(al) +
1405 4 * RES_DINODE + 4 * RES_LEAF +
1406 RES_STATFS + RES_QUOTA + 4, 0);
1407 if (error)
1408 goto out_ipreserv;
1409 } else {
1410 error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
1411 5 * RES_LEAF + 4, 0);
1412 if (error)
1413 goto out_gunlock;
1414 }
1415
1416 /* Remove the target file, if it exists */
1417
1418 if (nip) {
1419 struct buffer_head *bh;
1420 error = gfs2_meta_inode_buffer(nip, &bh);
1421 if (error)
1422 goto out_end_trans;
1423 error = gfs2_unlink_inode(ndip, ndentry, bh);
1424 brelse(bh);
1425 }
1426
1427 if (dir_rename) {
1428 error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
1429 if (error)
1430 goto out_end_trans;
1431 } else {
1432 struct buffer_head *dibh;
1433 error = gfs2_meta_inode_buffer(ip, &dibh);
1434 if (error)
1435 goto out_end_trans;
1436 ip->i_inode.i_ctime = CURRENT_TIME;
1437 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1438 gfs2_dinode_out(ip, dibh->b_data);
1439 brelse(dibh);
1440 }
1441
1442 error = gfs2_dir_del(odip, odentry);
1443 if (error)
1444 goto out_end_trans;
1445
1446 error = gfs2_dir_add(ndir, &ndentry->d_name, ip);
1447 if (error)
1448 goto out_end_trans;
1449
1450out_end_trans:
1451 gfs2_trans_end(sdp);
1452out_ipreserv:
1453 if (alloc_required)
1454 gfs2_inplace_release(ndip);
1455out_gunlock_q:
1456 if (alloc_required)
1457 gfs2_quota_unlock(ndip);
1458out_alloc:
1459 if (alloc_required)
1460 gfs2_alloc_put(ndip);
1461out_gunlock:
1462 while (x--) {
1463 gfs2_glock_dq(ghs + x);
1464 gfs2_holder_uninit(ghs + x);
1465 }
1466out_gunlock_r:
1467 if (r_gh.gh_gl)
1468 gfs2_glock_dq_uninit(&r_gh);
1469out:
1470 gfs2_glock_dq_uninit(&ri_gh);
1471 return error;
1472}
1473
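/*
 * Editorial note (not part of the patch): summary of the locking order used by
 * gfs2_rename() above -- the rindex holder first, then (for cross-directory
 * renames only) the filesystem-wide sd_rename_gl glock, then the inode glocks
 * in the order old directory, new directory, the inode being moved, the
 * target inode if it exists, and finally the target's resource group for the
 * unlink.  The gfs2_ok_to_move() walk back to the root relies on sd_rename_gl
 * already being held.
 */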
1474/**
1475 * gfs2_follow_link - Follow a symbolic link
1476 * @dentry: The dentry of the link
1477 * @nd: Data that we pass to vfs_follow_link()
1478 *
1479 * This can handle symlinks of any size.
1480 *
1481 * Returns: 0 on success or error code
1482 */
1483
1484static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
1485{
1486 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
1487 struct gfs2_holder i_gh;
1488 struct buffer_head *dibh;
1489 unsigned int size;
1490 char *buf;
1491 int error;
1492
1493 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
1494 error = gfs2_glock_nq(&i_gh);
1495 if (error) {
1496 gfs2_holder_uninit(&i_gh);
1497 nd_set_link(nd, ERR_PTR(error));
1498 return NULL;
1499 }
1500
1501 size = (unsigned int)i_size_read(&ip->i_inode);
1502 if (size == 0) {
1503 gfs2_consist_inode(ip);
1504 buf = ERR_PTR(-EIO);
1505 goto out;
1506 }
1507
1508 error = gfs2_meta_inode_buffer(ip, &dibh);
1509 if (error) {
1510 buf = ERR_PTR(error);
1511 goto out;
1512 }
1513
1514 buf = kzalloc(size + 1, GFP_NOFS);
1515 if (!buf)
1516 buf = ERR_PTR(-ENOMEM);
1517 else
1518 memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size);
1519 brelse(dibh);
1520out:
1521 gfs2_glock_dq_uninit(&i_gh);
1522 nd_set_link(nd, buf);
1523 return NULL;
1524}
1525
1526static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1527{
1528 char *s = nd_get_link(nd);
1529 if (!IS_ERR(s))
1530 kfree(s);
1531}
1532
1533/**
1534 * gfs2_permission -
1535 * @inode: The inode
1536 * @mask: The mask to be tested
1537 * @flags: Indicates whether this is an RCU path walk or not
1538 *
1539 * This may be called from the VFS directly, or from within GFS2 with the
1540 * inode locked, so we look to see if the glock is already locked and only
1541 * lock the glock if it has not already been done.
1542 *
1543 * Returns: errno
1544 */
1545
1546int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
1547{
1548 struct gfs2_inode *ip;
1549 struct gfs2_holder i_gh;
1550 int error;
1551 int unlock = 0;
1552
1553
1554 ip = GFS2_I(inode);
1555 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1556 if (flags & IPERM_FLAG_RCU)
1557 return -ECHILD;
1558 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
1559 if (error)
1560 return error;
1561 unlock = 1;
1562 }
1563
1564 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
1565 error = -EACCES;
1566 else
1567 error = generic_permission(inode, mask, flags, gfs2_check_acl);
1568 if (unlock)
1569 gfs2_glock_dq_uninit(&i_gh);
1570
1571 return error;
906} 1572}
907 1573
908static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) 1574static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
@@ -928,8 +1594,6 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
928 * @ip: 1594 * @ip:
929 * @attr: 1595 * @attr:
930 * 1596 *
931 * Called with a reference on the vnode.
932 *
933 * Returns: errno 1597 * Returns: errno
934 */ 1598 */
935 1599
@@ -949,60 +1613,280 @@ int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
949 return error; 1613 return error;
950} 1614}
951 1615
 952void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 953{
 954 struct gfs2_dinode *str = buf;
 955
 956 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
 957 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
 958 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
 959 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
 960 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
 961 str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
 962 str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
 963 str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
 964 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
 965 str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
 966 str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
 967 str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
 968 str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
 969 str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
 970
 971 str->di_goal_meta = cpu_to_be64(ip->i_goal);
 972 str->di_goal_data = cpu_to_be64(ip->i_goal);
 973 str->di_generation = cpu_to_be64(ip->i_generation);
 974
 975 str->di_flags = cpu_to_be32(ip->i_diskflags);
 976 str->di_height = cpu_to_be16(ip->i_height);
 977 str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
 978 !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
 979 GFS2_FORMAT_DE : 0);
 980 str->di_depth = cpu_to_be16(ip->i_depth);
 981 str->di_entries = cpu_to_be32(ip->i_entries);
 982
 983 str->di_eattr = cpu_to_be64(ip->i_eattr);
 984 str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
 985 str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
 986 str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
 987}
 988
 989void gfs2_dinode_print(const struct gfs2_inode *ip)
 990{
 991 printk(KERN_INFO " no_formal_ino = %llu\n",
 992 (unsigned long long)ip->i_no_formal_ino);
 993 printk(KERN_INFO " no_addr = %llu\n",
 994 (unsigned long long)ip->i_no_addr);
 995 printk(KERN_INFO " i_size = %llu\n",
 996 (unsigned long long)i_size_read(&ip->i_inode));
 997 printk(KERN_INFO " blocks = %llu\n",
 998 (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode));
 999 printk(KERN_INFO " i_goal = %llu\n",
1000 (unsigned long long)ip->i_goal);
1001 printk(KERN_INFO " i_diskflags = 0x%.8X\n", ip->i_diskflags);
1002 printk(KERN_INFO " i_height = %u\n", ip->i_height);
1003 printk(KERN_INFO " i_depth = %u\n", ip->i_depth);
1004 printk(KERN_INFO " i_entries = %u\n", ip->i_entries);
1005 printk(KERN_INFO " i_eattr = %llu\n",
1006 (unsigned long long)ip->i_eattr);
1616static int setattr_chown(struct inode *inode, struct iattr *attr)
1617{
1618 struct gfs2_inode *ip = GFS2_I(inode);
1619 struct gfs2_sbd *sdp = GFS2_SB(inode);
1620 u32 ouid, ogid, nuid, ngid;
1621 int error;
1622
1623 ouid = inode->i_uid;
1624 ogid = inode->i_gid;
1625 nuid = attr->ia_uid;
1626 ngid = attr->ia_gid;
1627
1628 if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
1629 ouid = nuid = NO_QUOTA_CHANGE;
1630 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
1631 ogid = ngid = NO_QUOTA_CHANGE;
1632
1633 if (!gfs2_alloc_get(ip))
1634 return -ENOMEM;
1635
1636 error = gfs2_quota_lock(ip, nuid, ngid);
1637 if (error)
1638 goto out_alloc;
1639
1640 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
1641 error = gfs2_quota_check(ip, nuid, ngid);
1642 if (error)
1643 goto out_gunlock_q;
1644 }
1645
1646 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
1647 if (error)
1648 goto out_gunlock_q;
1649
1650 error = gfs2_setattr_simple(ip, attr);
1651 if (error)
1652 goto out_end_trans;
1653
1654 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
1655 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
1656 gfs2_quota_change(ip, -blocks, ouid, ogid);
1657 gfs2_quota_change(ip, blocks, nuid, ngid);
1658 }
1659
1660out_end_trans:
1661 gfs2_trans_end(sdp);
1662out_gunlock_q:
1663 gfs2_quota_unlock(ip);
1664out_alloc:
1665 gfs2_alloc_put(ip);
1666 return error;
1667}
1668
1669/**
1670 * gfs2_setattr - Change attributes on an inode
1671 * @dentry: The dentry which is changing
1672 * @attr: The structure describing the change
1673 *
1674 * The VFS layer wants to change one or more of an inode's attributes. Write
1675 * that change out to disk.
1676 *
1677 * Returns: errno
1678 */
1679
1680static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
1681{
1682 struct inode *inode = dentry->d_inode;
1683 struct gfs2_inode *ip = GFS2_I(inode);
1684 struct gfs2_holder i_gh;
1685 int error;
1686
1687 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1688 if (error)
1689 return error;
1690
1691 error = -EPERM;
1692 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1693 goto out;
1694
1695 error = inode_change_ok(inode, attr);
1696 if (error)
1697 goto out;
1698
1699 if (attr->ia_valid & ATTR_SIZE)
1700 error = gfs2_setattr_size(inode, attr->ia_size);
1701 else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
1702 error = setattr_chown(inode, attr);
1703 else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
1704 error = gfs2_acl_chmod(ip, attr);
1705 else
1706 error = gfs2_setattr_simple(ip, attr);
1707
1708out:
1709 gfs2_glock_dq_uninit(&i_gh);
1710 if (!error)
1711 mark_inode_dirty(inode);
1712 return error;
1713}
1714
1715/**
1716 * gfs2_getattr - Read out an inode's attributes
1717 * @mnt: The vfsmount the inode is being accessed from
1718 * @dentry: The dentry to stat
1719 * @stat: The inode's stats
1720 *
1721 * This may be called from the VFS directly, or from within GFS2 with the
1722 * inode locked, so we look to see if the glock is already locked and only
1723 * lock the glock if it has not already been done. Note that it is the NFS
1724 * readdirplus operation which causes this to be called (from filldir)
1725 * with the glock already held.
1726 *
1727 * Returns: errno
1728 */
1729
1730static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
1731 struct kstat *stat)
1732{
1733 struct inode *inode = dentry->d_inode;
1734 struct gfs2_inode *ip = GFS2_I(inode);
1735 struct gfs2_holder gh;
1736 int error;
1737 int unlock = 0;
1738
1739 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1740 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1741 if (error)
1742 return error;
1743 unlock = 1;
1744 }
1745
1746 generic_fillattr(inode, stat);
1747 if (unlock)
1748 gfs2_glock_dq_uninit(&gh);
1749
1750 return 0;
1751}
1752
1753static int gfs2_setxattr(struct dentry *dentry, const char *name,
1754 const void *data, size_t size, int flags)
1755{
1756 struct inode *inode = dentry->d_inode;
1757 struct gfs2_inode *ip = GFS2_I(inode);
1758 struct gfs2_holder gh;
1759 int ret;
1760
1761 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1762 ret = gfs2_glock_nq(&gh);
1763 if (ret == 0) {
1764 ret = generic_setxattr(dentry, name, data, size, flags);
1765 gfs2_glock_dq(&gh);
1766 }
1767 gfs2_holder_uninit(&gh);
1768 return ret;
1769}
1770
1771static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
1772 void *data, size_t size)
1773{
1774 struct inode *inode = dentry->d_inode;
1775 struct gfs2_inode *ip = GFS2_I(inode);
1776 struct gfs2_holder gh;
1777 int ret;
1778
1779 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1780 ret = gfs2_glock_nq(&gh);
1781 if (ret == 0) {
1782 ret = generic_getxattr(dentry, name, data, size);
1783 gfs2_glock_dq(&gh);
1784 }
1785 gfs2_holder_uninit(&gh);
1786 return ret;
1787}
1788
1789static int gfs2_removexattr(struct dentry *dentry, const char *name)
1790{
1791 struct inode *inode = dentry->d_inode;
1792 struct gfs2_inode *ip = GFS2_I(inode);
1793 struct gfs2_holder gh;
1794 int ret;
1795
1796 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1797 ret = gfs2_glock_nq(&gh);
1798 if (ret == 0) {
1799 ret = generic_removexattr(dentry, name);
1800 gfs2_glock_dq(&gh);
1801 }
1802 gfs2_holder_uninit(&gh);
1803 return ret;
1804}
1805
1806static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1807 u64 start, u64 len)
1808{
1809 struct gfs2_inode *ip = GFS2_I(inode);
1810 struct gfs2_holder gh;
1811 int ret;
1812
1813 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
1814 if (ret)
1815 return ret;
1816
1817 mutex_lock(&inode->i_mutex);
1818
1819 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
1820 if (ret)
1821 goto out;
1822
1823 if (gfs2_is_stuffed(ip)) {
1824 u64 phys = ip->i_no_addr << inode->i_blkbits;
1825 u64 size = i_size_read(inode);
1826 u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
1827 FIEMAP_EXTENT_DATA_INLINE;
1828 phys += sizeof(struct gfs2_dinode);
1829 phys += start;
1830 if (start + len > size)
1831 len = size - start;
1832 if (start < size)
1833 ret = fiemap_fill_next_extent(fieinfo, start, phys,
1834 len, flags);
1835 if (ret == 1)
1836 ret = 0;
1837 } else {
1838 ret = __generic_block_fiemap(inode, fieinfo, start, len,
1839 gfs2_block_map);
1840 }
1841
1842 gfs2_glock_dq_uninit(&gh);
1843out:
1844 mutex_unlock(&inode->i_mutex);
1845 return ret;
1007} 1846}
1008 1847
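/*
 * Editorial note (not part of the patch): for a stuffed inode the data lives
 * directly after the on-disk dinode, so gfs2_fiemap() above reports a single
 * inline extent.  Illustrative numbers: a 100-byte stuffed file whose dinode
 * sits at block no_addr, on a filesystem with 4096-byte blocks, yields one
 * extent at physical offset no_addr * 4096 + sizeof(struct gfs2_dinode),
 * length 100, flagged FIEMAP_EXTENT_LAST | FIEMAP_EXTENT_NOT_ALIGNED |
 * FIEMAP_EXTENT_DATA_INLINE.
 */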
1848const struct inode_operations gfs2_file_iops = {
1849 .permission = gfs2_permission,
1850 .setattr = gfs2_setattr,
1851 .getattr = gfs2_getattr,
1852 .setxattr = gfs2_setxattr,
1853 .getxattr = gfs2_getxattr,
1854 .listxattr = gfs2_listxattr,
1855 .removexattr = gfs2_removexattr,
1856 .fiemap = gfs2_fiemap,
1857};
1858
1859const struct inode_operations gfs2_dir_iops = {
1860 .create = gfs2_create,
1861 .lookup = gfs2_lookup,
1862 .link = gfs2_link,
1863 .unlink = gfs2_unlink,
1864 .symlink = gfs2_symlink,
1865 .mkdir = gfs2_mkdir,
1866 .rmdir = gfs2_unlink,
1867 .mknod = gfs2_mknod,
1868 .rename = gfs2_rename,
1869 .permission = gfs2_permission,
1870 .setattr = gfs2_setattr,
1871 .getattr = gfs2_getattr,
1872 .setxattr = gfs2_setxattr,
1873 .getxattr = gfs2_getxattr,
1874 .listxattr = gfs2_listxattr,
1875 .removexattr = gfs2_removexattr,
1876 .fiemap = gfs2_fiemap,
1877};
1878
1879const struct inode_operations gfs2_symlink_iops = {
1880 .readlink = generic_readlink,
1881 .follow_link = gfs2_follow_link,
1882 .put_link = gfs2_put_link,
1883 .permission = gfs2_permission,
1884 .setattr = gfs2_setattr,
1885 .getattr = gfs2_getattr,
1886 .setxattr = gfs2_setxattr,
1887 .getxattr = gfs2_getxattr,
1888 .listxattr = gfs2_listxattr,
1889 .removexattr = gfs2_removexattr,
1890 .fiemap = gfs2_fiemap,
1891};
1892
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 099ca305e518..31606076f701 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -102,22 +102,16 @@ extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
102extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, 102extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
103 u64 *no_formal_ino, 103 u64 *no_formal_ino,
104 unsigned int blktype); 104 unsigned int blktype);
105extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); 105extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int nonblock);
106 106
107extern int gfs2_inode_refresh(struct gfs2_inode *ip); 107extern int gfs2_inode_refresh(struct gfs2_inode *ip);
108 108
109extern int gfs2_dinode_dealloc(struct gfs2_inode *inode);
110extern int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
111extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, 109extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
112 int is_root); 110 int is_root);
113extern struct inode *gfs2_createi(struct gfs2_holder *ghs,
114 const struct qstr *name,
115 unsigned int mode, dev_t dev);
116extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags); 111extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags);
117extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); 112extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
118extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); 113extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
119extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); 114extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
120extern void gfs2_dinode_print(const struct gfs2_inode *ip);
121 115
122extern const struct inode_operations gfs2_file_iops; 116extern const struct inode_operations gfs2_file_iops;
123extern const struct inode_operations gfs2_dir_iops; 117extern const struct inode_operations gfs2_dir_iops;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 5b102c1887fd..903115f2bb34 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -18,6 +18,7 @@
18#include <linux/kthread.h> 18#include <linux/kthread.h>
19#include <linux/freezer.h> 19#include <linux/freezer.h>
20#include <linux/bio.h> 20#include <linux/bio.h>
21#include <linux/writeback.h>
21 22
22#include "gfs2.h" 23#include "gfs2.h"
23#include "incore.h" 24#include "incore.h"
@@ -83,55 +84,97 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
83/** 84/**
84 * gfs2_ail1_start_one - Start I/O on a part of the AIL 85 * gfs2_ail1_start_one - Start I/O on a part of the AIL
85 * @sdp: the filesystem 86 * @sdp: the filesystem
86 * @tr: the part of the AIL 87 * @wbc: The writeback control structure
88 * @ai: The ail structure
87 * 89 *
88 */ 90 */
89 91
90static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) 92static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
93 struct writeback_control *wbc,
94 struct gfs2_ail *ai)
91__releases(&sdp->sd_ail_lock) 95__releases(&sdp->sd_ail_lock)
92__acquires(&sdp->sd_ail_lock) 96__acquires(&sdp->sd_ail_lock)
93{ 97{
98 struct gfs2_glock *gl = NULL;
99 struct address_space *mapping;
94 struct gfs2_bufdata *bd, *s; 100 struct gfs2_bufdata *bd, *s;
95 struct buffer_head *bh; 101 struct buffer_head *bh;
96 int retry;
97 102
98 do { 103 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, bd_ail_st_list) {
99 retry = 0; 104 bh = bd->bd_bh;
100 105
101 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, 106 gfs2_assert(sdp, bd->bd_ail == ai);
102 bd_ail_st_list) {
103 bh = bd->bd_bh;
104 107
105 gfs2_assert(sdp, bd->bd_ail == ai); 108 if (!buffer_busy(bh)) {
109 if (!buffer_uptodate(bh))
110 gfs2_io_error_bh(sdp, bh);
111 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
112 continue;
113 }
106 114
107 if (!buffer_busy(bh)) { 115 if (!buffer_dirty(bh))
108 if (!buffer_uptodate(bh)) 116 continue;
109 gfs2_io_error_bh(sdp, bh); 117 if (gl == bd->bd_gl)
110 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); 118 continue;
111 continue; 119 gl = bd->bd_gl;
112 } 120 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
121 mapping = bh->b_page->mapping;
122 if (!mapping)
123 continue;
124 spin_unlock(&sdp->sd_ail_lock);
125 generic_writepages(mapping, wbc);
126 spin_lock(&sdp->sd_ail_lock);
127 if (wbc->nr_to_write <= 0)
128 break;
129 return 1;
130 }
113 131
114 if (!buffer_dirty(bh)) 132 return 0;
115 continue; 133}
116 134
117 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
118 135
119 get_bh(bh); 136/**
120 spin_unlock(&sdp->sd_ail_lock); 137 * gfs2_ail1_flush - start writeback of some ail1 entries
121 lock_buffer(bh); 138 * @sdp: The super block
122 if (test_clear_buffer_dirty(bh)) { 139 * @wbc: The writeback control structure
123 bh->b_end_io = end_buffer_write_sync; 140 *
124 submit_bh(WRITE_SYNC, bh); 141 * Writes back some ail1 entries, according to the limits in the
125 } else { 142 * writeback control structure
126 unlock_buffer(bh); 143 */
127 brelse(bh); 144
128 } 145void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
129 spin_lock(&sdp->sd_ail_lock); 146{
130 147 struct list_head *head = &sdp->sd_ail1_list;
131 retry = 1; 148 struct gfs2_ail *ai;
149
150 trace_gfs2_ail_flush(sdp, wbc, 1);
151 spin_lock(&sdp->sd_ail_lock);
152restart:
153 list_for_each_entry_reverse(ai, head, ai_list) {
154 if (wbc->nr_to_write <= 0)
132 break; 155 break;
133 } 156 if (gfs2_ail1_start_one(sdp, wbc, ai))
134 } while (retry); 157 goto restart;
158 }
159 spin_unlock(&sdp->sd_ail_lock);
160 trace_gfs2_ail_flush(sdp, wbc, 0);
161}
162
163/**
164 * gfs2_ail1_start - start writeback of all ail1 entries
165 * @sdp: The superblock
166 */
167
168static void gfs2_ail1_start(struct gfs2_sbd *sdp)
169{
170 struct writeback_control wbc = {
171 .sync_mode = WB_SYNC_NONE,
172 .nr_to_write = LONG_MAX,
173 .range_start = 0,
174 .range_end = LLONG_MAX,
175 };
176
177 return gfs2_ail1_flush(sdp, &wbc);
135} 178}
136 179
137/** 180/**
@@ -141,7 +184,7 @@ __acquires(&sdp->sd_ail_lock)
141 * 184 *
142 */ 185 */
143 186
144static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags) 187static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
145{ 188{
146 struct gfs2_bufdata *bd, *s; 189 struct gfs2_bufdata *bd, *s;
147 struct buffer_head *bh; 190 struct buffer_head *bh;
@@ -149,76 +192,63 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
149 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, 192 list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
150 bd_ail_st_list) { 193 bd_ail_st_list) {
151 bh = bd->bd_bh; 194 bh = bd->bd_bh;
152
153 gfs2_assert(sdp, bd->bd_ail == ai); 195 gfs2_assert(sdp, bd->bd_ail == ai);
154 196 if (buffer_busy(bh))
155 if (buffer_busy(bh)) { 197 continue;
156 if (flags & DIO_ALL)
157 continue;
158 else
159 break;
160 }
161
162 if (!buffer_uptodate(bh)) 198 if (!buffer_uptodate(bh))
163 gfs2_io_error_bh(sdp, bh); 199 gfs2_io_error_bh(sdp, bh);
164
165 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); 200 list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
166 } 201 }
167 202
168 return list_empty(&ai->ai_ail1_list);
169} 203}
170 204
171static void gfs2_ail1_start(struct gfs2_sbd *sdp) 205/**
172{ 206 * gfs2_ail1_empty - Try to empty the ail1 lists
173 struct list_head *head; 207 * @sdp: The superblock
174 u64 sync_gen; 208 *
175 struct gfs2_ail *ai; 209 * Tries to empty the ail1 lists, starting with the oldest first
176 int done = 0; 210 */
177
178 spin_lock(&sdp->sd_ail_lock);
179 head = &sdp->sd_ail1_list;
180 if (list_empty(head)) {
181 spin_unlock(&sdp->sd_ail_lock);
182 return;
183 }
184 sync_gen = sdp->sd_ail_sync_gen++;
185
186 while(!done) {
187 done = 1;
188 list_for_each_entry_reverse(ai, head, ai_list) {
189 if (ai->ai_sync_gen >= sync_gen)
190 continue;
191 ai->ai_sync_gen = sync_gen;
192 gfs2_ail1_start_one(sdp, ai); /* This may drop ail lock */
193 done = 0;
194 break;
195 }
196 }
197
198 spin_unlock(&sdp->sd_ail_lock);
199}
200 211
201static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) 212static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
202{ 213{
203 struct gfs2_ail *ai, *s; 214 struct gfs2_ail *ai, *s;
204 int ret; 215 int ret;
205 216
206 spin_lock(&sdp->sd_ail_lock); 217 spin_lock(&sdp->sd_ail_lock);
207
208 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) { 218 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
209 if (gfs2_ail1_empty_one(sdp, ai, flags)) 219 gfs2_ail1_empty_one(sdp, ai);
220 if (list_empty(&ai->ai_ail1_list))
210 list_move(&ai->ai_list, &sdp->sd_ail2_list); 221 list_move(&ai->ai_list, &sdp->sd_ail2_list);
211 else if (!(flags & DIO_ALL)) 222 else
212 break; 223 break;
213 } 224 }
214
215 ret = list_empty(&sdp->sd_ail1_list); 225 ret = list_empty(&sdp->sd_ail1_list);
216
217 spin_unlock(&sdp->sd_ail_lock); 226 spin_unlock(&sdp->sd_ail_lock);
218 227
219 return ret; 228 return ret;
220} 229}
221 230
231static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
232{
233 struct gfs2_ail *ai;
234 struct gfs2_bufdata *bd;
235 struct buffer_head *bh;
236
237 spin_lock(&sdp->sd_ail_lock);
238 list_for_each_entry_reverse(ai, &sdp->sd_ail1_list, ai_list) {
239 list_for_each_entry(bd, &ai->ai_ail1_list, bd_ail_st_list) {
240 bh = bd->bd_bh;
241 if (!buffer_locked(bh))
242 continue;
243 get_bh(bh);
244 spin_unlock(&sdp->sd_ail_lock);
245 wait_on_buffer(bh);
246 brelse(bh);
247 return;
248 }
249 }
250 spin_unlock(&sdp->sd_ail_lock);
251}
222 252
223/** 253/**
224 * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced 254 * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
@@ -574,7 +604,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
574 set_buffer_uptodate(bh); 604 set_buffer_uptodate(bh);
575 clear_buffer_dirty(bh); 605 clear_buffer_dirty(bh);
576 606
577 gfs2_ail1_empty(sdp, 0); 607 gfs2_ail1_empty(sdp);
578 tail = current_tail(sdp); 608 tail = current_tail(sdp);
579 609
580 lh = (struct gfs2_log_header *)bh->b_data; 610 lh = (struct gfs2_log_header *)bh->b_data;
@@ -869,9 +899,9 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
869 gfs2_log_flush(sdp, NULL); 899 gfs2_log_flush(sdp, NULL);
870 for (;;) { 900 for (;;) {
871 gfs2_ail1_start(sdp); 901 gfs2_ail1_start(sdp);
872 if (gfs2_ail1_empty(sdp, DIO_ALL)) 902 gfs2_ail1_wait(sdp);
903 if (gfs2_ail1_empty(sdp))
873 break; 904 break;
874 msleep(10);
875 } 905 }
876} 906}
877 907
@@ -905,20 +935,20 @@ int gfs2_logd(void *data)
905 935
906 preflush = atomic_read(&sdp->sd_log_pinned); 936 preflush = atomic_read(&sdp->sd_log_pinned);
907 if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { 937 if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
908 gfs2_ail1_empty(sdp, DIO_ALL); 938 gfs2_ail1_empty(sdp);
909 gfs2_log_flush(sdp, NULL); 939 gfs2_log_flush(sdp, NULL);
910 gfs2_ail1_empty(sdp, DIO_ALL);
911 } 940 }
912 941
913 if (gfs2_ail_flush_reqd(sdp)) { 942 if (gfs2_ail_flush_reqd(sdp)) {
914 gfs2_ail1_start(sdp); 943 gfs2_ail1_start(sdp);
915 io_schedule(); 944 gfs2_ail1_wait(sdp);
916 gfs2_ail1_empty(sdp, 0); 945 gfs2_ail1_empty(sdp);
917 gfs2_log_flush(sdp, NULL); 946 gfs2_log_flush(sdp, NULL);
918 gfs2_ail1_empty(sdp, DIO_ALL);
919 } 947 }
920 948
921 wake_up(&sdp->sd_log_waitq); 949 if (!gfs2_ail_flush_reqd(sdp))
950 wake_up(&sdp->sd_log_waitq);
951
922 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; 952 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
923 if (freezing(current)) 953 if (freezing(current))
924 refrigerator(); 954 refrigerator();
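The new gfs2_ail1_start_one()/gfs2_ail1_flush() pair above drops sd_ail_lock around generic_writepages() and returns 1 so the caller restarts its walk from the tail, since the list may have changed while the lock was released. A minimal pthread sketch of that "drop the lock, do the I/O, restart the scan" pattern follows; the array, the budget and all names are simplified stand-ins, not the kernel structures.

#include <pthread.h>
#include <stdio.h>

#define NITEMS 8

struct item {
	int dirty;
	int written;
};

static struct item items[NITEMS];
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for the writeback done outside the spinlock. */
static void write_item(struct item *it)
{
	it->written = 1;
	it->dirty = 0;
}

/*
 * Walk the set under the lock; whenever an entry needs I/O, drop the
 * lock, do the I/O, and restart the walk because the set may have
 * changed in the meantime.
 */
static void flush_all(long *budget)
{
	pthread_mutex_lock(&list_lock);
restart:
	for (int i = NITEMS - 1; i >= 0; i--) {
		if (*budget <= 0)
			break;
		if (!items[i].dirty)
			continue;
		pthread_mutex_unlock(&list_lock);
		write_item(&items[i]);          /* I/O without the lock held */
		(*budget)--;
		pthread_mutex_lock(&list_lock);
		goto restart;                   /* list may have changed */
	}
	pthread_mutex_unlock(&list_lock);
}

int main(void)
{
	long budget = 16;

	for (int i = 0; i < NITEMS; i++)
		items[i].dirty = (i % 2);
	flush_all(&budget);
	for (int i = 0; i < NITEMS; i++)
		printf("item %d written=%d\n", i, items[i].written);
	return 0;
}

The writeback_control budget plays the same role as *budget here: gfs2_ail1_start() passes nr_to_write = LONG_MAX to flush everything, while the logd path can pass a smaller limit.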
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 0d007f920234..ab0621698b73 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -12,6 +12,7 @@
12 12
13#include <linux/list.h> 13#include <linux/list.h>
14#include <linux/spinlock.h> 14#include <linux/spinlock.h>
15#include <linux/writeback.h>
15#include "incore.h" 16#include "incore.h"
16 17
17/** 18/**
@@ -59,6 +60,7 @@ extern struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
59extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); 60extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
60extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); 61extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
61extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd); 62extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
63extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc);
62 64
63extern void gfs2_log_shutdown(struct gfs2_sbd *sdp); 65extern void gfs2_log_shutdown(struct gfs2_sbd *sdp);
64extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp); 66extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 51d27f00ebb4..05bbb124699f 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -40,7 +40,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
40{ 40{
41 struct gfs2_bufdata *bd; 41 struct gfs2_bufdata *bd;
42 42
43 gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)); 43 BUG_ON(!current->journal_info);
44 44
45 clear_buffer_dirty(bh); 45 clear_buffer_dirty(bh);
46 if (test_set_buffer_pinned(bh)) 46 if (test_set_buffer_pinned(bh))
@@ -65,6 +65,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
65 * @sdp: the filesystem the buffer belongs to 65 * @sdp: the filesystem the buffer belongs to
66 * @bh: The buffer to unpin 66 * @bh: The buffer to unpin
67 * @ai: 67 * @ai:
68 * @flags: The inode dirty flags
68 * 69 *
69 */ 70 */
70 71
@@ -73,10 +74,8 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
73{ 74{
74 struct gfs2_bufdata *bd = bh->b_private; 75 struct gfs2_bufdata *bd = bh->b_private;
75 76
76 gfs2_assert_withdraw(sdp, buffer_uptodate(bh)); 77 BUG_ON(!buffer_uptodate(bh));
77 78 BUG_ON(!buffer_pinned(bh));
78 if (!buffer_pinned(bh))
79 gfs2_assert_withdraw(sdp, 0);
80 79
81 lock_buffer(bh); 80 lock_buffer(bh);
82 mark_buffer_dirty(bh); 81 mark_buffer_dirty(bh);
@@ -95,8 +94,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
95 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); 94 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
96 spin_unlock(&sdp->sd_ail_lock); 95 spin_unlock(&sdp->sd_ail_lock);
97 96
98 if (test_and_clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags)) 97 clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
99 gfs2_glock_schedule_for_reclaim(bd->bd_gl);
100 trace_gfs2_pin(bd, 0); 98 trace_gfs2_pin(bd, 0);
101 unlock_buffer(bh); 99 unlock_buffer(bh);
102 atomic_dec(&sdp->sd_log_pinned); 100 atomic_dec(&sdp->sd_log_pinned);
@@ -322,12 +320,16 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
322 320
323static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 321static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
324{ 322{
323 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
324 struct gfs2_glock *gl = bd->bd_gl;
325 struct gfs2_trans *tr; 325 struct gfs2_trans *tr;
326 326
327 tr = current->journal_info; 327 tr = current->journal_info;
328 tr->tr_touched = 1; 328 tr->tr_touched = 1;
329 tr->tr_num_revoke++; 329 tr->tr_num_revoke++;
330 sdp->sd_log_num_revoke++; 330 sdp->sd_log_num_revoke++;
331 atomic_inc(&gl->gl_revokes);
332 set_bit(GLF_LFLUSH, &gl->gl_flags);
331 list_add(&le->le_list, &sdp->sd_log_le_revoke); 333 list_add(&le->le_list, &sdp->sd_log_le_revoke);
332} 334}
333 335
@@ -350,9 +352,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
350 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke); 352 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
351 offset = sizeof(struct gfs2_log_descriptor); 353 offset = sizeof(struct gfs2_log_descriptor);
352 354
353 while (!list_empty(head)) { 355 list_for_each_entry(bd, head, bd_le.le_list) {
354 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
355 list_del_init(&bd->bd_le.le_list);
356 sdp->sd_log_num_revoke--; 356 sdp->sd_log_num_revoke--;
357 357
358 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { 358 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
@@ -367,8 +367,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
367 } 367 }
368 368
369 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno); 369 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno);
370 kmem_cache_free(gfs2_bufdata_cachep, bd);
371
372 offset += sizeof(u64); 370 offset += sizeof(u64);
373 } 371 }
374 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); 372 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
@@ -376,6 +374,22 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
376 submit_bh(WRITE_SYNC, bh); 374 submit_bh(WRITE_SYNC, bh);
377} 375}
378 376
377static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
378{
379 struct list_head *head = &sdp->sd_log_le_revoke;
380 struct gfs2_bufdata *bd;
381 struct gfs2_glock *gl;
382
383 while (!list_empty(head)) {
384 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
385 list_del_init(&bd->bd_le.le_list);
386 gl = bd->bd_gl;
387 atomic_dec(&gl->gl_revokes);
388 clear_bit(GLF_LFLUSH, &gl->gl_flags);
389 kmem_cache_free(gfs2_bufdata_cachep, bd);
390 }
391}
392
379static void revoke_lo_before_scan(struct gfs2_jdesc *jd, 393static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
380 struct gfs2_log_header_host *head, int pass) 394 struct gfs2_log_header_host *head, int pass)
381{ 395{
@@ -749,6 +763,7 @@ const struct gfs2_log_operations gfs2_buf_lops = {
749const struct gfs2_log_operations gfs2_revoke_lops = { 763const struct gfs2_log_operations gfs2_revoke_lops = {
750 .lo_add = revoke_lo_add, 764 .lo_add = revoke_lo_add,
751 .lo_before_commit = revoke_lo_before_commit, 765 .lo_before_commit = revoke_lo_before_commit,
766 .lo_after_commit = revoke_lo_after_commit,
752 .lo_before_scan = revoke_lo_before_scan, 767 .lo_before_scan = revoke_lo_before_scan,
753 .lo_scan_elements = revoke_lo_scan_elements, 768 .lo_scan_elements = revoke_lo_scan_elements,
754 .lo_after_scan = revoke_lo_after_scan, 769 .lo_after_scan = revoke_lo_after_scan,
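The lops.c hunks above move the freeing of revoke entries out of lo_before_commit into the new lo_after_commit hook, and pair every revoke with per-glock accounting: gl_revokes is incremented and GLF_LFLUSH set when the revoke is queued, then decremented and cleared once the commit completes. A compact userspace sketch of that add/after-commit bookkeeping; the glock, list and flag names are simplified stand-ins.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-in for a glock with revoke accounting. */
struct glock {
	atomic_int revokes;
	atomic_bool log_flush_pending;   /* plays the role of GLF_LFLUSH */
};

struct revoke {
	struct glock *gl;
	struct revoke *next;
};

static struct revoke *revoke_list;

/* Queue a revoke: bump the per-glock count and mark a log flush pending. */
static void revoke_add(struct glock *gl)
{
	struct revoke *rv = malloc(sizeof(*rv));

	if (!rv)
		return;
	rv->gl = gl;
	rv->next = revoke_list;
	revoke_list = rv;
	atomic_fetch_add(&gl->revokes, 1);
	atomic_store(&gl->log_flush_pending, true);
}

/* After the commit: drop the counts, clear the flag, free the entries. */
static void revoke_after_commit(void)
{
	while (revoke_list) {
		struct revoke *rv = revoke_list;

		revoke_list = rv->next;
		atomic_fetch_sub(&rv->gl->revokes, 1);
		atomic_store(&rv->gl->log_flush_pending, false);
		free(rv);
	}
}

int main(void)
{
	struct glock gl = { 0 };

	revoke_add(&gl);
	revoke_add(&gl);
	printf("pending revokes: %d\n", atomic_load(&gl.revokes));
	revoke_after_commit();
	printf("after commit:    %d\n", atomic_load(&gl.revokes));
	return 0;
}

Keeping the entries alive until after the commit is what lets the counter reflect revokes that are queued but not yet safely on disk.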
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 888a5f5a1a58..cfa327d33194 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -53,6 +53,7 @@ static void gfs2_init_glock_once(void *foo)
53 INIT_LIST_HEAD(&gl->gl_lru); 53 INIT_LIST_HEAD(&gl->gl_lru);
54 INIT_LIST_HEAD(&gl->gl_ail_list); 54 INIT_LIST_HEAD(&gl->gl_ail_list);
55 atomic_set(&gl->gl_ail_count, 0); 55 atomic_set(&gl->gl_ail_count, 0);
56 atomic_set(&gl->gl_revokes, 0);
56} 57}
57 58
58static void gfs2_init_gl_aspace_once(void *foo) 59static void gfs2_init_gl_aspace_once(void *foo)
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 675349b5a133..747238cd9f96 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -31,6 +31,7 @@
31#include "rgrp.h" 31#include "rgrp.h"
32#include "trans.h" 32#include "trans.h"
33#include "util.h" 33#include "util.h"
34#include "trace_gfs2.h"
34 35
35static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc) 36static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc)
36{ 37{
@@ -310,6 +311,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
310 struct gfs2_bufdata *bd = bh->b_private; 311 struct gfs2_bufdata *bd = bh->b_private;
311 312
312 if (test_clear_buffer_pinned(bh)) { 313 if (test_clear_buffer_pinned(bh)) {
314 trace_gfs2_pin(bd, 0);
313 atomic_dec(&sdp->sd_log_pinned); 315 atomic_dec(&sdp->sd_log_pinned);
314 list_del_init(&bd->bd_le.le_list); 316 list_del_init(&bd->bd_le.le_list);
315 if (meta) { 317 if (meta) {
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index 6a1d9ba16411..22c526593131 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -77,8 +77,6 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
77 77
78#define buffer_busy(bh) \ 78#define buffer_busy(bh) \
79((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned))) 79((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
80#define buffer_in_io(bh) \
81((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
82 80
83#endif /* __DIO_DOT_H__ */ 81#endif /* __DIO_DOT_H__ */
84 82
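The meta_io.h hunk above drops the unused buffer_in_io() macro and keeps buffer_busy(), which tests the Dirty, Lock and Pinned state bits with a single mask. A tiny sketch of that multi-bit state test; the bit positions are illustrative, the kernel uses the BH_* enum.

#include <stdio.h>

/* Illustrative bit positions. */
#define ST_DIRTY  (1ul << 0)
#define ST_LOCK   (1ul << 1)
#define ST_PINNED (1ul << 2)

/* "Busy" means any of dirty, locked or pinned is set. */
static int buffer_busy(unsigned long state)
{
	return (state & (ST_DIRTY | ST_LOCK | ST_PINNED)) != 0;
}

int main(void)
{
	printf("clean buffer busy?  %d\n", buffer_busy(0));
	printf("pinned buffer busy? %d\n", buffer_busy(ST_PINNED));
	return 0;
}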
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index d3c69eb91c74..8ac9ae189b53 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -126,8 +126,10 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
126 * changed. 126 * changed.
127 */ 127 */
128 128
129static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) 129static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
130{ 130{
131 struct gfs2_sb_host *sb = &sdp->sd_sb;
132
131 if (sb->sb_magic != GFS2_MAGIC || 133 if (sb->sb_magic != GFS2_MAGIC ||
132 sb->sb_type != GFS2_METATYPE_SB) { 134 sb->sb_type != GFS2_METATYPE_SB) {
133 if (!silent) 135 if (!silent)
@@ -157,8 +159,10 @@ static void end_bio_io_page(struct bio *bio, int error)
157 unlock_page(page); 159 unlock_page(page);
158} 160}
159 161
160static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) 162static void gfs2_sb_in(struct gfs2_sbd *sdp, const void *buf)
161{ 163{
164 struct gfs2_sb_host *sb = &sdp->sd_sb;
165 struct super_block *s = sdp->sd_vfs;
162 const struct gfs2_sb *str = buf; 166 const struct gfs2_sb *str = buf;
163 167
164 sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); 168 sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic);
@@ -175,7 +179,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
175 179
176 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); 180 memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
177 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); 181 memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
178 memcpy(sb->sb_uuid, str->sb_uuid, 16); 182 memcpy(s->s_uuid, str->sb_uuid, 16);
179} 183}
180 184
181/** 185/**
@@ -197,7 +201,7 @@ static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
197 * Returns: 0 on success or error 201 * Returns: 0 on success or error
198 */ 202 */
199 203
200static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) 204static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
201{ 205{
202 struct super_block *sb = sdp->sd_vfs; 206 struct super_block *sb = sdp->sd_vfs;
203 struct gfs2_sb *p; 207 struct gfs2_sb *p;
@@ -227,10 +231,10 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
227 return -EIO; 231 return -EIO;
228 } 232 }
229 p = kmap(page); 233 p = kmap(page);
230 gfs2_sb_in(&sdp->sd_sb, p); 234 gfs2_sb_in(sdp, p);
231 kunmap(page); 235 kunmap(page);
232 __free_page(page); 236 __free_page(page);
233 return 0; 237 return gfs2_check_sb(sdp, silent);
234} 238}
235 239
236/** 240/**
@@ -247,17 +251,13 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
247 unsigned int x; 251 unsigned int x;
248 int error; 252 int error;
249 253
250 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); 254 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent);
251 if (error) { 255 if (error) {
252 if (!silent) 256 if (!silent)
253 fs_err(sdp, "can't read superblock\n"); 257 fs_err(sdp, "can't read superblock\n");
254 return error; 258 return error;
255 } 259 }
256 260
257 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
258 if (error)
259 return error;
260
261 sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - 261 sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
262 GFS2_BASIC_BLOCK_SHIFT; 262 GFS2_BASIC_BLOCK_SHIFT;
263 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; 263 sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
@@ -340,14 +340,10 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
340 /* Try to autodetect */ 340 /* Try to autodetect */
341 341
342 if (!proto[0] || !table[0]) { 342 if (!proto[0] || !table[0]) {
343 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); 343 error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, silent);
344 if (error) 344 if (error)
345 return error; 345 return error;
346 346
347 error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
348 if (error)
349 goto out;
350
351 if (!proto[0]) 347 if (!proto[0])
352 proto = sdp->sd_sb.sb_lockproto; 348 proto = sdp->sd_sb.sb_lockproto;
353 if (!table[0]) 349 if (!table[0])
@@ -364,7 +360,6 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
364 while ((table = strchr(table, '/'))) 360 while ((table = strchr(table, '/')))
365 *table = '_'; 361 *table = '_';
366 362
367out:
368 return error; 363 return error;
369} 364}
370 365
@@ -1119,8 +1114,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
1119 if (sdp->sd_args.ar_statfs_quantum) { 1114 if (sdp->sd_args.ar_statfs_quantum) {
1120 sdp->sd_tune.gt_statfs_slow = 0; 1115 sdp->sd_tune.gt_statfs_slow = 0;
1121 sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum; 1116 sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum;
1122 } 1117 } else {
1123 else {
1124 sdp->sd_tune.gt_statfs_slow = 1; 1118 sdp->sd_tune.gt_statfs_slow = 1;
1125 sdp->sd_tune.gt_statfs_quantum = 30; 1119 sdp->sd_tune.gt_statfs_quantum = 30;
1126 } 1120 }
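The ops_fstype.c hunks above fold the gfs2_check_sb() call into gfs2_read_super(), so every caller that reads the superblock gets it validated in one place instead of repeating the check afterwards. A short sketch of that "read then validate in one helper" shape; the magic number, field names and helper names are illustrative, not the GFS2 on-disk layout.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define SB_MAGIC 0x01161970u     /* illustrative magic value */

struct sb_host {
	uint32_t magic;
	uint32_t bsize;
};

/* Validation kept in one place, called from the read path. */
static int check_sb(const struct sb_host *sb, int silent)
{
	if (sb->magic != SB_MAGIC) {
		if (!silent)
			fprintf(stderr, "not a valid filesystem\n");
		return -1;
	}
	if (sb->bsize < 512 || (sb->bsize & (sb->bsize - 1))) {
		if (!silent)
			fprintf(stderr, "bad block size %u\n", sb->bsize);
		return -1;
	}
	return 0;
}

/* Read (here: decode from a buffer) and validate in a single call. */
static int read_super(struct sb_host *sb, const void *buf, int silent)
{
	memcpy(sb, buf, sizeof(*sb));    /* stand-in for reading the block */
	return check_sb(sb, silent);
}

int main(void)
{
	struct sb_host good = { .magic = SB_MAGIC, .bsize = 4096 };
	struct sb_host out;

	printf("valid sb:  %d\n", read_super(&out, &good, 0));
	good.magic = 0;
	printf("broken sb: %d\n", read_super(&out, &good, 0));
	return 0;
}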
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
deleted file mode 100644
index 09e436a50723..000000000000
--- a/fs/gfs2/ops_inode.c
+++ /dev/null
@@ -1,1344 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/slab.h>
11#include <linux/spinlock.h>
12#include <linux/completion.h>
13#include <linux/buffer_head.h>
14#include <linux/namei.h>
15#include <linux/mm.h>
16#include <linux/xattr.h>
17#include <linux/posix_acl.h>
18#include <linux/gfs2_ondisk.h>
19#include <linux/crc32.h>
20#include <linux/fiemap.h>
21#include <asm/uaccess.h>
22
23#include "gfs2.h"
24#include "incore.h"
25#include "acl.h"
26#include "bmap.h"
27#include "dir.h"
28#include "xattr.h"
29#include "glock.h"
30#include "inode.h"
31#include "meta_io.h"
32#include "quota.h"
33#include "rgrp.h"
34#include "trans.h"
35#include "util.h"
36#include "super.h"
37
38/**
39 * gfs2_create - Create a file
40 * @dir: The directory in which to create the file
41 * @dentry: The dentry of the new file
42 * @mode: The mode of the new file
43 *
44 * Returns: errno
45 */
46
47static int gfs2_create(struct inode *dir, struct dentry *dentry,
48 int mode, struct nameidata *nd)
49{
50 struct gfs2_inode *dip = GFS2_I(dir);
51 struct gfs2_sbd *sdp = GFS2_SB(dir);
52 struct gfs2_holder ghs[2];
53 struct inode *inode;
54
55 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
56
57 for (;;) {
58 inode = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode, 0);
59 if (!IS_ERR(inode)) {
60 gfs2_trans_end(sdp);
61 if (dip->i_alloc->al_rgd)
62 gfs2_inplace_release(dip);
63 gfs2_quota_unlock(dip);
64 gfs2_alloc_put(dip);
65 gfs2_glock_dq_uninit_m(2, ghs);
66 mark_inode_dirty(inode);
67 break;
68 } else if (PTR_ERR(inode) != -EEXIST ||
69 (nd && nd->flags & LOOKUP_EXCL)) {
70 gfs2_holder_uninit(ghs);
71 return PTR_ERR(inode);
72 }
73
74 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
75 if (inode) {
76 if (!IS_ERR(inode)) {
77 gfs2_holder_uninit(ghs);
78 break;
79 } else {
80 gfs2_holder_uninit(ghs);
81 return PTR_ERR(inode);
82 }
83 }
84 }
85
86 d_instantiate(dentry, inode);
87
88 return 0;
89}
90
91/**
92 * gfs2_lookup - Look up a filename in a directory and return its inode
93 * @dir: The directory inode
94 * @dentry: The dentry of the new inode
95 * @nd: passed from Linux VFS, ignored by us
96 *
97 * Called by the VFS layer. Lock dir and call gfs2_lookupi()
98 *
99 * Returns: errno
100 */
101
102static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
103 struct nameidata *nd)
104{
105 struct inode *inode = NULL;
106
107 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
108 if (inode && IS_ERR(inode))
109 return ERR_CAST(inode);
110
111 if (inode) {
112 struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
113 struct gfs2_holder gh;
114 int error;
115 error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
116 if (error) {
117 iput(inode);
118 return ERR_PTR(error);
119 }
120 gfs2_glock_dq_uninit(&gh);
121 return d_splice_alias(inode, dentry);
122 }
123 d_add(dentry, inode);
124
125 return NULL;
126}
127
128/**
129 * gfs2_link - Link to a file
130 * @old_dentry: The inode to link
131 * @dir: Add link to this directory
132 * @dentry: The name of the link
133 *
134 * Link the inode in "old_dentry" into the directory "dir" with the
135 * name in "dentry".
136 *
137 * Returns: errno
138 */
139
140static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
141 struct dentry *dentry)
142{
143 struct gfs2_inode *dip = GFS2_I(dir);
144 struct gfs2_sbd *sdp = GFS2_SB(dir);
145 struct inode *inode = old_dentry->d_inode;
146 struct gfs2_inode *ip = GFS2_I(inode);
147 struct gfs2_holder ghs[2];
148 int alloc_required;
149 int error;
150
151 if (S_ISDIR(inode->i_mode))
152 return -EPERM;
153
154 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
155 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
156
157 error = gfs2_glock_nq(ghs); /* parent */
158 if (error)
159 goto out_parent;
160
161 error = gfs2_glock_nq(ghs + 1); /* child */
162 if (error)
163 goto out_child;
164
165 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
166 if (error)
167 goto out_gunlock;
168
169 error = gfs2_dir_check(dir, &dentry->d_name, NULL);
170 switch (error) {
171 case -ENOENT:
172 break;
173 case 0:
174 error = -EEXIST;
175 default:
176 goto out_gunlock;
177 }
178
179 error = -EINVAL;
180 if (!dip->i_inode.i_nlink)
181 goto out_gunlock;
182 error = -EFBIG;
183 if (dip->i_entries == (u32)-1)
184 goto out_gunlock;
185 error = -EPERM;
186 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
187 goto out_gunlock;
188 error = -EINVAL;
189 if (!ip->i_inode.i_nlink)
190 goto out_gunlock;
191 error = -EMLINK;
192 if (ip->i_inode.i_nlink == (u32)-1)
193 goto out_gunlock;
194
195 alloc_required = error = gfs2_diradd_alloc_required(dir, &dentry->d_name);
196 if (error < 0)
197 goto out_gunlock;
198 error = 0;
199
200 if (alloc_required) {
201 struct gfs2_alloc *al = gfs2_alloc_get(dip);
202 if (!al) {
203 error = -ENOMEM;
204 goto out_gunlock;
205 }
206
207 error = gfs2_quota_lock_check(dip);
208 if (error)
209 goto out_alloc;
210
211 al->al_requested = sdp->sd_max_dirres;
212
213 error = gfs2_inplace_reserve(dip);
214 if (error)
215 goto out_gunlock_q;
216
217 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
218 gfs2_rg_blocks(al) +
219 2 * RES_DINODE + RES_STATFS +
220 RES_QUOTA, 0);
221 if (error)
222 goto out_ipres;
223 } else {
224 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0);
225 if (error)
226 goto out_ipres;
227 }
228
229 error = gfs2_dir_add(dir, &dentry->d_name, ip, IF2DT(inode->i_mode));
230 if (error)
231 goto out_end_trans;
232
233 error = gfs2_change_nlink(ip, +1);
234
235out_end_trans:
236 gfs2_trans_end(sdp);
237out_ipres:
238 if (alloc_required)
239 gfs2_inplace_release(dip);
240out_gunlock_q:
241 if (alloc_required)
242 gfs2_quota_unlock(dip);
243out_alloc:
244 if (alloc_required)
245 gfs2_alloc_put(dip);
246out_gunlock:
247 gfs2_glock_dq(ghs + 1);
248out_child:
249 gfs2_glock_dq(ghs);
250out_parent:
251 gfs2_holder_uninit(ghs);
252 gfs2_holder_uninit(ghs + 1);
253 if (!error) {
254 ihold(inode);
255 d_instantiate(dentry, inode);
256 mark_inode_dirty(inode);
257 }
258 return error;
259}
260
261/*
262 * gfs2_unlink_ok - check to see that an inode is still in a directory
263 * @dip: the directory
264 * @name: the name of the file
265 * @ip: the inode
266 *
267 * Assumes that the lock on (at least) @dip is held.
268 *
269 * Returns: 0 if the parent/child relationship is correct, errno if it isn't
270 */
271
272static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
273 const struct gfs2_inode *ip)
274{
275 int error;
276
277 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
278 return -EPERM;
279
280 if ((dip->i_inode.i_mode & S_ISVTX) &&
281 dip->i_inode.i_uid != current_fsuid() &&
282 ip->i_inode.i_uid != current_fsuid() && !capable(CAP_FOWNER))
283 return -EPERM;
284
285 if (IS_APPEND(&dip->i_inode))
286 return -EPERM;
287
288 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
289 if (error)
290 return error;
291
292 error = gfs2_dir_check(&dip->i_inode, name, ip);
293 if (error)
294 return error;
295
296 return 0;
297}
298
299/**
300 * gfs2_unlink - Unlink a file
301 * @dir: The inode of the directory containing the file to unlink
302 * @dentry: The file itself
303 *
304 * Unlink a file. Call gfs2_unlinki()
305 *
306 * Returns: errno
307 */
308
309static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
310{
311 struct gfs2_inode *dip = GFS2_I(dir);
312 struct gfs2_sbd *sdp = GFS2_SB(dir);
313 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
314 struct gfs2_holder ghs[3];
315 struct gfs2_rgrpd *rgd;
316 struct gfs2_holder ri_gh;
317 int error;
318
319 error = gfs2_rindex_hold(sdp, &ri_gh);
320 if (error)
321 return error;
322
323 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
324 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
325
326 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
327 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
328
329
330 error = gfs2_glock_nq(ghs); /* parent */
331 if (error)
332 goto out_parent;
333
334 error = gfs2_glock_nq(ghs + 1); /* child */
335 if (error)
336 goto out_child;
337
338 error = gfs2_glock_nq(ghs + 2); /* rgrp */
339 if (error)
340 goto out_rgrp;
341
342 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
343 if (error)
344 goto out_gunlock;
345
346 error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
347 if (error)
348 goto out_gunlock;
349
350 error = gfs2_dir_del(dip, &dentry->d_name);
351 if (error)
352 goto out_end_trans;
353
354 error = gfs2_change_nlink(ip, -1);
355
356out_end_trans:
357 gfs2_trans_end(sdp);
358out_gunlock:
359 gfs2_glock_dq(ghs + 2);
360out_rgrp:
361 gfs2_holder_uninit(ghs + 2);
362 gfs2_glock_dq(ghs + 1);
363out_child:
364 gfs2_holder_uninit(ghs + 1);
365 gfs2_glock_dq(ghs);
366out_parent:
367 gfs2_holder_uninit(ghs);
368 gfs2_glock_dq_uninit(&ri_gh);
369 return error;
370}
371
372/**
373 * gfs2_symlink - Create a symlink
374 * @dir: The directory to create the symlink in
375 * @dentry: The dentry to put the symlink in
376 * @symname: The thing which the link points to
377 *
378 * Returns: errno
379 */
380
381static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
382 const char *symname)
383{
384 struct gfs2_inode *dip = GFS2_I(dir), *ip;
385 struct gfs2_sbd *sdp = GFS2_SB(dir);
386 struct gfs2_holder ghs[2];
387 struct inode *inode;
388 struct buffer_head *dibh;
389 int size;
390 int error;
391
392 /* Must be stuffed with a null terminator for gfs2_follow_link() */
393 size = strlen(symname);
394 if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
395 return -ENAMETOOLONG;
396
397 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
398
399 inode = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO, 0);
400 if (IS_ERR(inode)) {
401 gfs2_holder_uninit(ghs);
402 return PTR_ERR(inode);
403 }
404
405 ip = ghs[1].gh_gl->gl_object;
406
407 i_size_write(inode, size);
408
409 error = gfs2_meta_inode_buffer(ip, &dibh);
410
411 if (!gfs2_assert_withdraw(sdp, !error)) {
412 gfs2_dinode_out(ip, dibh->b_data);
413 memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname,
414 size);
415 brelse(dibh);
416 }
417
418 gfs2_trans_end(sdp);
419 if (dip->i_alloc->al_rgd)
420 gfs2_inplace_release(dip);
421 gfs2_quota_unlock(dip);
422 gfs2_alloc_put(dip);
423
424 gfs2_glock_dq_uninit_m(2, ghs);
425
426 d_instantiate(dentry, inode);
427 mark_inode_dirty(inode);
428
429 return 0;
430}
431
432/**
433 * gfs2_mkdir - Make a directory
434 * @dir: The parent directory of the new one
435 * @dentry: The dentry of the new directory
436 * @mode: The mode of the new directory
437 *
438 * Returns: errno
439 */
440
441static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
442{
443 struct gfs2_inode *dip = GFS2_I(dir), *ip;
444 struct gfs2_sbd *sdp = GFS2_SB(dir);
445 struct gfs2_holder ghs[2];
446 struct inode *inode;
447 struct buffer_head *dibh;
448 int error;
449
450 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
451
452 inode = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode, 0);
453 if (IS_ERR(inode)) {
454 gfs2_holder_uninit(ghs);
455 return PTR_ERR(inode);
456 }
457
458 ip = ghs[1].gh_gl->gl_object;
459
460 ip->i_inode.i_nlink = 2;
461 i_size_write(inode, sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode));
462 ip->i_diskflags |= GFS2_DIF_JDATA;
463 ip->i_entries = 2;
464
465 error = gfs2_meta_inode_buffer(ip, &dibh);
466
467 if (!gfs2_assert_withdraw(sdp, !error)) {
468 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
469 struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1);
470
471 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
472 gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent);
473 dent->de_inum = di->di_num; /* already GFS2 endian */
474 dent->de_type = cpu_to_be16(DT_DIR);
475 di->di_entries = cpu_to_be32(1);
476
477 dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
478 gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
479
480 gfs2_inum_out(dip, dent);
481 dent->de_type = cpu_to_be16(DT_DIR);
482
483 gfs2_dinode_out(ip, di);
484
485 brelse(dibh);
486 }
487
488 error = gfs2_change_nlink(dip, +1);
489 gfs2_assert_withdraw(sdp, !error); /* dip already pinned */
490
491 gfs2_trans_end(sdp);
492 if (dip->i_alloc->al_rgd)
493 gfs2_inplace_release(dip);
494 gfs2_quota_unlock(dip);
495 gfs2_alloc_put(dip);
496
497 gfs2_glock_dq_uninit_m(2, ghs);
498
499 d_instantiate(dentry, inode);
500 mark_inode_dirty(inode);
501
502 return 0;
503}
504
505/**
506 * gfs2_rmdiri - Remove a directory
507 * @dip: The parent directory of the directory to be removed
508 * @name: The name of the directory to be removed
509 * @ip: The GFS2 inode of the directory to be removed
510 *
511 * Assumes Glocks on dip and ip are held
512 *
513 * Returns: errno
514 */
515
516static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
517 struct gfs2_inode *ip)
518{
519 int error;
520
521 if (ip->i_entries != 2) {
522 if (gfs2_consist_inode(ip))
523 gfs2_dinode_print(ip);
524 return -EIO;
525 }
526
527 error = gfs2_dir_del(dip, name);
528 if (error)
529 return error;
530
531 error = gfs2_change_nlink(dip, -1);
532 if (error)
533 return error;
534
535 error = gfs2_dir_del(ip, &gfs2_qdot);
536 if (error)
537 return error;
538
539 error = gfs2_dir_del(ip, &gfs2_qdotdot);
540 if (error)
541 return error;
542
543 /* It looks odd, but it really should be done twice */
544 error = gfs2_change_nlink(ip, -1);
545 if (error)
546 return error;
547
548 error = gfs2_change_nlink(ip, -1);
549 if (error)
550 return error;
551
552 return error;
553}
554
555/**
556 * gfs2_rmdir - Remove a directory
557 * @dir: The parent directory of the directory to be removed
558 * @dentry: The dentry of the directory to remove
559 *
560 * Remove a directory. Call gfs2_rmdiri()
561 *
562 * Returns: errno
563 */
564
565static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
566{
567 struct gfs2_inode *dip = GFS2_I(dir);
568 struct gfs2_sbd *sdp = GFS2_SB(dir);
569 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
570 struct gfs2_holder ghs[3];
571 struct gfs2_rgrpd *rgd;
572 struct gfs2_holder ri_gh;
573 int error;
574
575 error = gfs2_rindex_hold(sdp, &ri_gh);
576 if (error)
577 return error;
578 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
579 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
580
581 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
582 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
583
584 error = gfs2_glock_nq(ghs); /* parent */
585 if (error)
586 goto out_parent;
587
588 error = gfs2_glock_nq(ghs + 1); /* child */
589 if (error)
590 goto out_child;
591
592 error = gfs2_glock_nq(ghs + 2); /* rgrp */
593 if (error)
594 goto out_rgrp;
595
596 error = gfs2_unlink_ok(dip, &dentry->d_name, ip);
597 if (error)
598 goto out_gunlock;
599
600 if (ip->i_entries < 2) {
601 if (gfs2_consist_inode(ip))
602 gfs2_dinode_print(ip);
603 error = -EIO;
604 goto out_gunlock;
605 }
606 if (ip->i_entries > 2) {
607 error = -ENOTEMPTY;
608 goto out_gunlock;
609 }
610
611 error = gfs2_trans_begin(sdp, 2 * RES_DINODE + 3 * RES_LEAF + RES_RG_BIT, 0);
612 if (error)
613 goto out_gunlock;
614
615 error = gfs2_rmdiri(dip, &dentry->d_name, ip);
616
617 gfs2_trans_end(sdp);
618
619out_gunlock:
620 gfs2_glock_dq(ghs + 2);
621out_rgrp:
622 gfs2_holder_uninit(ghs + 2);
623 gfs2_glock_dq(ghs + 1);
624out_child:
625 gfs2_holder_uninit(ghs + 1);
626 gfs2_glock_dq(ghs);
627out_parent:
628 gfs2_holder_uninit(ghs);
629 gfs2_glock_dq_uninit(&ri_gh);
630 return error;
631}
632
633/**
634 * gfs2_mknod - Make a special file
635 * @dir: The directory in which the special file will reside
636 * @dentry: The dentry of the special file
637 * @mode: The mode of the special file
638 * @rdev: The device specification of the special file
639 *
640 */
641
642static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
643 dev_t dev)
644{
645 struct gfs2_inode *dip = GFS2_I(dir);
646 struct gfs2_sbd *sdp = GFS2_SB(dir);
647 struct gfs2_holder ghs[2];
648 struct inode *inode;
649
650 gfs2_holder_init(dip->i_gl, 0, 0, ghs);
651
652 inode = gfs2_createi(ghs, &dentry->d_name, mode, dev);
653 if (IS_ERR(inode)) {
654 gfs2_holder_uninit(ghs);
655 return PTR_ERR(inode);
656 }
657
658 gfs2_trans_end(sdp);
659 if (dip->i_alloc->al_rgd)
660 gfs2_inplace_release(dip);
661 gfs2_quota_unlock(dip);
662 gfs2_alloc_put(dip);
663
664 gfs2_glock_dq_uninit_m(2, ghs);
665
666 d_instantiate(dentry, inode);
667 mark_inode_dirty(inode);
668
669 return 0;
670}
671
672/*
673 * gfs2_ok_to_move - check if it's ok to move a directory to another directory
674 * @this: move this
675 * @to: to here
676 *
677 * Follow @to back to the root and make sure we don't encounter @this
678 * Assumes we already hold the rename lock.
679 *
680 * Returns: errno
681 */
682
683static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
684{
685 struct inode *dir = &to->i_inode;
686 struct super_block *sb = dir->i_sb;
687 struct inode *tmp;
688 int error = 0;
689
690 igrab(dir);
691
692 for (;;) {
693 if (dir == &this->i_inode) {
694 error = -EINVAL;
695 break;
696 }
697 if (dir == sb->s_root->d_inode) {
698 error = 0;
699 break;
700 }
701
702 tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1);
703 if (IS_ERR(tmp)) {
704 error = PTR_ERR(tmp);
705 break;
706 }
707
708 iput(dir);
709 dir = tmp;
710 }
711
712 iput(dir);
713
714 return error;
715}
716
717/**
718 * gfs2_rename - Rename a file
719 * @odir: Parent directory of old file name
720 * @odentry: The old dentry of the file
721 * @ndir: Parent directory of new file name
722 * @ndentry: The new dentry of the file
723 *
724 * Returns: errno
725 */
726
727static int gfs2_rename(struct inode *odir, struct dentry *odentry,
728 struct inode *ndir, struct dentry *ndentry)
729{
730 struct gfs2_inode *odip = GFS2_I(odir);
731 struct gfs2_inode *ndip = GFS2_I(ndir);
732 struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
733 struct gfs2_inode *nip = NULL;
734 struct gfs2_sbd *sdp = GFS2_SB(odir);
735 struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh;
736 struct gfs2_rgrpd *nrgd;
737 unsigned int num_gh;
738 int dir_rename = 0;
739 int alloc_required = 0;
740 unsigned int x;
741 int error;
742
743 if (ndentry->d_inode) {
744 nip = GFS2_I(ndentry->d_inode);
745 if (ip == nip)
746 return 0;
747 }
748
749 error = gfs2_rindex_hold(sdp, &ri_gh);
750 if (error)
751 return error;
752
753 if (odip != ndip) {
754 error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
755 0, &r_gh);
756 if (error)
757 goto out;
758
759 if (S_ISDIR(ip->i_inode.i_mode)) {
760 dir_rename = 1;
761 /* don't move a directory into its subdir */
762 error = gfs2_ok_to_move(ip, ndip);
763 if (error)
764 goto out_gunlock_r;
765 }
766 }
767
768 num_gh = 1;
769 gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
770 if (odip != ndip) {
771 gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
772 num_gh++;
773 }
774 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
775 num_gh++;
776
777 if (nip) {
778 gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
779 num_gh++;
780 /* grab the resource lock for unlink flag twiddling
781 * this is the case of the target file already existing
782 * so we unlink before doing the rename
783 */
784 nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
785 if (nrgd)
786 gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
787 }
788
789 for (x = 0; x < num_gh; x++) {
790 error = gfs2_glock_nq(ghs + x);
791 if (error)
792 goto out_gunlock;
793 }
794
795 /* Check out the old directory */
796
797 error = gfs2_unlink_ok(odip, &odentry->d_name, ip);
798 if (error)
799 goto out_gunlock;
800
801 /* Check out the new directory */
802
803 if (nip) {
804 error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip);
805 if (error)
806 goto out_gunlock;
807
808 if (S_ISDIR(nip->i_inode.i_mode)) {
809 if (nip->i_entries < 2) {
810 if (gfs2_consist_inode(nip))
811 gfs2_dinode_print(nip);
812 error = -EIO;
813 goto out_gunlock;
814 }
815 if (nip->i_entries > 2) {
816 error = -ENOTEMPTY;
817 goto out_gunlock;
818 }
819 }
820 } else {
821 error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
822 if (error)
823 goto out_gunlock;
824
825 error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
826 switch (error) {
827 case -ENOENT:
828 error = 0;
829 break;
830 case 0:
831 error = -EEXIST;
832 default:
833 goto out_gunlock;
834 };
835
836 if (odip != ndip) {
837 if (!ndip->i_inode.i_nlink) {
838 error = -EINVAL;
839 goto out_gunlock;
840 }
841 if (ndip->i_entries == (u32)-1) {
842 error = -EFBIG;
843 goto out_gunlock;
844 }
845 if (S_ISDIR(ip->i_inode.i_mode) &&
846 ndip->i_inode.i_nlink == (u32)-1) {
847 error = -EMLINK;
848 goto out_gunlock;
849 }
850 }
851 }
852
853 /* Check out the dir to be renamed */
854
855 if (dir_rename) {
856 error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
857 if (error)
858 goto out_gunlock;
859 }
860
861 if (nip == NULL)
862 alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
863 error = alloc_required;
864 if (error < 0)
865 goto out_gunlock;
866 error = 0;
867
868 if (alloc_required) {
869 struct gfs2_alloc *al = gfs2_alloc_get(ndip);
870 if (!al) {
871 error = -ENOMEM;
872 goto out_gunlock;
873 }
874
875 error = gfs2_quota_lock_check(ndip);
876 if (error)
877 goto out_alloc;
878
879 al->al_requested = sdp->sd_max_dirres;
880
881 error = gfs2_inplace_reserve_ri(ndip);
882 if (error)
883 goto out_gunlock_q;
884
885 error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
886 gfs2_rg_blocks(al) +
887 4 * RES_DINODE + 4 * RES_LEAF +
888 RES_STATFS + RES_QUOTA + 4, 0);
889 if (error)
890 goto out_ipreserv;
891 } else {
892 error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
893 5 * RES_LEAF + 4, 0);
894 if (error)
895 goto out_gunlock;
896 }
897
898 /* Remove the target file, if it exists */
899
900 if (nip) {
901 if (S_ISDIR(nip->i_inode.i_mode))
902 error = gfs2_rmdiri(ndip, &ndentry->d_name, nip);
903 else {
904 error = gfs2_dir_del(ndip, &ndentry->d_name);
905 if (error)
906 goto out_end_trans;
907 error = gfs2_change_nlink(nip, -1);
908 }
909 if (error)
910 goto out_end_trans;
911 }
912
913 if (dir_rename) {
914 error = gfs2_change_nlink(ndip, +1);
915 if (error)
916 goto out_end_trans;
917 error = gfs2_change_nlink(odip, -1);
918 if (error)
919 goto out_end_trans;
920
921 error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
922 if (error)
923 goto out_end_trans;
924 } else {
925 struct buffer_head *dibh;
926 error = gfs2_meta_inode_buffer(ip, &dibh);
927 if (error)
928 goto out_end_trans;
929 ip->i_inode.i_ctime = CURRENT_TIME;
930 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
931 gfs2_dinode_out(ip, dibh->b_data);
932 brelse(dibh);
933 }
934
935 error = gfs2_dir_del(odip, &odentry->d_name);
936 if (error)
937 goto out_end_trans;
938
939 error = gfs2_dir_add(ndir, &ndentry->d_name, ip, IF2DT(ip->i_inode.i_mode));
940 if (error)
941 goto out_end_trans;
942
943out_end_trans:
944 gfs2_trans_end(sdp);
945out_ipreserv:
946 if (alloc_required)
947 gfs2_inplace_release(ndip);
948out_gunlock_q:
949 if (alloc_required)
950 gfs2_quota_unlock(ndip);
951out_alloc:
952 if (alloc_required)
953 gfs2_alloc_put(ndip);
954out_gunlock:
955 while (x--) {
956 gfs2_glock_dq(ghs + x);
957 gfs2_holder_uninit(ghs + x);
958 }
959out_gunlock_r:
960 if (r_gh.gh_gl)
961 gfs2_glock_dq_uninit(&r_gh);
962out:
963 gfs2_glock_dq_uninit(&ri_gh);
964 return error;
965}
966
967/**
968 * gfs2_follow_link - Follow a symbolic link
969 * @dentry: The dentry of the link
970 * @nd: Data that we pass to vfs_follow_link()
971 *
972 * This can handle symlinks of any size.
973 *
974 * Returns: 0 on success or error code
975 */
976
977static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
978{
979 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
980 struct gfs2_holder i_gh;
981 struct buffer_head *dibh;
982 unsigned int x, size;
983 char *buf;
984 int error;
985
986 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
987 error = gfs2_glock_nq(&i_gh);
988 if (error) {
989 gfs2_holder_uninit(&i_gh);
990 nd_set_link(nd, ERR_PTR(error));
991 return NULL;
992 }
993
994 size = (unsigned int)i_size_read(&ip->i_inode);
995 if (size == 0) {
996 gfs2_consist_inode(ip);
997 buf = ERR_PTR(-EIO);
998 goto out;
999 }
1000
1001 error = gfs2_meta_inode_buffer(ip, &dibh);
1002 if (error) {
1003 buf = ERR_PTR(error);
1004 goto out;
1005 }
1006
1007 x = size + 1;
1008 buf = kmalloc(x, GFP_NOFS);
1009 if (!buf)
1010 buf = ERR_PTR(-ENOMEM);
1011 else
1012 memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
1013 brelse(dibh);
1014out:
1015 gfs2_glock_dq_uninit(&i_gh);
1016 nd_set_link(nd, buf);
1017 return NULL;
1018}
1019
1020static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1021{
1022 char *s = nd_get_link(nd);
1023 if (!IS_ERR(s))
1024 kfree(s);
1025}
1026
1027/**
1028 * gfs2_permission -
1029 * @inode: The inode
1030 * @mask: The mask to be tested
1031 * @flags: Indicates whether this is an RCU path walk or not
1032 *
1033 * This may be called from the VFS directly, or from within GFS2 with the
1034 * inode locked, so we look to see if the glock is already locked and only
1035 * lock the glock if it has not already been done.
1036 *
1037 * Returns: errno
1038 */
1039
1040int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
1041{
1042 struct gfs2_inode *ip;
1043 struct gfs2_holder i_gh;
1044 int error;
1045 int unlock = 0;
1046
1047
1048 ip = GFS2_I(inode);
1049 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1050 if (flags & IPERM_FLAG_RCU)
1051 return -ECHILD;
1052 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
1053 if (error)
1054 return error;
1055 unlock = 1;
1056 }
1057
1058 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
1059 error = -EACCES;
1060 else
1061 error = generic_permission(inode, mask, flags, gfs2_check_acl);
1062 if (unlock)
1063 gfs2_glock_dq_uninit(&i_gh);
1064
1065 return error;
1066}
1067
1068static int setattr_chown(struct inode *inode, struct iattr *attr)
1069{
1070 struct gfs2_inode *ip = GFS2_I(inode);
1071 struct gfs2_sbd *sdp = GFS2_SB(inode);
1072 u32 ouid, ogid, nuid, ngid;
1073 int error;
1074
1075 ouid = inode->i_uid;
1076 ogid = inode->i_gid;
1077 nuid = attr->ia_uid;
1078 ngid = attr->ia_gid;
1079
1080 if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
1081 ouid = nuid = NO_QUOTA_CHANGE;
1082 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
1083 ogid = ngid = NO_QUOTA_CHANGE;
1084
1085 if (!gfs2_alloc_get(ip))
1086 return -ENOMEM;
1087
1088 error = gfs2_quota_lock(ip, nuid, ngid);
1089 if (error)
1090 goto out_alloc;
1091
1092 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
1093 error = gfs2_quota_check(ip, nuid, ngid);
1094 if (error)
1095 goto out_gunlock_q;
1096 }
1097
1098 error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0);
1099 if (error)
1100 goto out_gunlock_q;
1101
1102 error = gfs2_setattr_simple(ip, attr);
1103 if (error)
1104 goto out_end_trans;
1105
1106 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
1107 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
1108 gfs2_quota_change(ip, -blocks, ouid, ogid);
1109 gfs2_quota_change(ip, blocks, nuid, ngid);
1110 }
1111
1112out_end_trans:
1113 gfs2_trans_end(sdp);
1114out_gunlock_q:
1115 gfs2_quota_unlock(ip);
1116out_alloc:
1117 gfs2_alloc_put(ip);
1118 return error;
1119}
1120
1121/**
1122 * gfs2_setattr - Change attributes on an inode
1123 * @dentry: The dentry which is changing
1124 * @attr: The structure describing the change
1125 *
1126 * The VFS layer wants to change one or more of an inode's attributes. Write
1127 * that change out to disk.
1128 *
1129 * Returns: errno
1130 */
1131
1132static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
1133{
1134 struct inode *inode = dentry->d_inode;
1135 struct gfs2_inode *ip = GFS2_I(inode);
1136 struct gfs2_holder i_gh;
1137 int error;
1138
1139 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1140 if (error)
1141 return error;
1142
1143 error = -EPERM;
1144 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1145 goto out;
1146
1147 error = inode_change_ok(inode, attr);
1148 if (error)
1149 goto out;
1150
1151 if (attr->ia_valid & ATTR_SIZE)
1152 error = gfs2_setattr_size(inode, attr->ia_size);
1153 else if (attr->ia_valid & (ATTR_UID | ATTR_GID))
1154 error = setattr_chown(inode, attr);
1155 else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
1156 error = gfs2_acl_chmod(ip, attr);
1157 else
1158 error = gfs2_setattr_simple(ip, attr);
1159
1160out:
1161 gfs2_glock_dq_uninit(&i_gh);
1162 if (!error)
1163 mark_inode_dirty(inode);
1164 return error;
1165}
1166
1167/**
1168 * gfs2_getattr - Read out an inode's attributes
1169 * @mnt: The vfsmount the inode is being accessed from
1170 * @dentry: The dentry to stat
1171 * @stat: The inode's stats
1172 *
1173 * This may be called from the VFS directly, or from within GFS2 with the
1174 * inode locked, so we look to see if the glock is already locked and only
1175 * lock the glock if it has not already been done. Note that it is the NFS
1176 * readdirplus operation which causes this to be called (from filldir)
1177 * with the glock already held.
1178 *
1179 * Returns: errno
1180 */
1181
1182static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
1183 struct kstat *stat)
1184{
1185 struct inode *inode = dentry->d_inode;
1186 struct gfs2_inode *ip = GFS2_I(inode);
1187 struct gfs2_holder gh;
1188 int error;
1189 int unlock = 0;
1190
1191 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1192 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1193 if (error)
1194 return error;
1195 unlock = 1;
1196 }
1197
1198 generic_fillattr(inode, stat);
1199 if (unlock)
1200 gfs2_glock_dq_uninit(&gh);
1201
1202 return 0;
1203}
1204
1205static int gfs2_setxattr(struct dentry *dentry, const char *name,
1206 const void *data, size_t size, int flags)
1207{
1208 struct inode *inode = dentry->d_inode;
1209 struct gfs2_inode *ip = GFS2_I(inode);
1210 struct gfs2_holder gh;
1211 int ret;
1212
1213 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1214 ret = gfs2_glock_nq(&gh);
1215 if (ret == 0) {
1216 ret = generic_setxattr(dentry, name, data, size, flags);
1217 gfs2_glock_dq(&gh);
1218 }
1219 gfs2_holder_uninit(&gh);
1220 return ret;
1221}
1222
1223static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
1224 void *data, size_t size)
1225{
1226 struct inode *inode = dentry->d_inode;
1227 struct gfs2_inode *ip = GFS2_I(inode);
1228 struct gfs2_holder gh;
1229 int ret;
1230
1231 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1232 ret = gfs2_glock_nq(&gh);
1233 if (ret == 0) {
1234 ret = generic_getxattr(dentry, name, data, size);
1235 gfs2_glock_dq(&gh);
1236 }
1237 gfs2_holder_uninit(&gh);
1238 return ret;
1239}
1240
1241static int gfs2_removexattr(struct dentry *dentry, const char *name)
1242{
1243 struct inode *inode = dentry->d_inode;
1244 struct gfs2_inode *ip = GFS2_I(inode);
1245 struct gfs2_holder gh;
1246 int ret;
1247
1248 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1249 ret = gfs2_glock_nq(&gh);
1250 if (ret == 0) {
1251 ret = generic_removexattr(dentry, name);
1252 gfs2_glock_dq(&gh);
1253 }
1254 gfs2_holder_uninit(&gh);
1255 return ret;
1256}
1257
1258static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1259 u64 start, u64 len)
1260{
1261 struct gfs2_inode *ip = GFS2_I(inode);
1262 struct gfs2_holder gh;
1263 int ret;
1264
1265 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
1266 if (ret)
1267 return ret;
1268
1269 mutex_lock(&inode->i_mutex);
1270
1271 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
1272 if (ret)
1273 goto out;
1274
1275 if (gfs2_is_stuffed(ip)) {
1276 u64 phys = ip->i_no_addr << inode->i_blkbits;
1277 u64 size = i_size_read(inode);
1278 u32 flags = FIEMAP_EXTENT_LAST|FIEMAP_EXTENT_NOT_ALIGNED|
1279 FIEMAP_EXTENT_DATA_INLINE;
1280 phys += sizeof(struct gfs2_dinode);
1281 phys += start;
1282 if (start + len > size)
1283 len = size - start;
1284 if (start < size)
1285 ret = fiemap_fill_next_extent(fieinfo, start, phys,
1286 len, flags);
1287 if (ret == 1)
1288 ret = 0;
1289 } else {
1290 ret = __generic_block_fiemap(inode, fieinfo, start, len,
1291 gfs2_block_map);
1292 }
1293
1294 gfs2_glock_dq_uninit(&gh);
1295out:
1296 mutex_unlock(&inode->i_mutex);
1297 return ret;
1298}
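
For the stuffed (inline data) branch above, the single reported extent is pure arithmetic: the dinode block address converted to bytes, plus the dinode header, plus the requested offset, with the length clamped to i_size. A standalone worked example with made-up values (the header size used below is an assumption):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t no_addr = 0x1234;	/* dinode block number (example) */
	unsigned int blkbits = 12;	/* 4096-byte blocks */
	uint64_t dinode_hdr = 232;	/* sizeof(struct gfs2_dinode), assumed */
	uint64_t size = 1000;		/* i_size of the stuffed inode */
	uint64_t start = 0, len = 4096;

	uint64_t phys = (no_addr << blkbits) + dinode_hdr + start;

	if (start + len > size)
		len = size - start;	/* clamp to the inline data */
	if (start < size)
		printf("extent: logical %llu phys %llu len %llu (inline, last)\n",
		       (unsigned long long)start,
		       (unsigned long long)phys,
		       (unsigned long long)len);
	return 0;
}
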
1299
1300const struct inode_operations gfs2_file_iops = {
1301 .permission = gfs2_permission,
1302 .setattr = gfs2_setattr,
1303 .getattr = gfs2_getattr,
1304 .setxattr = gfs2_setxattr,
1305 .getxattr = gfs2_getxattr,
1306 .listxattr = gfs2_listxattr,
1307 .removexattr = gfs2_removexattr,
1308 .fiemap = gfs2_fiemap,
1309};
1310
1311const struct inode_operations gfs2_dir_iops = {
1312 .create = gfs2_create,
1313 .lookup = gfs2_lookup,
1314 .link = gfs2_link,
1315 .unlink = gfs2_unlink,
1316 .symlink = gfs2_symlink,
1317 .mkdir = gfs2_mkdir,
1318 .rmdir = gfs2_rmdir,
1319 .mknod = gfs2_mknod,
1320 .rename = gfs2_rename,
1321 .permission = gfs2_permission,
1322 .setattr = gfs2_setattr,
1323 .getattr = gfs2_getattr,
1324 .setxattr = gfs2_setxattr,
1325 .getxattr = gfs2_getxattr,
1326 .listxattr = gfs2_listxattr,
1327 .removexattr = gfs2_removexattr,
1328 .fiemap = gfs2_fiemap,
1329};
1330
1331const struct inode_operations gfs2_symlink_iops = {
1332 .readlink = generic_readlink,
1333 .follow_link = gfs2_follow_link,
1334 .put_link = gfs2_put_link,
1335 .permission = gfs2_permission,
1336 .setattr = gfs2_setattr,
1337 .getattr = gfs2_getattr,
1338 .setxattr = gfs2_setxattr,
1339 .getxattr = gfs2_getxattr,
1340 .listxattr = gfs2_listxattr,
1341 .removexattr = gfs2_removexattr,
1342 .fiemap = gfs2_fiemap,
1343};
1344
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 6fcae8469f6d..9b780df3fd54 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -78,10 +78,11 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
78 78
79static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1, 79static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
80 unsigned char *buf2, unsigned int offset, 80 unsigned char *buf2, unsigned int offset,
81 unsigned int buflen, u32 block, 81 struct gfs2_bitmap *bi, u32 block,
82 unsigned char new_state) 82 unsigned char new_state)
83{ 83{
84 unsigned char *byte1, *byte2, *end, cur_state; 84 unsigned char *byte1, *byte2, *end, cur_state;
85 unsigned int buflen = bi->bi_len;
85 const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; 86 const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE;
86 87
87 byte1 = buf1 + offset + (block / GFS2_NBBY); 88 byte1 = buf1 + offset + (block / GFS2_NBBY);
@@ -92,6 +93,16 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,
92 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; 93 cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;
93 94
94 if (unlikely(!valid_change[new_state * 4 + cur_state])) { 95 if (unlikely(!valid_change[new_state * 4 + cur_state])) {
96 printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, "
97 "new_state=%d\n",
98 (unsigned long long)block, cur_state, new_state);
99 printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n",
100 (unsigned long long)rgd->rd_addr,
101 (unsigned long)bi->bi_start);
102 printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n",
103 (unsigned long)bi->bi_offset,
104 (unsigned long)bi->bi_len);
105 dump_stack();
95 gfs2_consist_rgrpd(rgd); 106 gfs2_consist_rgrpd(rgd);
96 return; 107 return;
97 } 108 }
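
gfs2_setbit() above packs four two-bit block states into each byte (GFS2_NBBY blocks per byte, GFS2_BIT_SIZE bits each in the GFS2 headers). A standalone sketch of that packing, not the kernel helper itself:

#include <stdio.h>

#define NBBY		4	/* blocks represented per byte */
#define BIT_SIZE	2	/* bits per block state */
#define BIT_MASK	0x3

static unsigned int get_state(const unsigned char *buf, unsigned int block)
{
	unsigned int bit = (block % NBBY) * BIT_SIZE;

	return (buf[block / NBBY] >> bit) & BIT_MASK;
}

static void set_state(unsigned char *buf, unsigned int block,
		      unsigned int new_state)
{
	unsigned int bit = (block % NBBY) * BIT_SIZE;
	unsigned char *byte = &buf[block / NBBY];

	*byte = (*byte & ~(BIT_MASK << bit)) | ((new_state & BIT_MASK) << bit);
}

int main(void)
{
	unsigned char bitmap[2] = { 0 };	/* 8 blocks, all state 0 (free) */

	set_state(bitmap, 5, 1);		/* mark block 5 "used", say */
	printf("block 5 state = %u\n", get_state(bitmap, 5));
	return 0;
}
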
@@ -381,6 +392,7 @@ static void clear_rgrpdi(struct gfs2_sbd *sdp)
381 392
382 if (gl) { 393 if (gl) {
383 gl->gl_object = NULL; 394 gl->gl_object = NULL;
395 gfs2_glock_add_to_lru(gl);
384 gfs2_glock_put(gl); 396 gfs2_glock_put(gl);
385 } 397 }
386 398
@@ -1365,7 +1377,7 @@ skip:
1365 1377
1366 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1378 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1367 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, 1379 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
1368 bi->bi_len, blk, new_state); 1380 bi, blk, new_state);
1369 goal = blk; 1381 goal = blk;
1370 while (*n < elen) { 1382 while (*n < elen) {
1371 goal++; 1383 goal++;
@@ -1375,7 +1387,7 @@ skip:
1375 GFS2_BLKST_FREE) 1387 GFS2_BLKST_FREE)
1376 break; 1388 break;
1377 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, 1389 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
1378 bi->bi_len, goal, new_state); 1390 bi, goal, new_state);
1379 (*n)++; 1391 (*n)++;
1380 } 1392 }
1381out: 1393out:
@@ -1432,7 +1444,7 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1432 } 1444 }
1433 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1445 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1434 gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset, 1446 gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset,
1435 bi->bi_len, buf_blk, new_state); 1447 bi, buf_blk, new_state);
1436 } 1448 }
1437 1449
1438 return rgd; 1450 return rgd;
@@ -1617,6 +1629,10 @@ void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1617 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1629 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1618 1630
1619 gfs2_trans_add_rg(rgd); 1631 gfs2_trans_add_rg(rgd);
1632
1633 /* Directories keep their data in the metadata address space */
1634 if (ip->i_depth)
1635 gfs2_meta_wipe(ip, bstart, blen);
1620} 1636}
1621 1637
1622/** 1638/**
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index b9f28e66dad1..ed540e7018be 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -23,6 +23,7 @@
23#include <linux/time.h> 23#include <linux/time.h>
24#include <linux/wait.h> 24#include <linux/wait.h>
25#include <linux/writeback.h> 25#include <linux/writeback.h>
26#include <linux/backing-dev.h>
26 27
27#include "gfs2.h" 28#include "gfs2.h"
28#include "incore.h" 29#include "incore.h"
@@ -700,11 +701,47 @@ void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
700 mutex_unlock(&sdp->sd_freeze_lock); 701 mutex_unlock(&sdp->sd_freeze_lock);
701} 702}
702 703
704void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
705{
706 struct gfs2_dinode *str = buf;
707
708 str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
709 str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
710 str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
711 str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
712 str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
713 str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
714 str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
715 str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
716 str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
717 str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
718 str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
719 str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
720 str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
721 str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
722
723 str->di_goal_meta = cpu_to_be64(ip->i_goal);
724 str->di_goal_data = cpu_to_be64(ip->i_goal);
725 str->di_generation = cpu_to_be64(ip->i_generation);
726
727 str->di_flags = cpu_to_be32(ip->i_diskflags);
728 str->di_height = cpu_to_be16(ip->i_height);
729 str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
730 !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
731 GFS2_FORMAT_DE : 0);
732 str->di_depth = cpu_to_be16(ip->i_depth);
733 str->di_entries = cpu_to_be32(ip->i_entries);
734
735 str->di_eattr = cpu_to_be64(ip->i_eattr);
736 str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
737 str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
738 str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
739}
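
gfs2_dinode_out() above converts every in-core field to big-endian before it reaches the on-disk buffer. A user-space sketch of the same packing discipline using <endian.h>; the two-field struct is made up, not the real dinode:

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

struct disk_rec {			/* on-disk layout, always big-endian */
	uint64_t d_size;
	uint32_t d_mode;
	uint32_t d_pad;
} __attribute__((packed));

static void rec_out(uint64_t size, uint32_t mode, void *buf)
{
	struct disk_rec *str = buf;

	str->d_size = htobe64(size);	/* cpu_to_be64() analogue */
	str->d_mode = htobe32(mode);	/* cpu_to_be32() analogue */
	str->d_pad = 0;			/* unused space is zeroed, not left stale */
}

int main(void)
{
	unsigned char buf[sizeof(struct disk_rec)];

	rec_out(1024, 0100644, buf);
	printf("d_size on disk starts with 0x%02x (most significant byte first)\n",
	       buf[0]);
	return 0;
}
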
703 740
704/** 741/**
705 * gfs2_write_inode - Make sure the inode is stable on the disk 742 * gfs2_write_inode - Make sure the inode is stable on the disk
706 * @inode: The inode 743 * @inode: The inode
707 * @sync: synchronous write flag 744 * @wbc: The writeback control structure
708 * 745 *
709 * Returns: errno 746 * Returns: errno
710 */ 747 */
@@ -713,15 +750,17 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
713{ 750{
714 struct gfs2_inode *ip = GFS2_I(inode); 751 struct gfs2_inode *ip = GFS2_I(inode);
715 struct gfs2_sbd *sdp = GFS2_SB(inode); 752 struct gfs2_sbd *sdp = GFS2_SB(inode);
753 struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
754 struct backing_dev_info *bdi = metamapping->backing_dev_info;
716 struct gfs2_holder gh; 755 struct gfs2_holder gh;
717 struct buffer_head *bh; 756 struct buffer_head *bh;
718 struct timespec atime; 757 struct timespec atime;
719 struct gfs2_dinode *di; 758 struct gfs2_dinode *di;
720 int ret = 0; 759 int ret = -EAGAIN;
721 760
722 /* Check this is a "normal" inode, etc */ 761 /* Skip timestamp update, if this is from a memalloc */
723 if (current->flags & PF_MEMALLOC) 762 if (current->flags & PF_MEMALLOC)
724 return 0; 763 goto do_flush;
725 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 764 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
726 if (ret) 765 if (ret)
727 goto do_flush; 766 goto do_flush;
@@ -745,6 +784,13 @@ do_unlock:
745do_flush: 784do_flush:
746 if (wbc->sync_mode == WB_SYNC_ALL) 785 if (wbc->sync_mode == WB_SYNC_ALL)
747 gfs2_log_flush(GFS2_SB(inode), ip->i_gl); 786 gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
787 filemap_fdatawrite(metamapping);
788 if (bdi->dirty_exceeded)
789 gfs2_ail1_flush(sdp, wbc);
790 if (!ret && (wbc->sync_mode == WB_SYNC_ALL))
791 ret = filemap_fdatawait(metamapping);
792 if (ret)
793 mark_inode_dirty_sync(inode);
748 return ret; 794 return ret;
749} 795}
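
The reworked do_flush path above starts writeback on the metadata mapping unconditionally and only waits for it when the caller asked for a data-integrity sync. A rough user-space analogue of that start-then-maybe-wait shape, using sync_file_range() and fdatasync() in place of filemap_fdatawrite()/filemap_fdatawait():

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

enum sync_mode { SYNC_NONE, SYNC_ALL };

static int flush_meta(int fd, enum sync_mode mode)
{
	/* start writeback without waiting, like filemap_fdatawrite() */
	int ret = sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE);

	if (ret == 0 && mode == SYNC_ALL)
		ret = fdatasync(fd);	/* wait, like filemap_fdatawait() */
	return ret;
}
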
750 796
@@ -874,8 +920,9 @@ restart:
874 920
875static int gfs2_sync_fs(struct super_block *sb, int wait) 921static int gfs2_sync_fs(struct super_block *sb, int wait)
876{ 922{
877 if (wait && sb->s_fs_info) 923 struct gfs2_sbd *sdp = sb->s_fs_info;
878 gfs2_log_flush(sb->s_fs_info, NULL); 924 if (wait && sdp)
925 gfs2_log_flush(sdp, NULL);
879 return 0; 926 return 0;
880} 927}
881 928
@@ -1308,6 +1355,78 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1308 return 0; 1355 return 0;
1309} 1356}
1310 1357
1358static void gfs2_final_release_pages(struct gfs2_inode *ip)
1359{
1360 struct inode *inode = &ip->i_inode;
1361 struct gfs2_glock *gl = ip->i_gl;
1362
1363 truncate_inode_pages(gfs2_glock2aspace(ip->i_gl), 0);
1364 truncate_inode_pages(&inode->i_data, 0);
1365
1366 if (atomic_read(&gl->gl_revokes) == 0) {
1367 clear_bit(GLF_LFLUSH, &gl->gl_flags);
1368 clear_bit(GLF_DIRTY, &gl->gl_flags);
1369 }
1370}
1371
1372static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1373{
1374 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1375 struct gfs2_alloc *al;
1376 struct gfs2_rgrpd *rgd;
1377 int error;
1378
1379 if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
1380 gfs2_consist_inode(ip);
1381 return -EIO;
1382 }
1383
1384 al = gfs2_alloc_get(ip);
1385 if (!al)
1386 return -ENOMEM;
1387
1388 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1389 if (error)
1390 goto out;
1391
1392 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
1393 if (error)
1394 goto out_qs;
1395
1396 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
1397 if (!rgd) {
1398 gfs2_consist_inode(ip);
1399 error = -EIO;
1400 goto out_rindex_relse;
1401 }
1402
1403 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
1404 &al->al_rgd_gh);
1405 if (error)
1406 goto out_rindex_relse;
1407
1408 error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
1409 sdp->sd_jdesc->jd_blocks);
1410 if (error)
1411 goto out_rg_gunlock;
1412
1413 gfs2_free_di(rgd, ip);
1414
1415 gfs2_final_release_pages(ip);
1416
1417 gfs2_trans_end(sdp);
1418
1419out_rg_gunlock:
1420 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1421out_rindex_relse:
1422 gfs2_glock_dq_uninit(&al->al_ri_gh);
1423out_qs:
1424 gfs2_quota_unhold(ip);
1425out:
1426 gfs2_alloc_put(ip);
1427 return error;
1428}
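
gfs2_dinode_dealloc() above uses the usual goto-unwind idiom: each acquired resource gets its own label, and both the success and the error paths fall through the labels so everything is released in reverse order. A generic sketch with hypothetical resources:

#include <stdlib.h>

struct res { int dummy; };

static struct res *acquire_a(void) { return malloc(sizeof(struct res)); }
static struct res *acquire_b(void) { return malloc(sizeof(struct res)); }
static int do_work(struct res *a, struct res *b) { (void)a; (void)b; return 0; }

static int transaction(void)
{
	struct res *a, *b;
	int err = -1;

	a = acquire_a();
	if (!a)
		goto out;
	b = acquire_b();
	if (!b)
		goto out_a;

	err = do_work(a, b);	/* success path falls through the labels too */

	free(b);
out_a:
	free(a);
out:
	return err;
}
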
1429
1311/* 1430/*
1312 * We have to (at the moment) hold the inode's main lock to cover 1431
1313 * the gap between unlocking the shared lock on the iopen lock and 1432 * the gap between unlocking the shared lock on the iopen lock and
@@ -1371,15 +1490,13 @@ static void gfs2_evict_inode(struct inode *inode)
1371 } 1490 }
1372 1491
1373 error = gfs2_dinode_dealloc(ip); 1492 error = gfs2_dinode_dealloc(ip);
1374 if (error) 1493 goto out_unlock;
1375 goto out_unlock;
1376 1494
1377out_truncate: 1495out_truncate:
1378 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); 1496 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
1379 if (error) 1497 if (error)
1380 goto out_unlock; 1498 goto out_unlock;
1381 /* Needs to be done before glock release & also in a transaction */ 1499 gfs2_final_release_pages(ip);
1382 truncate_inode_pages(&inode->i_data, 0);
1383 gfs2_trans_end(sdp); 1500 gfs2_trans_end(sdp);
1384 1501
1385out_unlock: 1502out_unlock:
@@ -1394,6 +1511,7 @@ out:
1394 end_writeback(inode); 1511 end_writeback(inode);
1395 1512
1396 ip->i_gl->gl_object = NULL; 1513 ip->i_gl->gl_object = NULL;
1514 gfs2_glock_add_to_lru(ip->i_gl);
1397 gfs2_glock_put(ip->i_gl); 1515 gfs2_glock_put(ip->i_gl);
1398 ip->i_gl = NULL; 1516 ip->i_gl = NULL;
1399 if (ip->i_iopen_gh.gh_gl) { 1517 if (ip->i_iopen_gh.gh_gl) {
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 748ccb557c18..e20eab37bc80 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -81,7 +81,8 @@ static int gfs2_uuid_valid(const u8 *uuid)
81 81
82static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf) 82static ssize_t uuid_show(struct gfs2_sbd *sdp, char *buf)
83{ 83{
84 const u8 *uuid = sdp->sd_sb.sb_uuid; 84 struct super_block *s = sdp->sd_vfs;
85 const u8 *uuid = s->s_uuid;
85 buf[0] = '\0'; 86 buf[0] = '\0';
86 if (!gfs2_uuid_valid(uuid)) 87 if (!gfs2_uuid_valid(uuid))
87 return 0; 88 return 0;
@@ -616,7 +617,8 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
616 struct kobj_uevent_env *env) 617 struct kobj_uevent_env *env)
617{ 618{
618 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); 619 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
619 const u8 *uuid = sdp->sd_sb.sb_uuid; 620 struct super_block *s = sdp->sd_vfs;
621 const u8 *uuid = s->s_uuid;
620 622
621 add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); 623 add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
622 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); 624 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index cedb0bb96d96..5d07609ec57d 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -10,6 +10,7 @@
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11#include <linux/dlmconstants.h> 11#include <linux/dlmconstants.h>
12#include <linux/gfs2_ondisk.h> 12#include <linux/gfs2_ondisk.h>
13#include <linux/writeback.h>
13#include "incore.h" 14#include "incore.h"
14#include "glock.h" 15#include "glock.h"
15 16
@@ -40,7 +41,9 @@
40 {(1UL << GLF_REPLY_PENDING), "r" }, \ 41 {(1UL << GLF_REPLY_PENDING), "r" }, \
41 {(1UL << GLF_INITIAL), "I" }, \ 42 {(1UL << GLF_INITIAL), "I" }, \
42 {(1UL << GLF_FROZEN), "F" }, \ 43 {(1UL << GLF_FROZEN), "F" }, \
43 {(1UL << GLF_QUEUED), "q" }) 44 {(1UL << GLF_QUEUED), "q" }, \
45 {(1UL << GLF_LRU), "L" }, \
46 {(1UL << GLF_OBJECT), "o" })
44 47
45#ifndef NUMPTY 48#ifndef NUMPTY
46#define NUMPTY 49#define NUMPTY
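
The table above feeds __print_flags(), which turns each set bit into one letter in the trace output; this patch adds "L" for glocks on the LRU and a synthesized "o" for glocks that still have an object attached. A standalone sketch of that decoding (the bit positions below are illustrative, not the real GLF_* values):

#include <stdio.h>

enum { GLF_LOCK = 1, GLF_DIRTY = 5, GLF_LRU = 13, GLF_OBJECT = 14 };

static const struct { unsigned long bit; char c; } glf_letters[] = {
	{ 1UL << GLF_LOCK,   'l' },
	{ 1UL << GLF_DIRTY,  'y' },
	{ 1UL << GLF_LRU,    'L' },
	{ 1UL << GLF_OBJECT, 'o' },
};

static void print_glf(unsigned long flags)
{
	for (size_t i = 0; i < sizeof(glf_letters) / sizeof(glf_letters[0]); i++)
		if (flags & glf_letters[i].bit)
			putchar(glf_letters[i].c);
	putchar('\n');
}

int main(void)
{
	/* GLF_OBJECT is synthesized from gl_object in the hunks below */
	print_glf((1UL << GLF_DIRTY) | (1UL << GLF_OBJECT));
	return 0;
}
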
@@ -94,7 +97,7 @@ TRACE_EVENT(gfs2_glock_state_change,
94 __entry->new_state = glock_trace_state(new_state); 97 __entry->new_state = glock_trace_state(new_state);
95 __entry->tgt_state = glock_trace_state(gl->gl_target); 98 __entry->tgt_state = glock_trace_state(gl->gl_target);
96 __entry->dmt_state = glock_trace_state(gl->gl_demote_state); 99 __entry->dmt_state = glock_trace_state(gl->gl_demote_state);
97 __entry->flags = gl->gl_flags; 100 __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
98 ), 101 ),
99 102
100 TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s", 103 TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s",
@@ -127,7 +130,7 @@ TRACE_EVENT(gfs2_glock_put,
127 __entry->gltype = gl->gl_name.ln_type; 130 __entry->gltype = gl->gl_name.ln_type;
128 __entry->glnum = gl->gl_name.ln_number; 131 __entry->glnum = gl->gl_name.ln_number;
129 __entry->cur_state = glock_trace_state(gl->gl_state); 132 __entry->cur_state = glock_trace_state(gl->gl_state);
130 __entry->flags = gl->gl_flags; 133 __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
131 ), 134 ),
132 135
133 TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s", 136 TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s",
@@ -161,7 +164,7 @@ TRACE_EVENT(gfs2_demote_rq,
161 __entry->glnum = gl->gl_name.ln_number; 164 __entry->glnum = gl->gl_name.ln_number;
162 __entry->cur_state = glock_trace_state(gl->gl_state); 165 __entry->cur_state = glock_trace_state(gl->gl_state);
163 __entry->dmt_state = glock_trace_state(gl->gl_demote_state); 166 __entry->dmt_state = glock_trace_state(gl->gl_demote_state);
164 __entry->flags = gl->gl_flags; 167 __entry->flags = gl->gl_flags | (gl->gl_object ? (1UL<<GLF_OBJECT) : 0);
165 ), 168 ),
166 169
167 TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s", 170 TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s",
@@ -318,6 +321,33 @@ TRACE_EVENT(gfs2_log_blocks,
318 MINOR(__entry->dev), __entry->blocks) 321 MINOR(__entry->dev), __entry->blocks)
319); 322);
320 323
324/* Writing back the AIL */
325TRACE_EVENT(gfs2_ail_flush,
326
327 TP_PROTO(const struct gfs2_sbd *sdp, const struct writeback_control *wbc, int start),
328
329 TP_ARGS(sdp, wbc, start),
330
331 TP_STRUCT__entry(
332 __field( dev_t, dev )
333 __field( int, start )
334 __field( int, sync_mode )
335 __field( long, nr_to_write )
336 ),
337
338 TP_fast_assign(
339 __entry->dev = sdp->sd_vfs->s_dev;
340 __entry->start = start;
341 __entry->sync_mode = wbc->sync_mode;
342 __entry->nr_to_write = wbc->nr_to_write;
343 ),
344
345 TP_printk("%u,%u ail flush %s %s %ld", MAJOR(__entry->dev),
346 MINOR(__entry->dev), __entry->start ? "start" : "end",
347 __entry->sync_mode == WB_SYNC_ALL ? "all" : "none",
348 __entry->nr_to_write)
349);
350
321/* Section 3 - bmap 351/* Section 3 - bmap
322 * 352 *
323 * Objectives: 353 * Objectives:
diff --git a/fs/inode.c b/fs/inode.c
index 33c963d08ab4..05f4fa521325 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -24,6 +24,7 @@
24#include <linux/mount.h> 24#include <linux/mount.h>
25#include <linux/async.h> 25#include <linux/async.h>
26#include <linux/posix_acl.h> 26#include <linux/posix_acl.h>
27#include <linux/prefetch.h>
27#include <linux/ima.h> 28#include <linux/ima.h>
28#include <linux/cred.h> 29#include <linux/cred.h>
29#include "internal.h" 30#include "internal.h"
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 1adc8d455f0e..df0de27c2733 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -10,6 +10,7 @@
10#include <linux/blkdev.h> 10#include <linux/blkdev.h>
11#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
12#include <linux/gfp.h> 12#include <linux/gfp.h>
13#include <linux/prefetch.h>
13 14
14#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) 15#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))
15 16
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 9e22085231b3..d8d09380c7de 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -481,7 +481,7 @@ static int inode_write_alias(struct super_block *sb,
481 val = inode_val0(inode); 481 val = inode_val0(inode);
482 break; 482 break;
483 case INODE_USED_OFS: 483 case INODE_USED_OFS:
484 val = cpu_to_be64(li->li_used_bytes);; 484 val = cpu_to_be64(li->li_used_bytes);
485 break; 485 break;
486 case INODE_SIZE_OFS: 486 case INODE_SIZE_OFS:
487 val = cpu_to_be64(i_size_read(inode)); 487 val = cpu_to_be64(i_size_read(inode));
diff --git a/fs/namei.c b/fs/namei.c
index e3c4f112ebf7..6ff858c049c0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1378,12 +1378,12 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
1378{ 1378{
1379 int res; 1379 int res;
1380 1380
1381 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
1382 if (unlikely(current->link_count >= MAX_NESTED_LINKS)) { 1381 if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
1383 path_put_conditional(path, nd); 1382 path_put_conditional(path, nd);
1384 path_put(&nd->path); 1383 path_put(&nd->path);
1385 return -ELOOP; 1384 return -ELOOP;
1386 } 1385 }
1386 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
1387 1387
1388 nd->depth++; 1388 nd->depth++;
1389 current->link_count++; 1389 current->link_count++;
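
The reordering above lets the user-triggerable condition (too many nested symlinks) fail cleanly with ELOOP before the internal-invariant BUG_ON() is evaluated. A toy sketch of that ordering, not the VFS path walker:

#include <assert.h>
#include <errno.h>

#define MAX_NESTED_LINKS 8

static int resolve_link(const char *target, int *link_count, int depth)
{
	if (*link_count >= MAX_NESTED_LINKS)
		return -ELOOP;			/* user-triggerable: fail cleanly */
	assert(depth < MAX_NESTED_LINKS);	/* internal invariant, like BUG_ON() */

	(*link_count)++;
	/* ... read the link target and recurse with depth + 1 ... */
	(void)target;
	return 0;
}
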
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 5232d3e8fb2f..a2e2402b2afb 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -8,7 +8,7 @@
8 * Statistics for the reply cache 8 * Statistics for the reply cache
9 * fh <stale> <total-lookups> <anonlookups> <dir-not-in-dcache> <nondir-not-in-dcache> 9 * fh <stale> <total-lookups> <anonlookups> <dir-not-in-dcache> <nondir-not-in-dcache>
10 * statistics for filehandle lookup 10 * statistics for filehandle lookup
11 * io <bytes-read> <bytes-writtten> 11 * io <bytes-read> <bytes-written>
12 * statistics for IO throughput 12 * statistics for IO throughput
13 * th <threads> <fullcnt> <10%-20%> <20%-30%> ... <90%-100%> <100%> 13 * th <threads> <fullcnt> <10%-20%> <20%-30%> ... <90%-100%> <100%>
14 * time (seconds) when nfsd thread usage above thresholds 14 * time (seconds) when nfsd thread usage above thresholds
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index f7684483785e..eed4d7b26249 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -489,8 +489,8 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode,
489void nilfs_palloc_commit_alloc_entry(struct inode *inode, 489void nilfs_palloc_commit_alloc_entry(struct inode *inode,
490 struct nilfs_palloc_req *req) 490 struct nilfs_palloc_req *req)
491{ 491{
492 nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh); 492 mark_buffer_dirty(req->pr_bitmap_bh);
493 nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh); 493 mark_buffer_dirty(req->pr_desc_bh);
494 nilfs_mdt_mark_dirty(inode); 494 nilfs_mdt_mark_dirty(inode);
495 495
496 brelse(req->pr_bitmap_bh); 496 brelse(req->pr_bitmap_bh);
@@ -527,8 +527,8 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
527 kunmap(req->pr_bitmap_bh->b_page); 527 kunmap(req->pr_bitmap_bh->b_page);
528 kunmap(req->pr_desc_bh->b_page); 528 kunmap(req->pr_desc_bh->b_page);
529 529
530 nilfs_mdt_mark_buffer_dirty(req->pr_desc_bh); 530 mark_buffer_dirty(req->pr_desc_bh);
531 nilfs_mdt_mark_buffer_dirty(req->pr_bitmap_bh); 531 mark_buffer_dirty(req->pr_bitmap_bh);
532 nilfs_mdt_mark_dirty(inode); 532 nilfs_mdt_mark_dirty(inode);
533 533
534 brelse(req->pr_bitmap_bh); 534 brelse(req->pr_bitmap_bh);
@@ -683,8 +683,8 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
683 kunmap(bitmap_bh->b_page); 683 kunmap(bitmap_bh->b_page);
684 kunmap(desc_bh->b_page); 684 kunmap(desc_bh->b_page);
685 685
686 nilfs_mdt_mark_buffer_dirty(desc_bh); 686 mark_buffer_dirty(desc_bh);
687 nilfs_mdt_mark_buffer_dirty(bitmap_bh); 687 mark_buffer_dirty(bitmap_bh);
688 nilfs_mdt_mark_dirty(inode); 688 nilfs_mdt_mark_dirty(inode);
689 689
690 brelse(bitmap_bh); 690 brelse(bitmap_bh);
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 4723f04e9b12..aadbd0b5e3e8 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -34,7 +34,9 @@
34 34
35struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap) 35struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap)
36{ 36{
37 return NILFS_I_NILFS(bmap->b_inode)->ns_dat; 37 struct the_nilfs *nilfs = bmap->b_inode->i_sb->s_fs_info;
38
39 return nilfs->ns_dat;
38} 40}
39 41
40static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap, 42static int nilfs_bmap_convert_error(struct nilfs_bmap *bmap,
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 609cd223eea8..a35ae35e6932 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -34,12 +34,6 @@
34#include "page.h" 34#include "page.h"
35#include "btnode.h" 35#include "btnode.h"
36 36
37void nilfs_btnode_cache_init(struct address_space *btnc,
38 struct backing_dev_info *bdi)
39{
40 nilfs_mapping_init(btnc, bdi);
41}
42
43void nilfs_btnode_cache_clear(struct address_space *btnc) 37void nilfs_btnode_cache_clear(struct address_space *btnc)
44{ 38{
45 invalidate_mapping_pages(btnc, 0, -1); 39 invalidate_mapping_pages(btnc, 0, -1);
@@ -62,7 +56,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
62 BUG(); 56 BUG();
63 } 57 }
64 memset(bh->b_data, 0, 1 << inode->i_blkbits); 58 memset(bh->b_data, 0, 1 << inode->i_blkbits);
65 bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; 59 bh->b_bdev = inode->i_sb->s_bdev;
66 bh->b_blocknr = blocknr; 60 bh->b_blocknr = blocknr;
67 set_buffer_mapped(bh); 61 set_buffer_mapped(bh);
68 set_buffer_uptodate(bh); 62 set_buffer_uptodate(bh);
@@ -94,10 +88,11 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
94 if (pblocknr == 0) { 88 if (pblocknr == 0) {
95 pblocknr = blocknr; 89 pblocknr = blocknr;
96 if (inode->i_ino != NILFS_DAT_INO) { 90 if (inode->i_ino != NILFS_DAT_INO) {
97 struct inode *dat = NILFS_I_NILFS(inode)->ns_dat; 91 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
98 92
99 /* blocknr is a virtual block number */ 93 /* blocknr is a virtual block number */
100 err = nilfs_dat_translate(dat, blocknr, &pblocknr); 94 err = nilfs_dat_translate(nilfs->ns_dat, blocknr,
95 &pblocknr);
101 if (unlikely(err)) { 96 if (unlikely(err)) {
102 brelse(bh); 97 brelse(bh);
103 goto out_locked; 98 goto out_locked;
@@ -120,7 +115,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
120 goto found; 115 goto found;
121 } 116 }
122 set_buffer_mapped(bh); 117 set_buffer_mapped(bh);
123 bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; 118 bh->b_bdev = inode->i_sb->s_bdev;
124 bh->b_blocknr = pblocknr; /* set block address for read */ 119 bh->b_blocknr = pblocknr; /* set block address for read */
125 bh->b_end_io = end_buffer_read_sync; 120 bh->b_end_io = end_buffer_read_sync;
126 get_bh(bh); 121 get_bh(bh);
@@ -259,7 +254,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
259 "invalid oldkey %lld (newkey=%lld)", 254 "invalid oldkey %lld (newkey=%lld)",
260 (unsigned long long)oldkey, 255 (unsigned long long)oldkey,
261 (unsigned long long)newkey); 256 (unsigned long long)newkey);
262 nilfs_btnode_mark_dirty(obh); 257 mark_buffer_dirty(obh);
263 258
264 spin_lock_irq(&btnc->tree_lock); 259 spin_lock_irq(&btnc->tree_lock);
265 radix_tree_delete(&btnc->page_tree, oldkey); 260 radix_tree_delete(&btnc->page_tree, oldkey);
@@ -271,7 +266,7 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc,
271 unlock_page(opage); 266 unlock_page(opage);
272 } else { 267 } else {
273 nilfs_copy_buffer(nbh, obh); 268 nilfs_copy_buffer(nbh, obh);
274 nilfs_btnode_mark_dirty(nbh); 269 mark_buffer_dirty(nbh);
275 270
276 nbh->b_blocknr = newkey; 271 nbh->b_blocknr = newkey;
277 ctxt->bh = nbh; 272 ctxt->bh = nbh;
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 1b8ebd888c28..3a4dd2d8d3fc 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -37,7 +37,6 @@ struct nilfs_btnode_chkey_ctxt {
37 struct buffer_head *newbh; 37 struct buffer_head *newbh;
38}; 38};
39 39
40void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
41void nilfs_btnode_cache_clear(struct address_space *); 40void nilfs_btnode_cache_clear(struct address_space *);
42struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, 41struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc,
43 __u64 blocknr); 42 __u64 blocknr);
@@ -51,7 +50,4 @@ void nilfs_btnode_commit_change_key(struct address_space *,
51void nilfs_btnode_abort_change_key(struct address_space *, 50void nilfs_btnode_abort_change_key(struct address_space *,
52 struct nilfs_btnode_chkey_ctxt *); 51 struct nilfs_btnode_chkey_ctxt *);
53 52
54#define nilfs_btnode_mark_dirty(bh) nilfs_mark_buffer_dirty(bh)
55
56
57#endif /* _NILFS_BTNODE_H */ 53#endif /* _NILFS_BTNODE_H */
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index d451ae0e0bf3..7eafe468a29c 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -714,7 +714,7 @@ static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
714 nilfs_btree_get_nonroot_node(path, level), 714 nilfs_btree_get_nonroot_node(path, level),
715 path[level].bp_index, key); 715 path[level].bp_index, key);
716 if (!buffer_dirty(path[level].bp_bh)) 716 if (!buffer_dirty(path[level].bp_bh))
717 nilfs_btnode_mark_dirty(path[level].bp_bh); 717 mark_buffer_dirty(path[level].bp_bh);
718 } while ((path[level].bp_index == 0) && 718 } while ((path[level].bp_index == 0) &&
719 (++level < nilfs_btree_height(btree) - 1)); 719 (++level < nilfs_btree_height(btree) - 1));
720 } 720 }
@@ -739,7 +739,7 @@ static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
739 nilfs_btree_node_insert(node, path[level].bp_index, 739 nilfs_btree_node_insert(node, path[level].bp_index,
740 *keyp, *ptrp, ncblk); 740 *keyp, *ptrp, ncblk);
741 if (!buffer_dirty(path[level].bp_bh)) 741 if (!buffer_dirty(path[level].bp_bh))
742 nilfs_btnode_mark_dirty(path[level].bp_bh); 742 mark_buffer_dirty(path[level].bp_bh);
743 743
744 if (path[level].bp_index == 0) 744 if (path[level].bp_index == 0)
745 nilfs_btree_promote_key(btree, path, level + 1, 745 nilfs_btree_promote_key(btree, path, level + 1,
@@ -777,9 +777,9 @@ static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
777 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 777 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
778 778
779 if (!buffer_dirty(path[level].bp_bh)) 779 if (!buffer_dirty(path[level].bp_bh))
780 nilfs_btnode_mark_dirty(path[level].bp_bh); 780 mark_buffer_dirty(path[level].bp_bh);
781 if (!buffer_dirty(path[level].bp_sib_bh)) 781 if (!buffer_dirty(path[level].bp_sib_bh))
782 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 782 mark_buffer_dirty(path[level].bp_sib_bh);
783 783
784 nilfs_btree_promote_key(btree, path, level + 1, 784 nilfs_btree_promote_key(btree, path, level + 1,
785 nilfs_btree_node_get_key(node, 0)); 785 nilfs_btree_node_get_key(node, 0));
@@ -823,9 +823,9 @@ static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
823 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 823 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
824 824
825 if (!buffer_dirty(path[level].bp_bh)) 825 if (!buffer_dirty(path[level].bp_bh))
826 nilfs_btnode_mark_dirty(path[level].bp_bh); 826 mark_buffer_dirty(path[level].bp_bh);
827 if (!buffer_dirty(path[level].bp_sib_bh)) 827 if (!buffer_dirty(path[level].bp_sib_bh))
828 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 828 mark_buffer_dirty(path[level].bp_sib_bh);
829 829
830 path[level + 1].bp_index++; 830 path[level + 1].bp_index++;
831 nilfs_btree_promote_key(btree, path, level + 1, 831 nilfs_btree_promote_key(btree, path, level + 1,
@@ -870,9 +870,9 @@ static void nilfs_btree_split(struct nilfs_bmap *btree,
870 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 870 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
871 871
872 if (!buffer_dirty(path[level].bp_bh)) 872 if (!buffer_dirty(path[level].bp_bh))
873 nilfs_btnode_mark_dirty(path[level].bp_bh); 873 mark_buffer_dirty(path[level].bp_bh);
874 if (!buffer_dirty(path[level].bp_sib_bh)) 874 if (!buffer_dirty(path[level].bp_sib_bh))
875 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 875 mark_buffer_dirty(path[level].bp_sib_bh);
876 876
877 newkey = nilfs_btree_node_get_key(right, 0); 877 newkey = nilfs_btree_node_get_key(right, 0);
878 newptr = path[level].bp_newreq.bpr_ptr; 878 newptr = path[level].bp_newreq.bpr_ptr;
@@ -919,7 +919,7 @@ static void nilfs_btree_grow(struct nilfs_bmap *btree,
919 nilfs_btree_node_set_level(root, level + 1); 919 nilfs_btree_node_set_level(root, level + 1);
920 920
921 if (!buffer_dirty(path[level].bp_sib_bh)) 921 if (!buffer_dirty(path[level].bp_sib_bh))
922 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 922 mark_buffer_dirty(path[level].bp_sib_bh);
923 923
924 path[level].bp_bh = path[level].bp_sib_bh; 924 path[level].bp_bh = path[level].bp_sib_bh;
925 path[level].bp_sib_bh = NULL; 925 path[level].bp_sib_bh = NULL;
@@ -1194,7 +1194,7 @@ static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1194 nilfs_btree_node_delete(node, path[level].bp_index, 1194 nilfs_btree_node_delete(node, path[level].bp_index,
1195 keyp, ptrp, ncblk); 1195 keyp, ptrp, ncblk);
1196 if (!buffer_dirty(path[level].bp_bh)) 1196 if (!buffer_dirty(path[level].bp_bh))
1197 nilfs_btnode_mark_dirty(path[level].bp_bh); 1197 mark_buffer_dirty(path[level].bp_bh);
1198 if (path[level].bp_index == 0) 1198 if (path[level].bp_index == 0)
1199 nilfs_btree_promote_key(btree, path, level + 1, 1199 nilfs_btree_promote_key(btree, path, level + 1,
1200 nilfs_btree_node_get_key(node, 0)); 1200 nilfs_btree_node_get_key(node, 0));
@@ -1226,9 +1226,9 @@ static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1226 nilfs_btree_node_move_right(left, node, n, ncblk, ncblk); 1226 nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1227 1227
1228 if (!buffer_dirty(path[level].bp_bh)) 1228 if (!buffer_dirty(path[level].bp_bh))
1229 nilfs_btnode_mark_dirty(path[level].bp_bh); 1229 mark_buffer_dirty(path[level].bp_bh);
1230 if (!buffer_dirty(path[level].bp_sib_bh)) 1230 if (!buffer_dirty(path[level].bp_sib_bh))
1231 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 1231 mark_buffer_dirty(path[level].bp_sib_bh);
1232 1232
1233 nilfs_btree_promote_key(btree, path, level + 1, 1233 nilfs_btree_promote_key(btree, path, level + 1,
1234 nilfs_btree_node_get_key(node, 0)); 1234 nilfs_btree_node_get_key(node, 0));
@@ -1258,9 +1258,9 @@ static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1258 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1258 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1259 1259
1260 if (!buffer_dirty(path[level].bp_bh)) 1260 if (!buffer_dirty(path[level].bp_bh))
1261 nilfs_btnode_mark_dirty(path[level].bp_bh); 1261 mark_buffer_dirty(path[level].bp_bh);
1262 if (!buffer_dirty(path[level].bp_sib_bh)) 1262 if (!buffer_dirty(path[level].bp_sib_bh))
1263 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 1263 mark_buffer_dirty(path[level].bp_sib_bh);
1264 1264
1265 path[level + 1].bp_index++; 1265 path[level + 1].bp_index++;
1266 nilfs_btree_promote_key(btree, path, level + 1, 1266 nilfs_btree_promote_key(btree, path, level + 1,
@@ -1289,7 +1289,7 @@ static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1289 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 1289 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1290 1290
1291 if (!buffer_dirty(path[level].bp_sib_bh)) 1291 if (!buffer_dirty(path[level].bp_sib_bh))
1292 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 1292 mark_buffer_dirty(path[level].bp_sib_bh);
1293 1293
1294 nilfs_btnode_delete(path[level].bp_bh); 1294 nilfs_btnode_delete(path[level].bp_bh);
1295 path[level].bp_bh = path[level].bp_sib_bh; 1295 path[level].bp_bh = path[level].bp_sib_bh;
@@ -1315,7 +1315,7 @@ static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1315 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1315 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1316 1316
1317 if (!buffer_dirty(path[level].bp_bh)) 1317 if (!buffer_dirty(path[level].bp_bh))
1318 nilfs_btnode_mark_dirty(path[level].bp_bh); 1318 mark_buffer_dirty(path[level].bp_bh);
1319 1319
1320 nilfs_btnode_delete(path[level].bp_sib_bh); 1320 nilfs_btnode_delete(path[level].bp_sib_bh);
1321 path[level].bp_sib_bh = NULL; 1321 path[level].bp_sib_bh = NULL;
@@ -1709,7 +1709,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1709 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs); 1709 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1710 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk); 1710 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1711 if (!buffer_dirty(bh)) 1711 if (!buffer_dirty(bh))
1712 nilfs_btnode_mark_dirty(bh); 1712 mark_buffer_dirty(bh);
1713 if (!nilfs_bmap_dirty(btree)) 1713 if (!nilfs_bmap_dirty(btree))
1714 nilfs_bmap_set_dirty(btree); 1714 nilfs_bmap_set_dirty(btree);
1715 1715
@@ -1787,7 +1787,7 @@ static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1787{ 1787{
1788 while ((++level < nilfs_btree_height(btree) - 1) && 1788 while ((++level < nilfs_btree_height(btree) - 1) &&
1789 !buffer_dirty(path[level].bp_bh)) 1789 !buffer_dirty(path[level].bp_bh))
1790 nilfs_btnode_mark_dirty(path[level].bp_bh); 1790 mark_buffer_dirty(path[level].bp_bh);
1791 1791
1792 return 0; 1792 return 0;
1793} 1793}
@@ -2229,7 +2229,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2229 } 2229 }
2230 2230
2231 if (!buffer_dirty(bh)) 2231 if (!buffer_dirty(bh))
2232 nilfs_btnode_mark_dirty(bh); 2232 mark_buffer_dirty(bh);
2233 brelse(bh); 2233 brelse(bh);
2234 if (!nilfs_bmap_dirty(btree)) 2234 if (!nilfs_bmap_dirty(btree))
2235 nilfs_bmap_set_dirty(btree); 2235 nilfs_bmap_set_dirty(btree);
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 5ff15a8a1024..c9b342c8b503 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -216,14 +216,14 @@ int nilfs_cpfile_get_checkpoint(struct inode *cpfile,
216 if (!nilfs_cpfile_is_in_first(cpfile, cno)) 216 if (!nilfs_cpfile_is_in_first(cpfile, cno))
217 nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh, 217 nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh,
218 kaddr, 1); 218 kaddr, 1);
219 nilfs_mdt_mark_buffer_dirty(cp_bh); 219 mark_buffer_dirty(cp_bh);
220 220
221 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 221 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
222 header = nilfs_cpfile_block_get_header(cpfile, header_bh, 222 header = nilfs_cpfile_block_get_header(cpfile, header_bh,
223 kaddr); 223 kaddr);
224 le64_add_cpu(&header->ch_ncheckpoints, 1); 224 le64_add_cpu(&header->ch_ncheckpoints, 1);
225 kunmap_atomic(kaddr, KM_USER0); 225 kunmap_atomic(kaddr, KM_USER0);
226 nilfs_mdt_mark_buffer_dirty(header_bh); 226 mark_buffer_dirty(header_bh);
227 nilfs_mdt_mark_dirty(cpfile); 227 nilfs_mdt_mark_dirty(cpfile);
228 } 228 }
229 229
@@ -326,7 +326,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
326 } 326 }
327 if (nicps > 0) { 327 if (nicps > 0) {
328 tnicps += nicps; 328 tnicps += nicps;
329 nilfs_mdt_mark_buffer_dirty(cp_bh); 329 mark_buffer_dirty(cp_bh);
330 nilfs_mdt_mark_dirty(cpfile); 330 nilfs_mdt_mark_dirty(cpfile);
331 if (!nilfs_cpfile_is_in_first(cpfile, cno)) { 331 if (!nilfs_cpfile_is_in_first(cpfile, cno)) {
332 count = 332 count =
@@ -358,7 +358,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
358 header = nilfs_cpfile_block_get_header(cpfile, header_bh, 358 header = nilfs_cpfile_block_get_header(cpfile, header_bh,
359 kaddr); 359 kaddr);
360 le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps); 360 le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps);
361 nilfs_mdt_mark_buffer_dirty(header_bh); 361 mark_buffer_dirty(header_bh);
362 nilfs_mdt_mark_dirty(cpfile); 362 nilfs_mdt_mark_dirty(cpfile);
363 kunmap_atomic(kaddr, KM_USER0); 363 kunmap_atomic(kaddr, KM_USER0);
364 } 364 }
@@ -671,10 +671,10 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno)
671 le64_add_cpu(&header->ch_nsnapshots, 1); 671 le64_add_cpu(&header->ch_nsnapshots, 1);
672 kunmap_atomic(kaddr, KM_USER0); 672 kunmap_atomic(kaddr, KM_USER0);
673 673
674 nilfs_mdt_mark_buffer_dirty(prev_bh); 674 mark_buffer_dirty(prev_bh);
675 nilfs_mdt_mark_buffer_dirty(curr_bh); 675 mark_buffer_dirty(curr_bh);
676 nilfs_mdt_mark_buffer_dirty(cp_bh); 676 mark_buffer_dirty(cp_bh);
677 nilfs_mdt_mark_buffer_dirty(header_bh); 677 mark_buffer_dirty(header_bh);
678 nilfs_mdt_mark_dirty(cpfile); 678 nilfs_mdt_mark_dirty(cpfile);
679 679
680 brelse(prev_bh); 680 brelse(prev_bh);
@@ -774,10 +774,10 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno)
774 le64_add_cpu(&header->ch_nsnapshots, -1); 774 le64_add_cpu(&header->ch_nsnapshots, -1);
775 kunmap_atomic(kaddr, KM_USER0); 775 kunmap_atomic(kaddr, KM_USER0);
776 776
777 nilfs_mdt_mark_buffer_dirty(next_bh); 777 mark_buffer_dirty(next_bh);
778 nilfs_mdt_mark_buffer_dirty(prev_bh); 778 mark_buffer_dirty(prev_bh);
779 nilfs_mdt_mark_buffer_dirty(cp_bh); 779 mark_buffer_dirty(cp_bh);
780 nilfs_mdt_mark_buffer_dirty(header_bh); 780 mark_buffer_dirty(header_bh);
781 nilfs_mdt_mark_dirty(cpfile); 781 nilfs_mdt_mark_dirty(cpfile);
782 782
783 brelse(prev_bh); 783 brelse(prev_bh);
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 59e5fe742f7b..fcc2f869af16 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -54,7 +54,7 @@ static int nilfs_dat_prepare_entry(struct inode *dat,
54static void nilfs_dat_commit_entry(struct inode *dat, 54static void nilfs_dat_commit_entry(struct inode *dat,
55 struct nilfs_palloc_req *req) 55 struct nilfs_palloc_req *req)
56{ 56{
57 nilfs_mdt_mark_buffer_dirty(req->pr_entry_bh); 57 mark_buffer_dirty(req->pr_entry_bh);
58 nilfs_mdt_mark_dirty(dat); 58 nilfs_mdt_mark_dirty(dat);
59 brelse(req->pr_entry_bh); 59 brelse(req->pr_entry_bh);
60} 60}
@@ -361,7 +361,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
361 entry->de_blocknr = cpu_to_le64(blocknr); 361 entry->de_blocknr = cpu_to_le64(blocknr);
362 kunmap_atomic(kaddr, KM_USER0); 362 kunmap_atomic(kaddr, KM_USER0);
363 363
364 nilfs_mdt_mark_buffer_dirty(entry_bh); 364 mark_buffer_dirty(entry_bh);
365 nilfs_mdt_mark_dirty(dat); 365 nilfs_mdt_mark_dirty(dat);
366 366
367 brelse(entry_bh); 367 brelse(entry_bh);
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 397e73258631..d7eeca62febd 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -111,7 +111,6 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
111 nilfs_transaction_commit(inode->i_sb); 111 nilfs_transaction_commit(inode->i_sb);
112 112
113 mapped: 113 mapped:
114 SetPageChecked(page);
115 wait_on_page_writeback(page); 114 wait_on_page_writeback(page);
116 return VM_FAULT_LOCKED; 115 return VM_FAULT_LOCKED;
117} 116}
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 1c2a3e23f8b2..08a07a218d26 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -48,9 +48,6 @@
48#include "dat.h" 48#include "dat.h"
49#include "ifile.h" 49#include "ifile.h"
50 50
51static const struct address_space_operations def_gcinode_aops = {
52};
53
54/* 51/*
55 * nilfs_gccache_submit_read_data() - add data buffer and submit read request 52 * nilfs_gccache_submit_read_data() - add data buffer and submit read request
56 * @inode - gc inode 53 * @inode - gc inode
@@ -87,9 +84,9 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
87 goto out; 84 goto out;
88 85
89 if (pbn == 0) { 86 if (pbn == 0) {
90 struct inode *dat_inode = NILFS_I_NILFS(inode)->ns_dat; 87 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
91 /* use original dat, not gc dat. */ 88
92 err = nilfs_dat_translate(dat_inode, vbn, &pbn); 89 err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn);
93 if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */ 90 if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */
94 brelse(bh); 91 brelse(bh);
95 goto failed; 92 goto failed;
@@ -103,7 +100,7 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
103 } 100 }
104 101
105 if (!buffer_mapped(bh)) { 102 if (!buffer_mapped(bh)) {
106 bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; 103 bh->b_bdev = inode->i_sb->s_bdev;
107 set_buffer_mapped(bh); 104 set_buffer_mapped(bh);
108 } 105 }
109 bh->b_blocknr = pbn; 106 bh->b_blocknr = pbn;
@@ -160,15 +157,11 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
160 if (buffer_dirty(bh)) 157 if (buffer_dirty(bh))
161 return -EEXIST; 158 return -EEXIST;
162 159
163 if (buffer_nilfs_node(bh)) { 160 if (buffer_nilfs_node(bh) && nilfs_btree_broken_node_block(bh)) {
164 if (nilfs_btree_broken_node_block(bh)) { 161 clear_buffer_uptodate(bh);
165 clear_buffer_uptodate(bh); 162 return -EIO;
166 return -EIO;
167 }
168 nilfs_btnode_mark_dirty(bh);
169 } else {
170 nilfs_mark_buffer_dirty(bh);
171 } 163 }
164 mark_buffer_dirty(bh);
172 return 0; 165 return 0;
173} 166}
174 167
@@ -178,7 +171,7 @@ int nilfs_init_gcinode(struct inode *inode)
178 171
179 inode->i_mode = S_IFREG; 172 inode->i_mode = S_IFREG;
180 mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); 173 mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
181 inode->i_mapping->a_ops = &def_gcinode_aops; 174 inode->i_mapping->a_ops = &empty_aops;
182 inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi; 175 inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi;
183 176
184 ii->i_flags = 0; 177 ii->i_flags = 0;
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index bfc73d3a30ed..684d76300a80 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -80,7 +80,7 @@ int nilfs_ifile_create_inode(struct inode *ifile, ino_t *out_ino,
80 return ret; 80 return ret;
81 } 81 }
82 nilfs_palloc_commit_alloc_entry(ifile, &req); 82 nilfs_palloc_commit_alloc_entry(ifile, &req);
83 nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh); 83 mark_buffer_dirty(req.pr_entry_bh);
84 nilfs_mdt_mark_dirty(ifile); 84 nilfs_mdt_mark_dirty(ifile);
85 *out_ino = (ino_t)req.pr_entry_nr; 85 *out_ino = (ino_t)req.pr_entry_nr;
86 *out_bh = req.pr_entry_bh; 86 *out_bh = req.pr_entry_bh;
@@ -128,7 +128,7 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino)
128 raw_inode->i_flags = 0; 128 raw_inode->i_flags = 0;
129 kunmap_atomic(kaddr, KM_USER0); 129 kunmap_atomic(kaddr, KM_USER0);
130 130
131 nilfs_mdt_mark_buffer_dirty(req.pr_entry_bh); 131 mark_buffer_dirty(req.pr_entry_bh);
132 brelse(req.pr_entry_bh); 132 brelse(req.pr_entry_bh);
133 133
134 nilfs_palloc_commit_free_entry(ifile, &req); 134 nilfs_palloc_commit_free_entry(ifile, &req);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index c0aa27490c02..587f18432832 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -74,14 +74,14 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
74 struct buffer_head *bh_result, int create) 74 struct buffer_head *bh_result, int create)
75{ 75{
76 struct nilfs_inode_info *ii = NILFS_I(inode); 76 struct nilfs_inode_info *ii = NILFS_I(inode);
77 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
77 __u64 blknum = 0; 78 __u64 blknum = 0;
78 int err = 0, ret; 79 int err = 0, ret;
79 struct inode *dat = NILFS_I_NILFS(inode)->ns_dat;
80 unsigned maxblocks = bh_result->b_size >> inode->i_blkbits; 80 unsigned maxblocks = bh_result->b_size >> inode->i_blkbits;
81 81
82 down_read(&NILFS_MDT(dat)->mi_sem); 82 down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
83 ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks); 83 ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
84 up_read(&NILFS_MDT(dat)->mi_sem); 84 up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem);
85 if (ret >= 0) { /* found */ 85 if (ret >= 0) { /* found */
86 map_bh(bh_result, inode->i_sb, blknum); 86 map_bh(bh_result, inode->i_sb, blknum);
87 if (ret > 0) 87 if (ret > 0)
@@ -596,6 +596,16 @@ void nilfs_write_inode_common(struct inode *inode,
596 raw_inode->i_flags = cpu_to_le32(ii->i_flags); 596 raw_inode->i_flags = cpu_to_le32(ii->i_flags);
597 raw_inode->i_generation = cpu_to_le32(inode->i_generation); 597 raw_inode->i_generation = cpu_to_le32(inode->i_generation);
598 598
599 if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) {
600 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
601
602 /* zero-fill unused portion in the case of super root block */
603 raw_inode->i_xattr = 0;
604 raw_inode->i_pad = 0;
605 memset((void *)raw_inode + sizeof(*raw_inode), 0,
606 nilfs->ns_inode_size - sizeof(*raw_inode));
607 }
608
599 if (has_bmap) 609 if (has_bmap)
600 nilfs_bmap_write(ii->i_bmap, raw_inode); 610 nilfs_bmap_write(ii->i_bmap, raw_inode);
601 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) 611 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
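
The zero-fill added above matters because the on-disk inode slot (ns_inode_size) can be larger than struct nilfs_inode, so the tail of the slot must be cleared rather than left with stale bytes when writing the super root. A generic sketch of writing a struct into an oversized slot (illustrative types):

#include <string.h>

struct raw_rec {			/* stands in for the raw on-disk inode */
	unsigned long long a, b;
};

static void write_rec(void *slot, size_t slot_size, const struct raw_rec *src)
{
	memcpy(slot, src, sizeof(*src));
	if (slot_size > sizeof(*src))	/* e.g. ns_inode_size > sizeof(*src) */
		memset((char *)slot + sizeof(*src), 0,
		       slot_size - sizeof(*src));
}
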
@@ -872,8 +882,7 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
872 return -EINVAL; /* NILFS_I_DIRTY may remain for 882 return -EINVAL; /* NILFS_I_DIRTY may remain for
873 freeing inode */ 883 freeing inode */
874 } 884 }
875 list_del(&ii->i_dirty); 885 list_move_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
876 list_add_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
877 set_bit(NILFS_I_QUEUED, &ii->i_state); 886 set_bit(NILFS_I_QUEUED, &ii->i_state);
878 } 887 }
879 spin_unlock(&nilfs->ns_inode_lock); 888 spin_unlock(&nilfs->ns_inode_lock);
@@ -892,7 +901,7 @@ int nilfs_mark_inode_dirty(struct inode *inode)
892 return err; 901 return err;
893 } 902 }
894 nilfs_update_inode(inode, ibh); 903 nilfs_update_inode(inode, ibh);
895 nilfs_mdt_mark_buffer_dirty(ibh); 904 mark_buffer_dirty(ibh);
896 nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile); 905 nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile);
897 brelse(ibh); 906 brelse(ibh);
898 return 0; 907 return 0;
@@ -931,7 +940,7 @@ void nilfs_dirty_inode(struct inode *inode)
931int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 940int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
932 __u64 start, __u64 len) 941 __u64 start, __u64 len)
933{ 942{
934 struct the_nilfs *nilfs = NILFS_I_NILFS(inode); 943 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
935 __u64 logical = 0, phys = 0, size = 0; 944 __u64 logical = 0, phys = 0, size = 0;
936 __u32 flags = 0; 945 __u32 flags = 0;
937 loff_t isize; 946 loff_t isize;
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index f2469ba6246b..41d6743d303c 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -698,6 +698,63 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
698 return 0; 698 return 0;
699} 699}
700 700
701static int nilfs_ioctl_resize(struct inode *inode, struct file *filp,
702 void __user *argp)
703{
704 __u64 newsize;
705 int ret = -EPERM;
706
707 if (!capable(CAP_SYS_ADMIN))
708 goto out;
709
710 ret = mnt_want_write(filp->f_path.mnt);
711 if (ret)
712 goto out;
713
714 ret = -EFAULT;
715 if (copy_from_user(&newsize, argp, sizeof(newsize)))
716 goto out_drop_write;
717
718 ret = nilfs_resize_fs(inode->i_sb, newsize);
719
720out_drop_write:
721 mnt_drop_write(filp->f_path.mnt);
722out:
723 return ret;
724}
725
726static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp)
727{
728 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
729 __u64 range[2];
730 __u64 minseg, maxseg;
731 unsigned long segbytes;
732 int ret = -EPERM;
733
734 if (!capable(CAP_SYS_ADMIN))
735 goto out;
736
737 ret = -EFAULT;
738 if (copy_from_user(range, argp, sizeof(__u64[2])))
739 goto out;
740
741 ret = -ERANGE;
742 if (range[1] > i_size_read(inode->i_sb->s_bdev->bd_inode))
743 goto out;
744
745 segbytes = nilfs->ns_blocks_per_segment * nilfs->ns_blocksize;
746
747 minseg = range[0] + segbytes - 1;
748 do_div(minseg, segbytes);
749 maxseg = NILFS_SB2_OFFSET_BYTES(range[1]);
750 do_div(maxseg, segbytes);
751 maxseg--;
752
753 ret = nilfs_sufile_set_alloc_range(nilfs->ns_sufile, minseg, maxseg);
754out:
755 return ret;
756}
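
The alloc-range arithmetic above rounds the lower byte bound up to a whole segment and the upper bound down, keeping room at the end of the range for the backup superblock. A worked example with made-up geometry; the 4 KiB-aligned reserve below stands in for NILFS_SB2_OFFSET_BYTES():

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	uint64_t blocks_per_segment = 2048, blocksize = 4096;
	uint64_t segbytes = blocks_per_segment * blocksize;	/* 8 MiB */
	uint64_t range[2] = { 10ULL << 20, 1ULL << 30 };	/* 10 MiB .. 1 GiB */

	uint64_t minseg = (range[0] + segbytes - 1) / segbytes;	/* round up */
	uint64_t sb2_off = (range[1] & ~(uint64_t)4095) - 4096;	/* reserve, assumed */
	uint64_t maxseg = sb2_off / segbytes - 1;		/* round down */

	printf("allocatable segments: %" PRIu64 " .. %" PRIu64 "\n",
	       minseg, maxseg);
	return 0;
}
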
757
701static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, 758static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
702 unsigned int cmd, void __user *argp, 759 unsigned int cmd, void __user *argp,
703 size_t membsz, 760 size_t membsz,
@@ -763,6 +820,10 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
763 return nilfs_ioctl_clean_segments(inode, filp, cmd, argp); 820 return nilfs_ioctl_clean_segments(inode, filp, cmd, argp);
764 case NILFS_IOCTL_SYNC: 821 case NILFS_IOCTL_SYNC:
765 return nilfs_ioctl_sync(inode, filp, cmd, argp); 822 return nilfs_ioctl_sync(inode, filp, cmd, argp);
823 case NILFS_IOCTL_RESIZE:
824 return nilfs_ioctl_resize(inode, filp, argp);
825 case NILFS_IOCTL_SET_ALLOC_RANGE:
826 return nilfs_ioctl_set_alloc_range(inode, argp);
766 default: 827 default:
767 return -ENOTTY; 828 return -ENOTTY;
768 } 829 }
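
For completeness, this is roughly how the new resize ioctl above would be driven from user space: a __u64 byte count passed by pointer, matching the copy_from_user() in nilfs_ioctl_resize(). The header path providing NILFS_IOCTL_RESIZE is an assumption here, and error handling is trimmed:

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/nilfs2_fs.h>	/* NILFS_IOCTL_RESIZE -- assumed export path */

/* fd may refer to any file or directory on the mounted nilfs2 volume. */
static int nilfs_resize_volume(const char *path, __u64 newsize_bytes)
{
	int fd = open(path, O_RDONLY);
	int ret;

	if (fd < 0)
		return -1;
	ret = ioctl(fd, NILFS_IOCTL_RESIZE, &newsize_bytes);
	close(fd);
	return ret;
}
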
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index a649b05f7069..800e8d78a83b 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -66,7 +66,7 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
66 kunmap_atomic(kaddr, KM_USER0); 66 kunmap_atomic(kaddr, KM_USER0);
67 67
68 set_buffer_uptodate(bh); 68 set_buffer_uptodate(bh);
69 nilfs_mark_buffer_dirty(bh); 69 mark_buffer_dirty(bh);
70 nilfs_mdt_mark_dirty(inode); 70 nilfs_mdt_mark_dirty(inode);
71 return 0; 71 return 0;
72} 72}
@@ -355,7 +355,7 @@ int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block)
355 err = nilfs_mdt_read_block(inode, block, 0, &bh); 355 err = nilfs_mdt_read_block(inode, block, 0, &bh);
356 if (unlikely(err)) 356 if (unlikely(err))
357 return err; 357 return err;
358 nilfs_mark_buffer_dirty(bh); 358 mark_buffer_dirty(bh);
359 nilfs_mdt_mark_dirty(inode); 359 nilfs_mdt_mark_dirty(inode);
360 brelse(bh); 360 brelse(bh);
361 return 0; 361 return 0;
@@ -450,9 +450,9 @@ int nilfs_mdt_setup_shadow_map(struct inode *inode,
450 450
451 INIT_LIST_HEAD(&shadow->frozen_buffers); 451 INIT_LIST_HEAD(&shadow->frozen_buffers);
452 address_space_init_once(&shadow->frozen_data); 452 address_space_init_once(&shadow->frozen_data);
453 nilfs_mapping_init(&shadow->frozen_data, bdi); 453 nilfs_mapping_init(&shadow->frozen_data, inode, bdi);
454 address_space_init_once(&shadow->frozen_btnodes); 454 address_space_init_once(&shadow->frozen_btnodes);
455 nilfs_mapping_init(&shadow->frozen_btnodes, bdi); 455 nilfs_mapping_init(&shadow->frozen_btnodes, inode, bdi);
456 mi->mi_shadow = shadow; 456 mi->mi_shadow = shadow;
457 return 0; 457 return 0;
458} 458}
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index ed68563ec708..ab20a4baa50f 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -64,11 +64,6 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode)
64 return inode->i_private; 64 return inode->i_private;
65} 65}
66 66
67static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode)
68{
69 return inode->i_sb->s_fs_info;
70}
71
72/* Default GFP flags using highmem */ 67/* Default GFP flags using highmem */
73#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM) 68#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM)
74 69
@@ -93,8 +88,6 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh);
93struct buffer_head *nilfs_mdt_get_frozen_buffer(struct inode *inode, 88struct buffer_head *nilfs_mdt_get_frozen_buffer(struct inode *inode,
94 struct buffer_head *bh); 89 struct buffer_head *bh);
95 90
96#define nilfs_mdt_mark_buffer_dirty(bh) nilfs_mark_buffer_dirty(bh)
97
98static inline void nilfs_mdt_mark_dirty(struct inode *inode) 91static inline void nilfs_mdt_mark_dirty(struct inode *inode)
99{ 92{
100 if (!test_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state)) 93 if (!test_bit(NILFS_I_DIRTY, &NILFS_I(inode)->i_state))
@@ -108,7 +101,7 @@ static inline void nilfs_mdt_clear_dirty(struct inode *inode)
108 101
109static inline __u64 nilfs_mdt_cno(struct inode *inode) 102static inline __u64 nilfs_mdt_cno(struct inode *inode)
110{ 103{
111 return NILFS_I_NILFS(inode)->ns_cno; 104 return ((struct the_nilfs *)inode->i_sb->s_fs_info)->ns_cno;
112} 105}
113 106
114#define nilfs_mdt_bgl_lock(inode, bg) \ 107#define nilfs_mdt_bgl_lock(inode, bg) \
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index a8dd344303cb..a9c6a531f80c 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -80,12 +80,6 @@ static inline struct inode *NILFS_BTNC_I(struct address_space *btnc)
80 return &ii->vfs_inode; 80 return &ii->vfs_inode;
81} 81}
82 82
83static inline struct inode *NILFS_AS_I(struct address_space *mapping)
84{
85 return (mapping->host) ? :
86 container_of(mapping, struct inode, i_data);
87}
88
89/* 83/*
90 * Dynamic state flags of NILFS on-memory inode (i_state) 84 * Dynamic state flags of NILFS on-memory inode (i_state)
91 */ 85 */
@@ -298,6 +292,7 @@ struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb,
298 int flip); 292 int flip);
299int nilfs_commit_super(struct super_block *sb, int flag); 293int nilfs_commit_super(struct super_block *sb, int flag);
300int nilfs_cleanup_super(struct super_block *sb); 294int nilfs_cleanup_super(struct super_block *sb);
295int nilfs_resize_fs(struct super_block *sb, __u64 newsize);
301int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, 296int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
302 struct nilfs_root **root); 297 struct nilfs_root **root);
303int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno); 298int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 1168059c7efd..65221a04c6f0 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -37,8 +37,7 @@
37 37
38#define NILFS_BUFFER_INHERENT_BITS \ 38#define NILFS_BUFFER_INHERENT_BITS \
39 ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \ 39 ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \
40 (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated) | \ 40 (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Checked))
41 (1UL << BH_NILFS_Checked))
42 41
43static struct buffer_head * 42static struct buffer_head *
44__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, 43__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
@@ -59,19 +58,6 @@ __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
59 return bh; 58 return bh;
60} 59}
61 60
62/*
63 * Since the page cache of B-tree node pages or data page cache of pseudo
64 * inodes does not have a valid mapping->host pointer, calling
65 * mark_buffer_dirty() for their buffers causes a NULL pointer dereference;
66 * it calls __mark_inode_dirty(NULL) through __set_page_dirty().
67 * To avoid this problem, the old style mark_buffer_dirty() is used instead.
68 */
69void nilfs_mark_buffer_dirty(struct buffer_head *bh)
70{
71 if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh))
72 __set_page_dirty_nobuffers(bh->b_page);
73}
74
75struct buffer_head *nilfs_grab_buffer(struct inode *inode, 61struct buffer_head *nilfs_grab_buffer(struct inode *inode,
76 struct address_space *mapping, 62 struct address_space *mapping,
77 unsigned long blkoff, 63 unsigned long blkoff,
@@ -183,7 +169,7 @@ int nilfs_page_buffers_clean(struct page *page)
183void nilfs_page_bug(struct page *page) 169void nilfs_page_bug(struct page *page)
184{ 170{
185 struct address_space *m; 171 struct address_space *m;
186 unsigned long ino = 0; 172 unsigned long ino;
187 173
188 if (unlikely(!page)) { 174 if (unlikely(!page)) {
189 printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n"); 175 printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n");
@@ -191,11 +177,8 @@ void nilfs_page_bug(struct page *page)
191 } 177 }
192 178
193 m = page->mapping; 179 m = page->mapping;
194 if (m) { 180 ino = m ? m->host->i_ino : 0;
195 struct inode *inode = NILFS_AS_I(m); 181
196 if (inode != NULL)
197 ino = inode->i_ino;
198 }
199 printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx " 182 printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
200 "mapping=%p ino=%lu\n", 183 "mapping=%p ino=%lu\n",
201 page, atomic_read(&page->_count), 184 page, atomic_read(&page->_count),
@@ -217,56 +200,6 @@ void nilfs_page_bug(struct page *page)
217} 200}
218 201
219/** 202/**
220 * nilfs_alloc_private_page - allocate a private page with buffer heads
221 *
222 * Return Value: On success, a pointer to the allocated page is returned.
223 * On error, NULL is returned.
224 */
225struct page *nilfs_alloc_private_page(struct block_device *bdev, int size,
226 unsigned long state)
227{
228 struct buffer_head *bh, *head, *tail;
229 struct page *page;
230
231 page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */
232 if (unlikely(!page))
233 return NULL;
234
235 lock_page(page);
236 head = alloc_page_buffers(page, size, 0);
237 if (unlikely(!head)) {
238 unlock_page(page);
239 __free_page(page);
240 return NULL;
241 }
242
243 bh = head;
244 do {
245 bh->b_state = (1UL << BH_NILFS_Allocated) | state;
246 tail = bh;
247 bh->b_bdev = bdev;
248 bh = bh->b_this_page;
249 } while (bh);
250
251 tail->b_this_page = head;
252 attach_page_buffers(page, head);
253
254 return page;
255}
256
257void nilfs_free_private_page(struct page *page)
258{
259 BUG_ON(!PageLocked(page));
260 BUG_ON(page->mapping);
261
262 if (page_has_buffers(page) && !try_to_free_buffers(page))
263 NILFS_PAGE_BUG(page, "failed to free page");
264
265 unlock_page(page);
266 __free_page(page);
267}
268
269/**
270 * nilfs_copy_page -- copy the page with buffers 203 * nilfs_copy_page -- copy the page with buffers
271 * @dst: destination page 204 * @dst: destination page
272 * @src: source page 205 * @src: source page
@@ -492,10 +425,10 @@ unsigned nilfs_page_count_clean_buffers(struct page *page,
492 return nc; 425 return nc;
493} 426}
494 427
495void nilfs_mapping_init(struct address_space *mapping, 428void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
496 struct backing_dev_info *bdi) 429 struct backing_dev_info *bdi)
497{ 430{
498 mapping->host = NULL; 431 mapping->host = inode;
499 mapping->flags = 0; 432 mapping->flags = 0;
500 mapping_set_gfp_mask(mapping, GFP_NOFS); 433 mapping_set_gfp_mask(mapping, GFP_NOFS);
501 mapping->assoc_mapping = NULL; 434 mapping->assoc_mapping = NULL;
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index f06b79ad7493..fb7de71605a0 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -38,14 +38,12 @@ enum {
38 BH_NILFS_Redirected, 38 BH_NILFS_Redirected,
39}; 39};
40 40
41BUFFER_FNS(NILFS_Allocated, nilfs_allocated) /* nilfs private buffers */
42BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */ 41BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */
43BUFFER_FNS(NILFS_Volatile, nilfs_volatile) 42BUFFER_FNS(NILFS_Volatile, nilfs_volatile)
44BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */ 43BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */
45BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */ 44BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */
46 45
47 46
48void nilfs_mark_buffer_dirty(struct buffer_head *bh);
49int __nilfs_clear_page_dirty(struct page *); 47int __nilfs_clear_page_dirty(struct page *);
50 48
51struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *, 49struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *,
@@ -54,14 +52,11 @@ void nilfs_forget_buffer(struct buffer_head *);
54void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *); 52void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *);
55int nilfs_page_buffers_clean(struct page *); 53int nilfs_page_buffers_clean(struct page *);
56void nilfs_page_bug(struct page *); 54void nilfs_page_bug(struct page *);
57struct page *nilfs_alloc_private_page(struct block_device *, int,
58 unsigned long);
59void nilfs_free_private_page(struct page *);
60 55
61int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); 56int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
62void nilfs_copy_back_pages(struct address_space *, struct address_space *); 57void nilfs_copy_back_pages(struct address_space *, struct address_space *);
63void nilfs_clear_dirty_pages(struct address_space *); 58void nilfs_clear_dirty_pages(struct address_space *);
64void nilfs_mapping_init(struct address_space *mapping, 59void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
65 struct backing_dev_info *bdi); 60 struct backing_dev_info *bdi);
66unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); 61unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
67unsigned long nilfs_find_uncommitted_extent(struct inode *inode, 62unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index ba4a64518f38..a604ac0331b2 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -387,9 +387,9 @@ static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr,
387static void dispose_recovery_list(struct list_head *head) 387static void dispose_recovery_list(struct list_head *head)
388{ 388{
389 while (!list_empty(head)) { 389 while (!list_empty(head)) {
390 struct nilfs_recovery_block *rb 390 struct nilfs_recovery_block *rb;
391 = list_entry(head->next, 391
392 struct nilfs_recovery_block, list); 392 rb = list_first_entry(head, struct nilfs_recovery_block, list);
393 list_del(&rb->list); 393 list_del(&rb->list);
394 kfree(rb); 394 kfree(rb);
395 } 395 }
@@ -416,9 +416,9 @@ static int nilfs_segment_list_add(struct list_head *head, __u64 segnum)
416void nilfs_dispose_segment_list(struct list_head *head) 416void nilfs_dispose_segment_list(struct list_head *head)
417{ 417{
418 while (!list_empty(head)) { 418 while (!list_empty(head)) {
419 struct nilfs_segment_entry *ent 419 struct nilfs_segment_entry *ent;
420 = list_entry(head->next, 420
421 struct nilfs_segment_entry, list); 421 ent = list_first_entry(head, struct nilfs_segment_entry, list);
422 list_del(&ent->list); 422 list_del(&ent->list);
423 kfree(ent); 423 kfree(ent);
424 } 424 }
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 2853ff20f85a..850a7c0228fb 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -239,12 +239,15 @@ nilfs_segbuf_fill_in_super_root_crc(struct nilfs_segment_buffer *segbuf,
239 u32 seed) 239 u32 seed)
240{ 240{
241 struct nilfs_super_root *raw_sr; 241 struct nilfs_super_root *raw_sr;
242 struct the_nilfs *nilfs = segbuf->sb_super->s_fs_info;
243 unsigned srsize;
242 u32 crc; 244 u32 crc;
243 245
244 raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data; 246 raw_sr = (struct nilfs_super_root *)segbuf->sb_super_root->b_data;
247 srsize = NILFS_SR_BYTES(nilfs->ns_inode_size);
245 crc = crc32_le(seed, 248 crc = crc32_le(seed,
246 (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum), 249 (unsigned char *)raw_sr + sizeof(raw_sr->sr_sum),
247 NILFS_SR_BYTES - sizeof(raw_sr->sr_sum)); 250 srsize - sizeof(raw_sr->sr_sum));
248 raw_sr->sr_sum = cpu_to_le32(crc); 251 raw_sr->sr_sum = cpu_to_le32(crc);
249} 252}
250 253
@@ -254,18 +257,6 @@ static void nilfs_release_buffers(struct list_head *list)
254 257
255 list_for_each_entry_safe(bh, n, list, b_assoc_buffers) { 258 list_for_each_entry_safe(bh, n, list, b_assoc_buffers) {
256 list_del_init(&bh->b_assoc_buffers); 259 list_del_init(&bh->b_assoc_buffers);
257 if (buffer_nilfs_allocated(bh)) {
258 struct page *clone_page = bh->b_page;
259
260 /* remove clone page */
261 brelse(bh);
262 page_cache_release(clone_page); /* for each bh */
263 if (page_count(clone_page) <= 2) {
264 lock_page(clone_page);
265 nilfs_free_private_page(clone_page);
266 }
267 continue;
268 }
269 brelse(bh); 260 brelse(bh);
270 } 261 }
271} 262}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index afe4f2183454..141646e88fb5 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -655,13 +655,10 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
655 if (unlikely(page->index > last)) 655 if (unlikely(page->index > last))
656 break; 656 break;
657 657
658 if (mapping->host) { 658 lock_page(page);
659 lock_page(page); 659 if (!page_has_buffers(page))
660 if (!page_has_buffers(page)) 660 create_empty_buffers(page, 1 << inode->i_blkbits, 0);
661 create_empty_buffers(page, 661 unlock_page(page);
662 1 << inode->i_blkbits, 0);
663 unlock_page(page);
664 }
665 662
666 bh = head = page_buffers(page); 663 bh = head = page_buffers(page);
667 do { 664 do {
@@ -809,7 +806,7 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
809 /* The following code is duplicated with cpfile. But, it is 806 /* The following code is duplicated with cpfile. But, it is
810 needed to collect the checkpoint even if it was not newly 807 needed to collect the checkpoint even if it was not newly
811 created */ 808 created */
812 nilfs_mdt_mark_buffer_dirty(bh_cp); 809 mark_buffer_dirty(bh_cp);
813 nilfs_mdt_mark_dirty(nilfs->ns_cpfile); 810 nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
814 nilfs_cpfile_put_checkpoint( 811 nilfs_cpfile_put_checkpoint(
815 nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); 812 nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
@@ -889,12 +886,14 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
889{ 886{
890 struct buffer_head *bh_sr; 887 struct buffer_head *bh_sr;
891 struct nilfs_super_root *raw_sr; 888 struct nilfs_super_root *raw_sr;
892 unsigned isz = nilfs->ns_inode_size; 889 unsigned isz, srsz;
893 890
894 bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root; 891 bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
895 raw_sr = (struct nilfs_super_root *)bh_sr->b_data; 892 raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
893 isz = nilfs->ns_inode_size;
894 srsz = NILFS_SR_BYTES(isz);
896 895
897 raw_sr->sr_bytes = cpu_to_le16(NILFS_SR_BYTES); 896 raw_sr->sr_bytes = cpu_to_le16(srsz);
898 raw_sr->sr_nongc_ctime 897 raw_sr->sr_nongc_ctime
899 = cpu_to_le64(nilfs_doing_gc() ? 898 = cpu_to_le64(nilfs_doing_gc() ?
900 nilfs->ns_nongc_ctime : sci->sc_seg_ctime); 899 nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
@@ -906,6 +905,7 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
906 NILFS_SR_CPFILE_OFFSET(isz), 1); 905 NILFS_SR_CPFILE_OFFSET(isz), 1);
907 nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr + 906 nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
908 NILFS_SR_SUFILE_OFFSET(isz), 1); 907 NILFS_SR_SUFILE_OFFSET(isz), 1);
908 memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
909} 909}
910 910
911static void nilfs_redirty_inodes(struct list_head *head) 911static void nilfs_redirty_inodes(struct list_head *head)
@@ -954,8 +954,8 @@ static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
954 954
955 dispose_buffers: 955 dispose_buffers:
956 while (!list_empty(listp)) { 956 while (!list_empty(listp)) {
957 bh = list_entry(listp->next, struct buffer_head, 957 bh = list_first_entry(listp, struct buffer_head,
958 b_assoc_buffers); 958 b_assoc_buffers);
959 list_del_init(&bh->b_assoc_buffers); 959 list_del_init(&bh->b_assoc_buffers);
960 brelse(bh); 960 brelse(bh);
961 } 961 }
@@ -1500,10 +1500,7 @@ nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
1500 nblocks = le32_to_cpu(finfo->fi_nblocks); 1500 nblocks = le32_to_cpu(finfo->fi_nblocks);
1501 ndatablk = le32_to_cpu(finfo->fi_ndatablk); 1501 ndatablk = le32_to_cpu(finfo->fi_ndatablk);
1502 1502
1503 if (buffer_nilfs_node(bh)) 1503 inode = bh->b_page->mapping->host;
1504 inode = NILFS_BTNC_I(bh->b_page->mapping);
1505 else
1506 inode = NILFS_AS_I(bh->b_page->mapping);
1507 1504
1508 if (mode == SC_LSEG_DSYNC) 1505 if (mode == SC_LSEG_DSYNC)
1509 sc_op = &nilfs_sc_dsync_ops; 1506 sc_op = &nilfs_sc_dsync_ops;
@@ -1556,83 +1553,24 @@ static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode)
1556 return 0; 1553 return 0;
1557} 1554}
1558 1555
1559static int 1556static void nilfs_begin_page_io(struct page *page)
1560nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out)
1561{
1562 struct page *clone_page;
1563 struct buffer_head *bh, *head, *bh2;
1564 void *kaddr;
1565
1566 bh = head = page_buffers(page);
1567
1568 clone_page = nilfs_alloc_private_page(bh->b_bdev, bh->b_size, 0);
1569 if (unlikely(!clone_page))
1570 return -ENOMEM;
1571
1572 bh2 = page_buffers(clone_page);
1573 kaddr = kmap_atomic(page, KM_USER0);
1574 do {
1575 if (list_empty(&bh->b_assoc_buffers))
1576 continue;
1577 get_bh(bh2);
1578 page_cache_get(clone_page); /* for each bh */
1579 memcpy(bh2->b_data, kaddr + bh_offset(bh), bh2->b_size);
1580 bh2->b_blocknr = bh->b_blocknr;
1581 list_replace(&bh->b_assoc_buffers, &bh2->b_assoc_buffers);
1582 list_add_tail(&bh->b_assoc_buffers, out);
1583 } while (bh = bh->b_this_page, bh2 = bh2->b_this_page, bh != head);
1584 kunmap_atomic(kaddr, KM_USER0);
1585
1586 if (!TestSetPageWriteback(clone_page))
1587 account_page_writeback(clone_page);
1588 unlock_page(clone_page);
1589
1590 return 0;
1591}
1592
1593static int nilfs_test_page_to_be_frozen(struct page *page)
1594{
1595 struct address_space *mapping = page->mapping;
1596
1597 if (!mapping || !mapping->host || S_ISDIR(mapping->host->i_mode))
1598 return 0;
1599
1600 if (page_mapped(page)) {
1601 ClearPageChecked(page);
1602 return 1;
1603 }
1604 return PageChecked(page);
1605}
1606
1607static int nilfs_begin_page_io(struct page *page, struct list_head *out)
1608{ 1557{
1609 if (!page || PageWriteback(page)) 1558 if (!page || PageWriteback(page))
1610 /* For split b-tree node pages, this function may be called 1559 /* For split b-tree node pages, this function may be called
1611 twice. We ignore the 2nd or later calls by this check. */ 1560 twice. We ignore the 2nd or later calls by this check. */
1612 return 0; 1561 return;
1613 1562
1614 lock_page(page); 1563 lock_page(page);
1615 clear_page_dirty_for_io(page); 1564 clear_page_dirty_for_io(page);
1616 set_page_writeback(page); 1565 set_page_writeback(page);
1617 unlock_page(page); 1566 unlock_page(page);
1618
1619 if (nilfs_test_page_to_be_frozen(page)) {
1620 int err = nilfs_copy_replace_page_buffers(page, out);
1621 if (unlikely(err))
1622 return err;
1623 }
1624 return 0;
1625} 1567}
1626 1568
1627static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, 1569static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
1628 struct page **failed_page)
1629{ 1570{
1630 struct nilfs_segment_buffer *segbuf; 1571 struct nilfs_segment_buffer *segbuf;
1631 struct page *bd_page = NULL, *fs_page = NULL; 1572 struct page *bd_page = NULL, *fs_page = NULL;
1632 struct list_head *list = &sci->sc_copied_buffers;
1633 int err;
1634 1573
1635 *failed_page = NULL;
1636 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { 1574 list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
1637 struct buffer_head *bh; 1575 struct buffer_head *bh;
1638 1576
@@ -1662,11 +1600,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
1662 break; 1600 break;
1663 } 1601 }
1664 if (bh->b_page != fs_page) { 1602 if (bh->b_page != fs_page) {
1665 err = nilfs_begin_page_io(fs_page, list); 1603 nilfs_begin_page_io(fs_page);
1666 if (unlikely(err)) {
1667 *failed_page = fs_page;
1668 goto out;
1669 }
1670 fs_page = bh->b_page; 1604 fs_page = bh->b_page;
1671 } 1605 }
1672 } 1606 }
@@ -1677,11 +1611,7 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci,
1677 set_page_writeback(bd_page); 1611 set_page_writeback(bd_page);
1678 unlock_page(bd_page); 1612 unlock_page(bd_page);
1679 } 1613 }
1680 err = nilfs_begin_page_io(fs_page, list); 1614 nilfs_begin_page_io(fs_page);
1681 if (unlikely(err))
1682 *failed_page = fs_page;
1683 out:
1684 return err;
1685} 1615}
1686 1616
1687static int nilfs_segctor_write(struct nilfs_sc_info *sci, 1617static int nilfs_segctor_write(struct nilfs_sc_info *sci,
@@ -1694,24 +1624,6 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci,
1694 return ret; 1624 return ret;
1695} 1625}
1696 1626
1697static void __nilfs_end_page_io(struct page *page, int err)
1698{
1699 if (!err) {
1700 if (!nilfs_page_buffers_clean(page))
1701 __set_page_dirty_nobuffers(page);
1702 ClearPageError(page);
1703 } else {
1704 __set_page_dirty_nobuffers(page);
1705 SetPageError(page);
1706 }
1707
1708 if (buffer_nilfs_allocated(page_buffers(page))) {
1709 if (TestClearPageWriteback(page))
1710 dec_zone_page_state(page, NR_WRITEBACK);
1711 } else
1712 end_page_writeback(page);
1713}
1714
1715static void nilfs_end_page_io(struct page *page, int err) 1627static void nilfs_end_page_io(struct page *page, int err)
1716{ 1628{
1717 if (!page) 1629 if (!page)
@@ -1738,40 +1650,19 @@ static void nilfs_end_page_io(struct page *page, int err)
1738 return; 1650 return;
1739 } 1651 }
1740 1652
1741 __nilfs_end_page_io(page, err); 1653 if (!err) {
1742} 1654 if (!nilfs_page_buffers_clean(page))
1743 1655 __set_page_dirty_nobuffers(page);
1744static void nilfs_clear_copied_buffers(struct list_head *list, int err) 1656 ClearPageError(page);
1745{ 1657 } else {
1746 struct buffer_head *bh, *head; 1658 __set_page_dirty_nobuffers(page);
1747 struct page *page; 1659 SetPageError(page);
1748
1749 while (!list_empty(list)) {
1750 bh = list_entry(list->next, struct buffer_head,
1751 b_assoc_buffers);
1752 page = bh->b_page;
1753 page_cache_get(page);
1754 head = bh = page_buffers(page);
1755 do {
1756 if (!list_empty(&bh->b_assoc_buffers)) {
1757 list_del_init(&bh->b_assoc_buffers);
1758 if (!err) {
1759 set_buffer_uptodate(bh);
1760 clear_buffer_dirty(bh);
1761 clear_buffer_delay(bh);
1762 clear_buffer_nilfs_volatile(bh);
1763 }
1764 brelse(bh); /* for b_assoc_buffers */
1765 }
1766 } while ((bh = bh->b_this_page) != head);
1767
1768 __nilfs_end_page_io(page, err);
1769 page_cache_release(page);
1770 } 1660 }
1661
1662 end_page_writeback(page);
1771} 1663}
1772 1664
1773static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page, 1665static void nilfs_abort_logs(struct list_head *logs, int err)
1774 int err)
1775{ 1666{
1776 struct nilfs_segment_buffer *segbuf; 1667 struct nilfs_segment_buffer *segbuf;
1777 struct page *bd_page = NULL, *fs_page = NULL; 1668 struct page *bd_page = NULL, *fs_page = NULL;
@@ -1801,8 +1692,6 @@ static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page,
1801 } 1692 }
1802 if (bh->b_page != fs_page) { 1693 if (bh->b_page != fs_page) {
1803 nilfs_end_page_io(fs_page, err); 1694 nilfs_end_page_io(fs_page, err);
1804 if (fs_page && fs_page == failed_page)
1805 return;
1806 fs_page = bh->b_page; 1695 fs_page = bh->b_page;
1807 } 1696 }
1808 } 1697 }
@@ -1821,12 +1710,11 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
1821 1710
1822 list_splice_tail_init(&sci->sc_write_logs, &logs); 1711 list_splice_tail_init(&sci->sc_write_logs, &logs);
1823 ret = nilfs_wait_on_logs(&logs); 1712 ret = nilfs_wait_on_logs(&logs);
1824 nilfs_abort_logs(&logs, NULL, ret ? : err); 1713 nilfs_abort_logs(&logs, ret ? : err);
1825 1714
1826 list_splice_tail_init(&sci->sc_segbufs, &logs); 1715 list_splice_tail_init(&sci->sc_segbufs, &logs);
1827 nilfs_cancel_segusage(&logs, nilfs->ns_sufile); 1716 nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
1828 nilfs_free_incomplete_logs(&logs, nilfs); 1717 nilfs_free_incomplete_logs(&logs, nilfs);
1829 nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err);
1830 1718
1831 if (sci->sc_stage.flags & NILFS_CF_SUFREED) { 1719 if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
1832 ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile, 1720 ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
@@ -1920,8 +1808,6 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1920 1808
1921 nilfs_end_page_io(fs_page, 0); 1809 nilfs_end_page_io(fs_page, 0);
1922 1810
1923 nilfs_clear_copied_buffers(&sci->sc_copied_buffers, 0);
1924
1925 nilfs_drop_collected_inodes(&sci->sc_dirty_files); 1811 nilfs_drop_collected_inodes(&sci->sc_dirty_files);
1926 1812
1927 if (nilfs_doing_gc()) 1813 if (nilfs_doing_gc())
@@ -1979,7 +1865,7 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
1979 "failed to get inode block.\n"); 1865 "failed to get inode block.\n");
1980 return err; 1866 return err;
1981 } 1867 }
1982 nilfs_mdt_mark_buffer_dirty(ibh); 1868 mark_buffer_dirty(ibh);
1983 nilfs_mdt_mark_dirty(ifile); 1869 nilfs_mdt_mark_dirty(ifile);
1984 spin_lock(&nilfs->ns_inode_lock); 1870 spin_lock(&nilfs->ns_inode_lock);
1985 if (likely(!ii->i_bh)) 1871 if (likely(!ii->i_bh))
@@ -1991,8 +1877,7 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
1991 1877
1992 clear_bit(NILFS_I_QUEUED, &ii->i_state); 1878 clear_bit(NILFS_I_QUEUED, &ii->i_state);
1993 set_bit(NILFS_I_BUSY, &ii->i_state); 1879 set_bit(NILFS_I_BUSY, &ii->i_state);
1994 list_del(&ii->i_dirty); 1880 list_move_tail(&ii->i_dirty, &sci->sc_dirty_files);
1995 list_add_tail(&ii->i_dirty, &sci->sc_dirty_files);
1996 } 1881 }
1997 spin_unlock(&nilfs->ns_inode_lock); 1882 spin_unlock(&nilfs->ns_inode_lock);
1998 1883
@@ -2014,8 +1899,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
2014 clear_bit(NILFS_I_BUSY, &ii->i_state); 1899 clear_bit(NILFS_I_BUSY, &ii->i_state);
2015 brelse(ii->i_bh); 1900 brelse(ii->i_bh);
2016 ii->i_bh = NULL; 1901 ii->i_bh = NULL;
2017 list_del(&ii->i_dirty); 1902 list_move_tail(&ii->i_dirty, &ti->ti_garbage);
2018 list_add_tail(&ii->i_dirty, &ti->ti_garbage);
2019 } 1903 }
2020 spin_unlock(&nilfs->ns_inode_lock); 1904 spin_unlock(&nilfs->ns_inode_lock);
2021} 1905}
@@ -2026,7 +1910,6 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
2026static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) 1910static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2027{ 1911{
2028 struct the_nilfs *nilfs = sci->sc_super->s_fs_info; 1912 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2029 struct page *failed_page;
2030 int err; 1913 int err;
2031 1914
2032 sci->sc_stage.scnt = NILFS_ST_INIT; 1915 sci->sc_stage.scnt = NILFS_ST_INIT;
@@ -2081,11 +1964,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2081 nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); 1964 nilfs_segctor_update_segusage(sci, nilfs->ns_sufile);
2082 1965
2083 /* Write partial segments */ 1966 /* Write partial segments */
2084 err = nilfs_segctor_prepare_write(sci, &failed_page); 1967 nilfs_segctor_prepare_write(sci);
2085 if (err) {
2086 nilfs_abort_logs(&sci->sc_segbufs, failed_page, err);
2087 goto failed_to_write;
2088 }
2089 1968
2090 nilfs_add_checksums_on_logs(&sci->sc_segbufs, 1969 nilfs_add_checksums_on_logs(&sci->sc_segbufs,
2091 nilfs->ns_crc_seed); 1970 nilfs->ns_crc_seed);
@@ -2687,7 +2566,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
2687 INIT_LIST_HEAD(&sci->sc_segbufs); 2566 INIT_LIST_HEAD(&sci->sc_segbufs);
2688 INIT_LIST_HEAD(&sci->sc_write_logs); 2567 INIT_LIST_HEAD(&sci->sc_write_logs);
2689 INIT_LIST_HEAD(&sci->sc_gc_inodes); 2568 INIT_LIST_HEAD(&sci->sc_gc_inodes);
2690 INIT_LIST_HEAD(&sci->sc_copied_buffers);
2691 init_timer(&sci->sc_timer); 2569 init_timer(&sci->sc_timer);
2692 2570
2693 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; 2571 sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
@@ -2741,8 +2619,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2741 if (flag || !nilfs_segctor_confirm(sci)) 2619 if (flag || !nilfs_segctor_confirm(sci))
2742 nilfs_segctor_write_out(sci); 2620 nilfs_segctor_write_out(sci);
2743 2621
2744 WARN_ON(!list_empty(&sci->sc_copied_buffers));
2745
2746 if (!list_empty(&sci->sc_dirty_files)) { 2622 if (!list_empty(&sci->sc_dirty_files)) {
2747 nilfs_warning(sci->sc_super, __func__, 2623 nilfs_warning(sci->sc_super, __func__,
2748 "dirty file(s) after the final construction\n"); 2624 "dirty file(s) after the final construction\n");
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 6c02a86745fb..38a1d0013314 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -92,7 +92,6 @@ struct nilfs_segsum_pointer {
92 * @sc_nblk_inc: Block count of current generation 92 * @sc_nblk_inc: Block count of current generation
93 * @sc_dirty_files: List of files to be written 93 * @sc_dirty_files: List of files to be written
94 * @sc_gc_inodes: List of GC inodes having blocks to be written 94 * @sc_gc_inodes: List of GC inodes having blocks to be written
95 * @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data
96 * @sc_freesegs: array of segment numbers to be freed 95 * @sc_freesegs: array of segment numbers to be freed
97 * @sc_nfreesegs: number of segments on @sc_freesegs 96 * @sc_nfreesegs: number of segments on @sc_freesegs
98 * @sc_dsync_inode: inode whose data pages are written for a sync operation 97 * @sc_dsync_inode: inode whose data pages are written for a sync operation
@@ -136,7 +135,6 @@ struct nilfs_sc_info {
136 135
137 struct list_head sc_dirty_files; 136 struct list_head sc_dirty_files;
138 struct list_head sc_gc_inodes; 137 struct list_head sc_gc_inodes;
139 struct list_head sc_copied_buffers;
140 138
141 __u64 *sc_freesegs; 139 __u64 *sc_freesegs;
142 size_t sc_nfreesegs; 140 size_t sc_nfreesegs;
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 1d6f488ccae8..0a0aba617d8a 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -33,7 +33,9 @@
33 33
34struct nilfs_sufile_info { 34struct nilfs_sufile_info {
35 struct nilfs_mdt_info mi; 35 struct nilfs_mdt_info mi;
36 unsigned long ncleansegs; 36 unsigned long ncleansegs;/* number of clean segments */
37 __u64 allocmin; /* lower limit of allocatable segment range */
38 __u64 allocmax; /* upper limit of allocatable segment range */
37}; 39};
38 40
39static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile) 41static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile)
@@ -96,6 +98,13 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum,
96 create, NULL, bhp); 98 create, NULL, bhp);
97} 99}
98 100
101static int nilfs_sufile_delete_segment_usage_block(struct inode *sufile,
102 __u64 segnum)
103{
104 return nilfs_mdt_delete_block(sufile,
105 nilfs_sufile_get_blkoff(sufile, segnum));
106}
107
99static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, 108static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
100 u64 ncleanadd, u64 ndirtyadd) 109 u64 ncleanadd, u64 ndirtyadd)
101{ 110{
@@ -108,7 +117,7 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
108 le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); 117 le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd);
109 kunmap_atomic(kaddr, KM_USER0); 118 kunmap_atomic(kaddr, KM_USER0);
110 119
111 nilfs_mdt_mark_buffer_dirty(header_bh); 120 mark_buffer_dirty(header_bh);
112} 121}
113 122
114/** 123/**
@@ -248,6 +257,35 @@ int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create,
248} 257}
249 258
250/** 259/**
260 * nilfs_sufile_set_alloc_range - limit range of segment to be allocated
261 * @sufile: inode of segment usage file
262 * @start: minimum segment number of allocatable region (inclusive)
263 * @end: maximum segment number of allocatable region (inclusive)
264 *
265 * Return Value: On success, 0 is returned. On error, one of the
266 * following negative error codes is returned.
267 *
268 * %-ERANGE - invalid segment region
269 */
270int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end)
271{
272 struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
273 __u64 nsegs;
274 int ret = -ERANGE;
275
276 down_write(&NILFS_MDT(sufile)->mi_sem);
277 nsegs = nilfs_sufile_get_nsegments(sufile);
278
279 if (start <= end && end < nsegs) {
280 sui->allocmin = start;
281 sui->allocmax = end;
282 ret = 0;
283 }
284 up_write(&NILFS_MDT(sufile)->mi_sem);
285 return ret;
286}
287
288/**
251 * nilfs_sufile_alloc - allocate a segment 289 * nilfs_sufile_alloc - allocate a segment
252 * @sufile: inode of segment usage file 290 * @sufile: inode of segment usage file
253 * @segnump: pointer to segment number 291 * @segnump: pointer to segment number
@@ -269,11 +307,12 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
269 struct buffer_head *header_bh, *su_bh; 307 struct buffer_head *header_bh, *su_bh;
270 struct nilfs_sufile_header *header; 308 struct nilfs_sufile_header *header;
271 struct nilfs_segment_usage *su; 309 struct nilfs_segment_usage *su;
310 struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
272 size_t susz = NILFS_MDT(sufile)->mi_entry_size; 311 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
273 __u64 segnum, maxsegnum, last_alloc; 312 __u64 segnum, maxsegnum, last_alloc;
274 void *kaddr; 313 void *kaddr;
275 unsigned long nsegments, ncleansegs, nsus; 314 unsigned long nsegments, ncleansegs, nsus, cnt;
276 int ret, i, j; 315 int ret, j;
277 316
278 down_write(&NILFS_MDT(sufile)->mi_sem); 317 down_write(&NILFS_MDT(sufile)->mi_sem);
279 318
@@ -287,13 +326,31 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
287 kunmap_atomic(kaddr, KM_USER0); 326 kunmap_atomic(kaddr, KM_USER0);
288 327
289 nsegments = nilfs_sufile_get_nsegments(sufile); 328 nsegments = nilfs_sufile_get_nsegments(sufile);
329 maxsegnum = sui->allocmax;
290 segnum = last_alloc + 1; 330 segnum = last_alloc + 1;
291 maxsegnum = nsegments - 1; 331 if (segnum < sui->allocmin || segnum > sui->allocmax)
292 for (i = 0; i < nsegments; i += nsus) { 332 segnum = sui->allocmin;
293 if (segnum >= nsegments) { 333
294 /* wrap around */ 334 for (cnt = 0; cnt < nsegments; cnt += nsus) {
295 segnum = 0; 335 if (segnum > maxsegnum) {
296 maxsegnum = last_alloc; 336 if (cnt < sui->allocmax - sui->allocmin + 1) {
337 /*
338 * wrap around in the limited region.
339 * if allocation started from
340 * sui->allocmin, this never happens.
341 */
342 segnum = sui->allocmin;
343 maxsegnum = last_alloc;
344 } else if (segnum > sui->allocmin &&
345 sui->allocmax + 1 < nsegments) {
346 segnum = sui->allocmax + 1;
347 maxsegnum = nsegments - 1;
348 } else if (sui->allocmin > 0) {
349 segnum = 0;
350 maxsegnum = sui->allocmin - 1;
351 } else {
352 break; /* never happens */
353 }
297 } 354 }
298 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, 355 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1,
299 &su_bh); 356 &su_bh);
@@ -319,9 +376,9 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
319 header->sh_last_alloc = cpu_to_le64(segnum); 376 header->sh_last_alloc = cpu_to_le64(segnum);
320 kunmap_atomic(kaddr, KM_USER0); 377 kunmap_atomic(kaddr, KM_USER0);
321 378
322 NILFS_SUI(sufile)->ncleansegs--; 379 sui->ncleansegs--;
323 nilfs_mdt_mark_buffer_dirty(header_bh); 380 mark_buffer_dirty(header_bh);
324 nilfs_mdt_mark_buffer_dirty(su_bh); 381 mark_buffer_dirty(su_bh);
325 nilfs_mdt_mark_dirty(sufile); 382 nilfs_mdt_mark_dirty(sufile);
326 brelse(su_bh); 383 brelse(su_bh);
327 *segnump = segnum; 384 *segnump = segnum;
@@ -364,7 +421,7 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum,
364 nilfs_sufile_mod_counter(header_bh, -1, 1); 421 nilfs_sufile_mod_counter(header_bh, -1, 1);
365 NILFS_SUI(sufile)->ncleansegs--; 422 NILFS_SUI(sufile)->ncleansegs--;
366 423
367 nilfs_mdt_mark_buffer_dirty(su_bh); 424 mark_buffer_dirty(su_bh);
368 nilfs_mdt_mark_dirty(sufile); 425 nilfs_mdt_mark_dirty(sufile);
369} 426}
370 427
@@ -395,7 +452,7 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
395 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); 452 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
396 NILFS_SUI(sufile)->ncleansegs -= clean; 453 NILFS_SUI(sufile)->ncleansegs -= clean;
397 454
398 nilfs_mdt_mark_buffer_dirty(su_bh); 455 mark_buffer_dirty(su_bh);
399 nilfs_mdt_mark_dirty(sufile); 456 nilfs_mdt_mark_dirty(sufile);
400} 457}
401 458
@@ -421,7 +478,7 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
421 sudirty = nilfs_segment_usage_dirty(su); 478 sudirty = nilfs_segment_usage_dirty(su);
422 nilfs_segment_usage_set_clean(su); 479 nilfs_segment_usage_set_clean(su);
423 kunmap_atomic(kaddr, KM_USER0); 480 kunmap_atomic(kaddr, KM_USER0);
424 nilfs_mdt_mark_buffer_dirty(su_bh); 481 mark_buffer_dirty(su_bh);
425 482
426 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); 483 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0);
427 NILFS_SUI(sufile)->ncleansegs++; 484 NILFS_SUI(sufile)->ncleansegs++;
@@ -441,7 +498,7 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum)
441 498
442 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); 499 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh);
443 if (!ret) { 500 if (!ret) {
444 nilfs_mdt_mark_buffer_dirty(bh); 501 mark_buffer_dirty(bh);
445 nilfs_mdt_mark_dirty(sufile); 502 nilfs_mdt_mark_dirty(sufile);
446 brelse(bh); 503 brelse(bh);
447 } 504 }
@@ -476,7 +533,7 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
476 su->su_nblocks = cpu_to_le32(nblocks); 533 su->su_nblocks = cpu_to_le32(nblocks);
477 kunmap_atomic(kaddr, KM_USER0); 534 kunmap_atomic(kaddr, KM_USER0);
478 535
479 nilfs_mdt_mark_buffer_dirty(bh); 536 mark_buffer_dirty(bh);
480 nilfs_mdt_mark_dirty(sufile); 537 nilfs_mdt_mark_dirty(sufile);
481 brelse(bh); 538 brelse(bh);
482 539
@@ -505,7 +562,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat)
505{ 562{
506 struct buffer_head *header_bh; 563 struct buffer_head *header_bh;
507 struct nilfs_sufile_header *header; 564 struct nilfs_sufile_header *header;
508 struct the_nilfs *nilfs = NILFS_I_NILFS(sufile); 565 struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
509 void *kaddr; 566 void *kaddr;
510 int ret; 567 int ret;
511 568
@@ -555,11 +612,183 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
555 nilfs_sufile_mod_counter(header_bh, -1, 0); 612 nilfs_sufile_mod_counter(header_bh, -1, 0);
556 NILFS_SUI(sufile)->ncleansegs--; 613 NILFS_SUI(sufile)->ncleansegs--;
557 } 614 }
558 nilfs_mdt_mark_buffer_dirty(su_bh); 615 mark_buffer_dirty(su_bh);
559 nilfs_mdt_mark_dirty(sufile); 616 nilfs_mdt_mark_dirty(sufile);
560} 617}
561 618
562/** 619/**
620 * nilfs_sufile_truncate_range - truncate range of segment array
621 * @sufile: inode of segment usage file
622 * @start: start segment number (inclusive)
623 * @end: end segment number (inclusive)
624 *
625 * Return Value: On success, 0 is returned. On error, one of the
626 * following negative error codes is returned.
627 *
628 * %-EIO - I/O error.
629 *
630 * %-ENOMEM - Insufficient amount of memory available.
631 *
632 * %-EINVAL - Invalid number of segments specified
633 *
634 * %-EBUSY - Dirty or active segments are present in the range
635 */
636static int nilfs_sufile_truncate_range(struct inode *sufile,
637 __u64 start, __u64 end)
638{
639 struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
640 struct buffer_head *header_bh;
641 struct buffer_head *su_bh;
642 struct nilfs_segment_usage *su, *su2;
643 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
644 unsigned long segusages_per_block;
645 unsigned long nsegs, ncleaned;
646 __u64 segnum;
647 void *kaddr;
648 ssize_t n, nc;
649 int ret;
650 int j;
651
652 nsegs = nilfs_sufile_get_nsegments(sufile);
653
654 ret = -EINVAL;
655 if (start > end || start >= nsegs)
656 goto out;
657
658 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
659 if (ret < 0)
660 goto out;
661
662 segusages_per_block = nilfs_sufile_segment_usages_per_block(sufile);
663 ncleaned = 0;
664
665 for (segnum = start; segnum <= end; segnum += n) {
666 n = min_t(unsigned long,
667 segusages_per_block -
668 nilfs_sufile_get_offset(sufile, segnum),
669 end - segnum + 1);
670 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0,
671 &su_bh);
672 if (ret < 0) {
673 if (ret != -ENOENT)
674 goto out_header;
675 /* hole */
676 continue;
677 }
678 kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
679 su = nilfs_sufile_block_get_segment_usage(
680 sufile, segnum, su_bh, kaddr);
681 su2 = su;
682 for (j = 0; j < n; j++, su = (void *)su + susz) {
683 if ((le32_to_cpu(su->su_flags) &
684 ~(1UL << NILFS_SEGMENT_USAGE_ERROR)) ||
685 nilfs_segment_is_active(nilfs, segnum + j)) {
686 ret = -EBUSY;
687 kunmap_atomic(kaddr, KM_USER0);
688 brelse(su_bh);
689 goto out_header;
690 }
691 }
692 nc = 0;
693 for (su = su2, j = 0; j < n; j++, su = (void *)su + susz) {
694 if (nilfs_segment_usage_error(su)) {
695 nilfs_segment_usage_set_clean(su);
696 nc++;
697 }
698 }
699 kunmap_atomic(kaddr, KM_USER0);
700 if (nc > 0) {
701 mark_buffer_dirty(su_bh);
702 ncleaned += nc;
703 }
704 brelse(su_bh);
705
706 if (n == segusages_per_block) {
707 /* make hole */
708 nilfs_sufile_delete_segment_usage_block(sufile, segnum);
709 }
710 }
711 ret = 0;
712
713out_header:
714 if (ncleaned > 0) {
715 NILFS_SUI(sufile)->ncleansegs += ncleaned;
716 nilfs_sufile_mod_counter(header_bh, ncleaned, 0);
717 nilfs_mdt_mark_dirty(sufile);
718 }
719 brelse(header_bh);
720out:
721 return ret;
722}
723
724/**
725 * nilfs_sufile_resize - resize segment array
726 * @sufile: inode of segment usage file
727 * @newnsegs: new number of segments
728 *
729 * Return Value: On success, 0 is returned. On error, one of the
730 * following negative error codes is returned.
731 *
732 * %-EIO - I/O error.
733 *
734 * %-ENOMEM - Insufficient amount of memory available.
735 *
736 * %-ENOSPC - Enough free space is not left for shrinking
737 *
738 * %-EBUSY - Dirty or active segments exist in the region to be truncated
739 */
740int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs)
741{
742 struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
743 struct buffer_head *header_bh;
744 struct nilfs_sufile_header *header;
745 struct nilfs_sufile_info *sui = NILFS_SUI(sufile);
746 void *kaddr;
747 unsigned long nsegs, nrsvsegs;
748 int ret = 0;
749
750 down_write(&NILFS_MDT(sufile)->mi_sem);
751
752 nsegs = nilfs_sufile_get_nsegments(sufile);
753 if (nsegs == newnsegs)
754 goto out;
755
756 ret = -ENOSPC;
757 nrsvsegs = nilfs_nrsvsegs(nilfs, newnsegs);
758 if (newnsegs < nsegs && nsegs - newnsegs + nrsvsegs > sui->ncleansegs)
759 goto out;
760
761 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
762 if (ret < 0)
763 goto out;
764
765 if (newnsegs > nsegs) {
766 sui->ncleansegs += newnsegs - nsegs;
767 } else /* newnsegs < nsegs */ {
768 ret = nilfs_sufile_truncate_range(sufile, newnsegs, nsegs - 1);
769 if (ret < 0)
770 goto out_header;
771
772 sui->ncleansegs -= nsegs - newnsegs;
773 }
774
775 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
776 header = kaddr + bh_offset(header_bh);
777 header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs);
778 kunmap_atomic(kaddr, KM_USER0);
779
780 mark_buffer_dirty(header_bh);
781 nilfs_mdt_mark_dirty(sufile);
782 nilfs_set_nsegments(nilfs, newnsegs);
783
784out_header:
785 brelse(header_bh);
786out:
787 up_write(&NILFS_MDT(sufile)->mi_sem);
788 return ret;
789}
790
791/**
563 * nilfs_sufile_get_suinfo - 792 * nilfs_sufile_get_suinfo -
564 * @sufile: inode of segment usage file 793 * @sufile: inode of segment usage file
565 * @segnum: segment number to start looking 794 * @segnum: segment number to start looking
@@ -583,7 +812,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
583 struct nilfs_segment_usage *su; 812 struct nilfs_segment_usage *su;
584 struct nilfs_suinfo *si = buf; 813 struct nilfs_suinfo *si = buf;
585 size_t susz = NILFS_MDT(sufile)->mi_entry_size; 814 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
586 struct the_nilfs *nilfs = NILFS_I_NILFS(sufile); 815 struct the_nilfs *nilfs = sufile->i_sb->s_fs_info;
587 void *kaddr; 816 void *kaddr;
588 unsigned long nsegs, segusages_per_block; 817 unsigned long nsegs, segusages_per_block;
589 ssize_t n; 818 ssize_t n;
@@ -679,6 +908,9 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize,
679 kunmap_atomic(kaddr, KM_USER0); 908 kunmap_atomic(kaddr, KM_USER0);
680 brelse(header_bh); 909 brelse(header_bh);
681 910
911 sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1;
912 sui->allocmin = 0;
913
682 unlock_new_inode(sufile); 914 unlock_new_inode(sufile);
683 out: 915 out:
684 *inodep = sufile; 916 *inodep = sufile;
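For reference, a minimal user-space sketch of the scan order that the new allocmin/allocmax bounds impose on nilfs_sufile_alloc() above. It walks one segment per step instead of one segment-usage block per step, and uses a plain clean-segment array instead of sufile entries, so every name and value below is illustrative rather than kernel code:

#include <stdio.h>
#include <stdbool.h>

static long pick_segment(const bool *is_clean, unsigned long nsegments,
			 unsigned long allocmin, unsigned long allocmax,
			 unsigned long last_alloc)
{
	unsigned long segnum = last_alloc + 1;
	unsigned long maxsegnum = allocmax;
	unsigned long cnt;

	if (segnum < allocmin || segnum > allocmax)
		segnum = allocmin;

	for (cnt = 0; cnt < nsegments; cnt++) {
		if (segnum > maxsegnum) {
			if (cnt < allocmax - allocmin + 1) {
				/* wrap around inside [allocmin, allocmax] */
				segnum = allocmin;
				maxsegnum = last_alloc;
			} else if (segnum > allocmin &&
				   allocmax + 1 < nsegments) {
				/* region above allocmax */
				segnum = allocmax + 1;
				maxsegnum = nsegments - 1;
			} else if (allocmin > 0) {
				/* region below allocmin */
				segnum = 0;
				maxsegnum = allocmin - 1;
			} else {
				break;
			}
		}
		if (is_clean[segnum])
			return (long)segnum;
		segnum++;
	}
	return -1;	/* no clean segment found */
}

int main(void)
{
	bool clean[16] = { [0] = true, [3] = true, [9] = true };

	/*
	 * Allocatable range [4, 11], last allocation was segment 10:
	 * the scan starts at 11, wraps back to 4, and picks segment 9,
	 * ignoring the clean segments outside the allowed range.
	 */
	printf("picked segment %ld\n", pick_segment(clean, 16, 4, 11, 10));
	return 0;
}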
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index a943fbacb45b..e84bc5b51fc1 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -31,11 +31,12 @@
31 31
32static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) 32static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
33{ 33{
34 return NILFS_I_NILFS(sufile)->ns_nsegments; 34 return ((struct the_nilfs *)sufile->i_sb->s_fs_info)->ns_nsegments;
35} 35}
36 36
37unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile); 37unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile);
38 38
39int nilfs_sufile_set_alloc_range(struct inode *sufile, __u64 start, __u64 end);
39int nilfs_sufile_alloc(struct inode *, __u64 *); 40int nilfs_sufile_alloc(struct inode *, __u64 *);
40int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum); 41int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum);
41int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, 42int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
@@ -61,6 +62,7 @@ void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *,
61void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, 62void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *,
62 struct buffer_head *); 63 struct buffer_head *);
63 64
65int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs);
64int nilfs_sufile_read(struct super_block *sb, size_t susize, 66int nilfs_sufile_read(struct super_block *sb, size_t susize,
65 struct nilfs_inode *raw_inode, struct inode **inodep); 67 struct nilfs_inode *raw_inode, struct inode **inodep);
66 68
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 062cca065195..8351c44a7320 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -56,6 +56,7 @@
56#include "btnode.h" 56#include "btnode.h"
57#include "page.h" 57#include "page.h"
58#include "cpfile.h" 58#include "cpfile.h"
59#include "sufile.h" /* nilfs_sufile_resize(), nilfs_sufile_set_alloc_range() */
59#include "ifile.h" 60#include "ifile.h"
60#include "dat.h" 61#include "dat.h"
61#include "segment.h" 62#include "segment.h"
@@ -165,7 +166,7 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
165 ii->i_state = 0; 166 ii->i_state = 0;
166 ii->i_cno = 0; 167 ii->i_cno = 0;
167 ii->vfs_inode.i_version = 1; 168 ii->vfs_inode.i_version = 1;
168 nilfs_btnode_cache_init(&ii->i_btnode_cache, sb->s_bdi); 169 nilfs_mapping_init(&ii->i_btnode_cache, &ii->vfs_inode, sb->s_bdi);
169 return &ii->vfs_inode; 170 return &ii->vfs_inode;
170} 171}
171 172
@@ -347,6 +348,134 @@ int nilfs_cleanup_super(struct super_block *sb)
347 return ret; 348 return ret;
348} 349}
349 350
351/**
352 * nilfs_move_2nd_super - relocate secondary super block
353 * @sb: super block instance
354 * @sb2off: new offset of the secondary super block (in bytes)
355 */
356static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
357{
358 struct the_nilfs *nilfs = sb->s_fs_info;
359 struct buffer_head *nsbh;
360 struct nilfs_super_block *nsbp;
361 sector_t blocknr, newblocknr;
362 unsigned long offset;
363 int sb2i = -1; /* array index of the secondary superblock */
364 int ret = 0;
365
366 /* nilfs->ns_sem must be locked by the caller. */
367 if (nilfs->ns_sbh[1] &&
368 nilfs->ns_sbh[1]->b_blocknr > nilfs->ns_first_data_block) {
369 sb2i = 1;
370 blocknr = nilfs->ns_sbh[1]->b_blocknr;
371 } else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) {
372 sb2i = 0;
373 blocknr = nilfs->ns_sbh[0]->b_blocknr;
374 }
375 if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off)
376 goto out; /* super block location is unchanged */
377
378 /* Get new super block buffer */
379 newblocknr = sb2off >> nilfs->ns_blocksize_bits;
380 offset = sb2off & (nilfs->ns_blocksize - 1);
381 nsbh = sb_getblk(sb, newblocknr);
382 if (!nsbh) {
383 printk(KERN_WARNING
384 "NILFS warning: unable to move secondary superblock "
385 "to block %llu\n", (unsigned long long)newblocknr);
386 ret = -EIO;
387 goto out;
388 }
389 nsbp = (void *)nsbh->b_data + offset;
390 memset(nsbp, 0, nilfs->ns_blocksize);
391
392 if (sb2i >= 0) {
393 memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize);
394 brelse(nilfs->ns_sbh[sb2i]);
395 nilfs->ns_sbh[sb2i] = nsbh;
396 nilfs->ns_sbp[sb2i] = nsbp;
397 } else if (nilfs->ns_sbh[0]->b_blocknr < nilfs->ns_first_data_block) {
398 /* secondary super block will be restored to index 1 */
399 nilfs->ns_sbh[1] = nsbh;
400 nilfs->ns_sbp[1] = nsbp;
401 } else {
402 brelse(nsbh);
403 }
404out:
405 return ret;
406}
407
408/**
409 * nilfs_resize_fs - resize the filesystem
410 * @sb: super block instance
411 * @newsize: new size of the filesystem (in bytes)
412 */
413int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
414{
415 struct the_nilfs *nilfs = sb->s_fs_info;
416 struct nilfs_super_block **sbp;
417 __u64 devsize, newnsegs;
418 loff_t sb2off;
419 int ret;
420
421 ret = -ERANGE;
422 devsize = i_size_read(sb->s_bdev->bd_inode);
423 if (newsize > devsize)
424 goto out;
425
426 /*
427 * Write lock is required to protect some functions depending
428 * on the number of segments, the number of reserved segments,
429 * and so forth.
430 */
431 down_write(&nilfs->ns_segctor_sem);
432
433 sb2off = NILFS_SB2_OFFSET_BYTES(newsize);
434 newnsegs = sb2off >> nilfs->ns_blocksize_bits;
435 do_div(newnsegs, nilfs->ns_blocks_per_segment);
436
437 ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs);
438 up_write(&nilfs->ns_segctor_sem);
439 if (ret < 0)
440 goto out;
441
442 ret = nilfs_construct_segment(sb);
443 if (ret < 0)
444 goto out;
445
446 down_write(&nilfs->ns_sem);
447 nilfs_move_2nd_super(sb, sb2off);
448 ret = -EIO;
449 sbp = nilfs_prepare_super(sb, 0);
450 if (likely(sbp)) {
451 nilfs_set_log_cursor(sbp[0], nilfs);
452 /*
453 * Drop NILFS_RESIZE_FS flag for compatibility with
454 * mount-time resize which may be implemented in a
455 * future release.
456 */
457 sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) &
458 ~NILFS_RESIZE_FS);
459 sbp[0]->s_dev_size = cpu_to_le64(newsize);
460 sbp[0]->s_nsegments = cpu_to_le64(nilfs->ns_nsegments);
461 if (sbp[1])
462 memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
463 ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
464 }
465 up_write(&nilfs->ns_sem);
466
467 /*
468 * Reset the range of allocatable segments last. This order
469 * is important in the case of expansion because the secondary
470 * superblock must be protected from log write until migration
471 * completes.
472 */
473 if (!ret)
474 nilfs_sufile_set_alloc_range(nilfs->ns_sufile, 0, newnsegs - 1);
475out:
476 return ret;
477}
478
350static void nilfs_put_super(struct super_block *sb) 479static void nilfs_put_super(struct super_block *sb)
351{ 480{
352 struct the_nilfs *nilfs = sb->s_fs_info; 481 struct the_nilfs *nilfs = sb->s_fs_info;
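nilfs_resize_fs() above is the in-kernel half of online resizing. A hypothetical user-space sketch of driving it follows; the ioctl name NILFS_IOCTL_RESIZE, its __u64 byte-count argument, and its wiring to nilfs_resize_fs() are assumptions and do not appear in the hunks above:

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <linux/nilfs2_fs.h>	/* assumed to provide NILFS_IOCTL_RESIZE */

int main(int argc, char *argv[])
{
	__u64 newsize;
	int fd;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <nilfs-mount-or-device> <new-size-in-bytes>\n",
			argv[0]);
		return 1;
	}
	newsize = strtoull(argv[2], NULL, 0);

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* assumed to reach nilfs_resize_fs(sb, newsize) in the kernel */
	if (ioctl(fd, NILFS_IOCTL_RESIZE, &newsize) < 0) {
		perror("NILFS_IOCTL_RESIZE");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}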
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index d2acd1a651f3..d32714094375 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -363,6 +363,24 @@ static unsigned long long nilfs_max_size(unsigned int blkbits)
363 return res; 363 return res;
364} 364}
365 365
366/**
367 * nilfs_nrsvsegs - calculate the number of reserved segments
368 * @nilfs: nilfs object
369 * @nsegs: total number of segments
370 */
371unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs)
372{
373 return max_t(unsigned long, NILFS_MIN_NRSVSEGS,
374 DIV_ROUND_UP(nsegs * nilfs->ns_r_segments_percentage,
375 100));
376}
377
378void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs)
379{
380 nilfs->ns_nsegments = nsegs;
381 nilfs->ns_nrsvsegs = nilfs_nrsvsegs(nilfs, nsegs);
382}
383
366static int nilfs_store_disk_layout(struct the_nilfs *nilfs, 384static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
367 struct nilfs_super_block *sbp) 385 struct nilfs_super_block *sbp)
368{ 386{
@@ -389,13 +407,9 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs,
389 } 407 }
390 408
391 nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block); 409 nilfs->ns_first_data_block = le64_to_cpu(sbp->s_first_data_block);
392 nilfs->ns_nsegments = le64_to_cpu(sbp->s_nsegments);
393 nilfs->ns_r_segments_percentage = 410 nilfs->ns_r_segments_percentage =
394 le32_to_cpu(sbp->s_r_segments_percentage); 411 le32_to_cpu(sbp->s_r_segments_percentage);
395 nilfs->ns_nrsvsegs = 412 nilfs_set_nsegments(nilfs, le64_to_cpu(sbp->s_nsegments));
396 max_t(unsigned long, NILFS_MIN_NRSVSEGS,
397 DIV_ROUND_UP(nilfs->ns_nsegments *
398 nilfs->ns_r_segments_percentage, 100));
399 nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed); 413 nilfs->ns_crc_seed = le32_to_cpu(sbp->s_crc_seed);
400 return 0; 414 return 0;
401} 415}
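The reservation math factored out into nilfs_nrsvsegs() above boils down to max(NILFS_MIN_NRSVSEGS, ceil(nsegs * ns_r_segments_percentage / 100)). A small worked example, assuming a minimum of 8 reserved segments and a 5% ratio (both stand-in values, not taken from this patch):

#include <stdio.h>

#define MIN_NRSVSEGS 8ul		/* stand-in for NILFS_MIN_NRSVSEGS */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static unsigned long nrsvsegs(unsigned long nsegs, unsigned long r_percentage)
{
	unsigned long byratio = DIV_ROUND_UP(nsegs * r_percentage, 100);

	return byratio > MIN_NRSVSEGS ? byratio : MIN_NRSVSEGS;
}

int main(void)
{
	/* small device: the floor wins, ceil(100 * 5 / 100) = 5 < 8 */
	printf("nsegs=100  -> nrsvsegs=%lu\n", nrsvsegs(100, 5));
	/* larger device: the ratio wins, ceil(4096 * 5 / 100) = 205 */
	printf("nsegs=4096 -> nrsvsegs=%lu\n", nrsvsegs(4096, 5));
	return 0;
}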
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index f4968145c2a3..9992b11312ff 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -268,6 +268,8 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev);
268void destroy_nilfs(struct the_nilfs *nilfs); 268void destroy_nilfs(struct the_nilfs *nilfs);
269int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data); 269int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data);
270int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb); 270int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb);
271unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs);
272void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs);
271int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t); 273int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t);
272int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); 274int nilfs_count_free_blocks(struct the_nilfs *, sector_t *);
273struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno); 275struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno);
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 5d32749c896d..3c7606cff1ab 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3706,7 +3706,7 @@ int ocfs2_refcount_cow_xattr(struct inode *inode,
3706 context->cow_start = cow_start; 3706 context->cow_start = cow_start;
3707 context->cow_len = cow_len; 3707 context->cow_len = cow_len;
3708 context->ref_tree = ref_tree; 3708 context->ref_tree = ref_tree;
3709 context->ref_root_bh = ref_root_bh;; 3709 context->ref_root_bh = ref_root_bh;
3710 context->cow_object = xv; 3710 context->cow_object = xv;
3711 3711
3712 context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_jbd; 3712 context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_jbd;
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index ce4f62440425..af9fdf046769 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -565,7 +565,7 @@ static bool ldm_validate_partition_table(struct parsed_partitions *state)
565 565
566 data = read_part_sector(state, 0, &sect); 566 data = read_part_sector(state, 0, &sect);
567 if (!data) { 567 if (!data) {
568 ldm_crit ("Disk read failed."); 568 ldm_info ("Disk read failed.");
569 return false; 569 return false;
570 } 570 }
571 571
@@ -1335,6 +1335,11 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
1335 1335
1336 list_add_tail (&f->list, frags); 1336 list_add_tail (&f->list, frags);
1337found: 1337found:
1338 if (rec >= f->num) {
1339 ldm_error("REC value (%d) exceeds NUM value (%d)", rec, f->num);
1340 return false;
1341 }
1342
1338 if (f->map & (1 << rec)) { 1343 if (f->map & (1 << rec)) {
1339 ldm_error ("Duplicate VBLK, part %d.", rec); 1344 ldm_error ("Duplicate VBLK, part %d.", rec);
1340 f->map &= 0x7F; /* Mark the group as broken */ 1345 f->map &= 0x7F; /* Mark the group as broken */
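The new rec >= f->num check above rejects a fragment index read from disk before it is used as a bit position in the 8-bit assembly map. A small stand-alone sketch of that validation, with a hypothetical struct frag_map standing in for the kernel's fragment bookkeeping:

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

struct frag_map {
	unsigned int num;	/* number of fragments the group claims */
	uint8_t map;		/* bitmap of fragments seen so far */
};

static bool frag_add(struct frag_map *f, unsigned int rec)
{
	if (rec >= f->num) {
		fprintf(stderr, "REC value (%u) exceeds NUM value (%u)\n",
			rec, f->num);
		return false;		/* untrusted on-disk index: reject */
	}
	if (f->map & (1u << rec)) {
		fprintf(stderr, "Duplicate VBLK, part %u.\n", rec);
		f->map &= 0x7F;		/* mark the group as broken */
		return false;
	}
	f->map |= 1u << rec;
	return true;
}

int main(void)
{
	struct frag_map f = { .num = 4, .map = 0 };

	printf("%d\n", frag_add(&f, 2));	/* accepted */
	printf("%d\n", frag_add(&f, 2));	/* duplicate fragment */
	printf("%d\n", frag_add(&f, 9));	/* rejected by the new check */
	return 0;
}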
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index f835a25625ff..f2c3ff20ea68 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -152,21 +152,27 @@ EXPORT_SYMBOL_GPL(pstore_register);
152void pstore_get_records(void) 152void pstore_get_records(void)
153{ 153{
154 struct pstore_info *psi = psinfo; 154 struct pstore_info *psi = psinfo;
155 size_t size; 155 ssize_t size;
156 u64 id; 156 u64 id;
157 enum pstore_type_id type; 157 enum pstore_type_id type;
158 struct timespec time; 158 struct timespec time;
159 int failed = 0; 159 int failed = 0, rc;
160 160
161 if (!psi) 161 if (!psi)
162 return; 162 return;
163 163
164 mutex_lock(&psinfo->buf_mutex); 164 mutex_lock(&psinfo->buf_mutex);
165 rc = psi->open(psi);
166 if (rc)
167 goto out;
168
165 while ((size = psi->read(&id, &type, &time)) > 0) { 169 while ((size = psi->read(&id, &type, &time)) > 0) {
166 if (pstore_mkfile(type, psi->name, id, psi->buf, size, 170 if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size,
167 time, psi->erase)) 171 time, psi->erase))
168 failed++; 172 failed++;
169 } 173 }
174 psi->close(psi);
175out:
170 mutex_unlock(&psinfo->buf_mutex); 176 mutex_unlock(&psinfo->buf_mutex);
171 177
172 if (failed) 178 if (failed)
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index efc309fa3035..7797218d0b30 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -42,7 +42,7 @@ config SQUASHFS_LZO
42 select LZO_DECOMPRESS 42 select LZO_DECOMPRESS
43 help 43 help
44 Saying Y here includes support for reading Squashfs file systems 44 Saying Y here includes support for reading Squashfs file systems
45 compressed with LZO compresssion. LZO compression is mainly 45 compressed with LZO compression. LZO compression is mainly
46 aimed at embedded systems with slower CPUs where the overheads 46 aimed at embedded systems with slower CPUs where the overheads
47 of zlib are too high. 47 of zlib are too high.
48 48
@@ -57,7 +57,7 @@ config SQUASHFS_XZ
57 select XZ_DEC 57 select XZ_DEC
58 help 58 help
59 Saying Y here includes support for reading Squashfs file systems 59 Saying Y here includes support for reading Squashfs file systems
60 compressed with XZ compresssion. XZ gives better compression than 60 compressed with XZ compression. XZ gives better compression than
61 the default zlib compression, at the expense of greater CPU and 61 the default zlib compression, at the expense of greater CPU and
62 memory overhead. 62 memory overhead.
63 63
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index c37b520132ff..4b5a3fbb1f1f 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -29,7 +29,7 @@
29 * plus functions layered ontop of the generic cache implementation to 29 * plus functions layered ontop of the generic cache implementation to
30 * access the metadata and fragment caches. 30 * access the metadata and fragment caches.
31 * 31 *
32 * To avoid out of memory and fragmentation isssues with vmalloc the cache 32 * To avoid out of memory and fragmentation issues with vmalloc the cache
33 * uses sequences of kmalloced PAGE_CACHE_SIZE buffers. 33 * uses sequences of kmalloced PAGE_CACHE_SIZE buffers.
34 * 34 *
35 * It should be noted that the cache is not used for file datablocks, these 35 * It should be noted that the cache is not used for file datablocks, these
diff --git a/fs/super.c b/fs/super.c
index 8a06881b1920..c04f7e0b7ed2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -948,8 +948,7 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
948 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE 948 * filesystems should never set s_maxbytes larger than MAX_LFS_FILESIZE
949 * but s_maxbytes was an unsigned long long for many releases. Throw 949 * but s_maxbytes was an unsigned long long for many releases. Throw
950 * this warning for a little while to try and catch filesystems that 950 * this warning for a little while to try and catch filesystems that
951 * violate this rule. This warning should be either removed or 951 * violate this rule.
952 * converted to a BUG() in 2.6.34.
953 */ 952 */
954 WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " 953 WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
955 "negative value (%lld)\n", type->name, sb->s_maxbytes); 954 "negative value (%lld)\n", type->name, sb->s_maxbytes);
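
The retained WARN catches filesystems that still store an all-ones "unlimited" value in s_maxbytes, which reads back negative now that the field is a signed loff_t. A tiny standalone illustration of why the signed comparison is sufficient (plain C, not fs/super.c itself):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* The bit pattern ~0ULL, stored into a signed 64-bit field, is -1. */
        int64_t s_maxbytes = -1;

        if (s_maxbytes < 0)
            printf("s_maxbytes set to negative value (%lld)\n",
                   (long long)s_maxbytes);
        return 0;
    }
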
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index da3fefe91a8f..1ad8c93c1b85 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -24,13 +24,6 @@
24 24
25#include "sysfs.h" 25#include "sysfs.h"
26 26
27/* used in crash dumps to help with debugging */
28static char last_sysfs_file[PATH_MAX];
29void sysfs_printk_last_file(void)
30{
31 printk(KERN_EMERG "last sysfs file: %s\n", last_sysfs_file);
32}
33
34/* 27/*
35 * There's one sysfs_buffer for each open file and one 28 * There's one sysfs_buffer for each open file and one
36 * sysfs_open_dirent for each sysfs_dirent with one or more open 29 * sysfs_open_dirent for each sysfs_dirent with one or more open
@@ -337,11 +330,6 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
337 struct sysfs_buffer *buffer; 330 struct sysfs_buffer *buffer;
338 const struct sysfs_ops *ops; 331 const struct sysfs_ops *ops;
339 int error = -EACCES; 332 int error = -EACCES;
340 char *p;
341
342 p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file));
343 if (!IS_ERR(p))
344 memmove(last_sysfs_file, p, strlen(p) + 1);
345 333
346 /* need attr_sd for attr and ops, its parent for kobj */ 334 /* need attr_sd for attr and ops, its parent for kobj */
347 if (!sysfs_get_active(attr_sd)) 335 if (!sysfs_get_active(attr_sd))
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index c8769dc222d8..194414f8298c 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -101,9 +101,9 @@ int sysfs_create_group(struct kobject *kobj,
101} 101}
102 102
103/** 103/**
104 * sysfs_update_group - given a directory kobject, create an attribute group 104 * sysfs_update_group - given a directory kobject, update an attribute group
105 * @kobj: The kobject to create the group on 105 * @kobj: The kobject to update the group on
106 * @grp: The attribute group to create 106 * @grp: The attribute group to update
107 * 107 *
108 * This function updates an attribute group. Unlike 108 * This function updates an attribute group. Unlike
109 * sysfs_create_group(), it will explicitly not warn or error if any 109 * sysfs_create_group(), it will explicitly not warn or error if any
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 8c4fc1425b3e..f67acbdda5e8 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -22,16 +22,24 @@
22#include <linux/anon_inodes.h> 22#include <linux/anon_inodes.h>
23#include <linux/timerfd.h> 23#include <linux/timerfd.h>
24#include <linux/syscalls.h> 24#include <linux/syscalls.h>
25#include <linux/rcupdate.h>
25 26
26struct timerfd_ctx { 27struct timerfd_ctx {
27 struct hrtimer tmr; 28 struct hrtimer tmr;
28 ktime_t tintv; 29 ktime_t tintv;
30 ktime_t moffs;
29 wait_queue_head_t wqh; 31 wait_queue_head_t wqh;
30 u64 ticks; 32 u64 ticks;
31 int expired; 33 int expired;
32 int clockid; 34 int clockid;
35 struct rcu_head rcu;
36 struct list_head clist;
37 bool might_cancel;
33}; 38};
34 39
40static LIST_HEAD(cancel_list);
41static DEFINE_SPINLOCK(cancel_lock);
42
35/* 43/*
36 * This gets called when the timer event triggers. We set the "expired" 44 * This gets called when the timer event triggers. We set the "expired"
37 * flag, but we do not re-arm the timer (in case it's necessary, 45 * flag, but we do not re-arm the timer (in case it's necessary,
@@ -51,6 +59,63 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
51 return HRTIMER_NORESTART; 59 return HRTIMER_NORESTART;
52} 60}
53 61
62/*
63 * Called when the clock was set to cancel the timers in the cancel
64 * list.
65 */
66void timerfd_clock_was_set(void)
67{
68 ktime_t moffs = ktime_get_monotonic_offset();
69 struct timerfd_ctx *ctx;
70 unsigned long flags;
71
72 rcu_read_lock();
73 list_for_each_entry_rcu(ctx, &cancel_list, clist) {
74 if (!ctx->might_cancel)
75 continue;
76 spin_lock_irqsave(&ctx->wqh.lock, flags);
77 if (ctx->moffs.tv64 != moffs.tv64) {
78 ctx->moffs.tv64 = KTIME_MAX;
79 wake_up_locked(&ctx->wqh);
80 }
81 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
82 }
83 rcu_read_unlock();
84}
85
86static void timerfd_remove_cancel(struct timerfd_ctx *ctx)
87{
88 if (ctx->might_cancel) {
89 ctx->might_cancel = false;
90 spin_lock(&cancel_lock);
91 list_del_rcu(&ctx->clist);
92 spin_unlock(&cancel_lock);
93 }
94}
95
96static bool timerfd_canceled(struct timerfd_ctx *ctx)
97{
98 if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX)
99 return false;
100 ctx->moffs = ktime_get_monotonic_offset();
101 return true;
102}
103
104static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
105{
106 if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) &&
107 (flags & TFD_TIMER_CANCEL_ON_SET)) {
108 if (!ctx->might_cancel) {
109 ctx->might_cancel = true;
110 spin_lock(&cancel_lock);
111 list_add_rcu(&ctx->clist, &cancel_list);
112 spin_unlock(&cancel_lock);
113 }
114 } else if (ctx->might_cancel) {
115 timerfd_remove_cancel(ctx);
116 }
117}
118
54static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) 119static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
55{ 120{
56 ktime_t remaining; 121 ktime_t remaining;
@@ -59,11 +124,12 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
59 return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; 124 return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
60} 125}
61 126
62static void timerfd_setup(struct timerfd_ctx *ctx, int flags, 127static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
63 const struct itimerspec *ktmr) 128 const struct itimerspec *ktmr)
64{ 129{
65 enum hrtimer_mode htmode; 130 enum hrtimer_mode htmode;
66 ktime_t texp; 131 ktime_t texp;
132 int clockid = ctx->clockid;
67 133
68 htmode = (flags & TFD_TIMER_ABSTIME) ? 134 htmode = (flags & TFD_TIMER_ABSTIME) ?
69 HRTIMER_MODE_ABS: HRTIMER_MODE_REL; 135 HRTIMER_MODE_ABS: HRTIMER_MODE_REL;
@@ -72,19 +138,24 @@ static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
72 ctx->expired = 0; 138 ctx->expired = 0;
73 ctx->ticks = 0; 139 ctx->ticks = 0;
74 ctx->tintv = timespec_to_ktime(ktmr->it_interval); 140 ctx->tintv = timespec_to_ktime(ktmr->it_interval);
75 hrtimer_init(&ctx->tmr, ctx->clockid, htmode); 141 hrtimer_init(&ctx->tmr, clockid, htmode);
76 hrtimer_set_expires(&ctx->tmr, texp); 142 hrtimer_set_expires(&ctx->tmr, texp);
77 ctx->tmr.function = timerfd_tmrproc; 143 ctx->tmr.function = timerfd_tmrproc;
78 if (texp.tv64 != 0) 144 if (texp.tv64 != 0) {
79 hrtimer_start(&ctx->tmr, texp, htmode); 145 hrtimer_start(&ctx->tmr, texp, htmode);
146 if (timerfd_canceled(ctx))
147 return -ECANCELED;
148 }
149 return 0;
80} 150}
81 151
82static int timerfd_release(struct inode *inode, struct file *file) 152static int timerfd_release(struct inode *inode, struct file *file)
83{ 153{
84 struct timerfd_ctx *ctx = file->private_data; 154 struct timerfd_ctx *ctx = file->private_data;
85 155
156 timerfd_remove_cancel(ctx);
86 hrtimer_cancel(&ctx->tmr); 157 hrtimer_cancel(&ctx->tmr);
87 kfree(ctx); 158 kfree_rcu(ctx, rcu);
88 return 0; 159 return 0;
89} 160}
90 161
@@ -118,8 +189,21 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
118 res = -EAGAIN; 189 res = -EAGAIN;
119 else 190 else
120 res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); 191 res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
192
193 /*
194 * If clock has changed, we do not care about the
195 * ticks and we do not rearm the timer. Userspace must
196 * reevaluate anyway.
197 */
198 if (timerfd_canceled(ctx)) {
199 ctx->ticks = 0;
200 ctx->expired = 0;
201 res = -ECANCELED;
202 }
203
121 if (ctx->ticks) { 204 if (ctx->ticks) {
122 ticks = ctx->ticks; 205 ticks = ctx->ticks;
206
123 if (ctx->expired && ctx->tintv.tv64) { 207 if (ctx->expired && ctx->tintv.tv64) {
124 /* 208 /*
125 * If tintv.tv64 != 0, this is a periodic timer that 209 * If tintv.tv64 != 0, this is a periodic timer that
@@ -183,6 +267,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
183 init_waitqueue_head(&ctx->wqh); 267 init_waitqueue_head(&ctx->wqh);
184 ctx->clockid = clockid; 268 ctx->clockid = clockid;
185 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); 269 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
270 ctx->moffs = ktime_get_monotonic_offset();
186 271
187 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, 272 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
188 O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); 273 O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
@@ -199,6 +284,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
199 struct file *file; 284 struct file *file;
200 struct timerfd_ctx *ctx; 285 struct timerfd_ctx *ctx;
201 struct itimerspec ktmr, kotmr; 286 struct itimerspec ktmr, kotmr;
287 int ret;
202 288
203 if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) 289 if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
204 return -EFAULT; 290 return -EFAULT;
@@ -213,6 +299,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
213 return PTR_ERR(file); 299 return PTR_ERR(file);
214 ctx = file->private_data; 300 ctx = file->private_data;
215 301
302 timerfd_setup_cancel(ctx, flags);
303
216 /* 304 /*
217 * We need to stop the existing timer before reprogramming 305 * We need to stop the existing timer before reprogramming
218 * it to the new values. 306 * it to the new values.
@@ -240,14 +328,14 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
240 /* 328 /*
241 * Re-program the timer to the new value ... 329 * Re-program the timer to the new value ...
242 */ 330 */
243 timerfd_setup(ctx, flags, &ktmr); 331 ret = timerfd_setup(ctx, flags, &ktmr);
244 332
245 spin_unlock_irq(&ctx->wqh.lock); 333 spin_unlock_irq(&ctx->wqh.lock);
246 fput(file); 334 fput(file);
247 if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) 335 if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
248 return -EFAULT; 336 return -EFAULT;
249 337
250 return 0; 338 return ret;
251} 339}
252 340
253SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) 341SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr)
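
From userspace, the cancel-on-set machinery added above is opt-in: an absolute CLOCK_REALTIME timer armed with TFD_TIMER_CANCEL_ON_SET makes read() fail with ECANCELED when the wall clock is changed, so the caller can recompute its deadline. A hedged usage sketch; the flag value is assumed to match the patch, since libc headers of this era may not define it yet:

    #include <sys/timerfd.h>
    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>
    #include <unistd.h>

    #ifndef TFD_TIMER_CANCEL_ON_SET     /* assumed to match the kernel value */
    #define TFD_TIMER_CANCEL_ON_SET (1 << 1)
    #endif

    int main(void)
    {
        struct itimerspec its = { { 0, 0 }, { 0, 0 } };
        uint64_t ticks;
        int fd;

        fd = timerfd_create(CLOCK_REALTIME, 0);
        if (fd < 0)
            return 1;

        /* Absolute expiry 60s from now; cancel if the wall clock is changed. */
        clock_gettime(CLOCK_REALTIME, &its.it_value);
        its.it_value.tv_sec += 60;
        if (timerfd_settime(fd, TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET,
                            &its, NULL) < 0)
            return 1;

        /* Blocks until the timer fires or someone sets the clock. */
        if (read(fd, &ticks, sizeof(ticks)) < 0 && errno == ECANCELED)
            fprintf(stderr, "clock was set, recompute the deadline\n");

        close(fd);
        return 0;
    }

Relative timers and other clocks are unaffected; in the patch, timerfd_setup_cancel() only links a context into cancel_list for CLOCK_REALTIME absolute timers carrying the new flag.
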
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index e765743cf9f3..b4d791a83207 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -409,7 +409,7 @@ out:
409} 409}
410 410
411/** 411/**
412 * ufs_getfrag_bloc() - `get_block_t' function, interface between UFS and 412 * ufs_getfrag_block() - `get_block_t' function, interface between UFS and
413 * readpage, writepage and so on 413 * readpage, writepage and so on
414 */ 414 */
415 415
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 9ef9ed2cfe2e..52b2b5da566e 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -33,7 +33,6 @@
33#include <linux/migrate.h> 33#include <linux/migrate.h>
34#include <linux/backing-dev.h> 34#include <linux/backing-dev.h>
35#include <linux/freezer.h> 35#include <linux/freezer.h>
36#include <linux/list_sort.h>
37 36
38#include "xfs_sb.h" 37#include "xfs_sb.h"
39#include "xfs_inum.h" 38#include "xfs_inum.h"
@@ -709,6 +708,27 @@ xfs_buf_get_empty(
709 return bp; 708 return bp;
710} 709}
711 710
711/*
712 * Return a buffer allocated as an empty buffer and associated to external
713 * memory via xfs_buf_associate_memory() back to it's empty state.
714 */
715void
716xfs_buf_set_empty(
717 struct xfs_buf *bp,
718 size_t len)
719{
720 if (bp->b_pages)
721 _xfs_buf_free_pages(bp);
722
723 bp->b_pages = NULL;
724 bp->b_page_count = 0;
725 bp->b_addr = NULL;
726 bp->b_file_offset = 0;
727 bp->b_buffer_length = bp->b_count_desired = len;
728 bp->b_bn = XFS_BUF_DADDR_NULL;
729 bp->b_flags &= ~XBF_MAPPED;
730}
731
712static inline struct page * 732static inline struct page *
713mem_to_page( 733mem_to_page(
714 void *addr) 734 void *addr)
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index a9a1c4512645..50a7d5fb3b73 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -178,6 +178,7 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
178 xfs_buf_flags_t); 178 xfs_buf_flags_t);
179 179
180extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); 180extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
181extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len);
181extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); 182extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
182extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); 183extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
183extern void xfs_buf_hold(xfs_buf_t *); 184extern void xfs_buf_hold(xfs_buf_t *);
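
xfs_buf_set_empty() gives callers a way to take a buffer that was pointed at externally supplied memory and return it to the empty state xfs_buf_get_empty() produces, so the same buffer object can be re-associated with a different region. A standalone sketch of that associate / reset / reuse life cycle on a simplified buffer struct; the field names are illustrative stand-ins, not the real xfs_buf members:

    #include <stddef.h>
    #include <stdio.h>

    struct buf {
        void   *addr;       /* caller-supplied memory, if any */
        size_t  length;     /* current buffer length          */
        int     mapped;     /* is addr valid?                 */
    };

    /* Point the buffer at external memory. */
    static void buf_associate_memory(struct buf *bp, void *mem, size_t len)
    {
        bp->addr = mem;
        bp->length = len;
        bp->mapped = 1;
    }

    /* Return the buffer to its empty state so it can be reused. */
    static void buf_set_empty(struct buf *bp, size_t len)
    {
        bp->addr = NULL;
        bp->length = len;
        bp->mapped = 0;
    }

    int main(void)
    {
        static char region_a[512], region_b[1024];
        struct buf bp = { 0 };

        buf_associate_memory(&bp, region_a, sizeof(region_a));
        printf("len=%zu mapped=%d\n", bp.length, bp.mapped);

        buf_set_empty(&bp, sizeof(region_b));                   /* reset  */
        buf_associate_memory(&bp, region_b, sizeof(region_b));  /* reuse  */
        printf("len=%zu mapped=%d\n", bp.length, bp.mapped);
        return 0;
    }
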
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index b3486dfa5520..54e623bfbb85 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -586,7 +586,8 @@ xfs_file_compat_ioctl(
586 case XFS_IOC_RESVSP_32: 586 case XFS_IOC_RESVSP_32:
587 case XFS_IOC_UNRESVSP_32: 587 case XFS_IOC_UNRESVSP_32:
588 case XFS_IOC_RESVSP64_32: 588 case XFS_IOC_RESVSP64_32:
589 case XFS_IOC_UNRESVSP64_32: { 589 case XFS_IOC_UNRESVSP64_32:
590 case XFS_IOC_ZERO_RANGE_32: {
590 struct xfs_flock64 bf; 591 struct xfs_flock64 bf;
591 592
592 if (xfs_compat_flock64_copyin(&bf, arg)) 593 if (xfs_compat_flock64_copyin(&bf, arg))
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
index 08b605792a99..80f4060e8970 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -184,6 +184,7 @@ typedef struct compat_xfs_flock64 {
184#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) 184#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64)
185#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) 185#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64)
186#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64) 186#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64)
187#define XFS_IOC_ZERO_RANGE_32 _IOW('X', 57, struct compat_xfs_flock64)
187 188
188typedef struct compat_xfs_fsop_geom_v1 { 189typedef struct compat_xfs_fsop_geom_v1 {
189 __u32 blocksize; /* filesystem (data) block size */ 190 __u32 blocksize; /* filesystem (data) block size */
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 244be9cbfe78..8633521b3b2e 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -70,6 +70,7 @@
70#include <linux/ctype.h> 70#include <linux/ctype.h>
71#include <linux/writeback.h> 71#include <linux/writeback.h>
72#include <linux/capability.h> 72#include <linux/capability.h>
73#include <linux/list_sort.h>
73 74
74#include <asm/page.h> 75#include <asm/page.h>
75#include <asm/div64.h> 76#include <asm/div64.h>
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c
index 9f76cceb678d..bd672def95ac 100644
--- a/fs/xfs/linux-2.6/xfs_message.c
+++ b/fs/xfs/linux-2.6/xfs_message.c
@@ -41,23 +41,6 @@ __xfs_printk(
41 printk("%sXFS: %pV\n", level, vaf); 41 printk("%sXFS: %pV\n", level, vaf);
42} 42}
43 43
44void xfs_printk(
45 const char *level,
46 const struct xfs_mount *mp,
47 const char *fmt, ...)
48{
49 struct va_format vaf;
50 va_list args;
51
52 va_start(args, fmt);
53
54 vaf.fmt = fmt;
55 vaf.va = &args;
56
57 __xfs_printk(level, mp, &vaf);
58 va_end(args);
59}
60
61#define define_xfs_printk_level(func, kern_level) \ 44#define define_xfs_printk_level(func, kern_level) \
62void func(const struct xfs_mount *mp, const char *fmt, ...) \ 45void func(const struct xfs_mount *mp, const char *fmt, ...) \
63{ \ 46{ \
@@ -95,8 +78,7 @@ xfs_alert_tag(
95 int do_panic = 0; 78 int do_panic = 0;
96 79
97 if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { 80 if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
98 xfs_printk(KERN_ALERT, mp, 81 xfs_alert(mp, "Transforming an alert into a BUG.");
99 "XFS: Transforming an alert into a BUG.");
100 do_panic = 1; 82 do_panic = 1;
101 } 83 }
102 84
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h
index f1b3fc1b6c4e..7fb7ea007672 100644
--- a/fs/xfs/linux-2.6/xfs_message.h
+++ b/fs/xfs/linux-2.6/xfs_message.h
@@ -3,9 +3,6 @@
3 3
4struct xfs_mount; 4struct xfs_mount;
5 5
6extern void xfs_printk(const char *level, const struct xfs_mount *mp,
7 const char *fmt, ...)
8 __attribute__ ((format (printf, 3, 4)));
9extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) 6extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
10 __attribute__ ((format (printf, 2, 3))); 7 __attribute__ ((format (printf, 2, 3)));
11extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) 8extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
@@ -28,7 +25,9 @@ extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
28extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) 25extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
29 __attribute__ ((format (printf, 2, 3))); 26 __attribute__ ((format (printf, 2, 3)));
30#else 27#else
31static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) 28static inline void
29__attribute__ ((format (printf, 2, 3)))
30xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
32{ 31{
33} 32}
34#endif 33#endif
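
With xfs_printk() removed, every caller goes through a level-specific wrapper generated by define_xfs_printk_level(), and the !DEBUG xfs_debug() stub keeps its printf format attribute so format strings are still type-checked even though the body compiles away. A small standalone sketch of both ideas, using generic names rather than the XFS macros:

    #include <stdarg.h>
    #include <stdio.h>

    /* Generate one thin wrapper per message level around a single core. */
    #define DEFINE_LOG_LEVEL(func, prefix)                          \
    static void func(const char *fmt, ...)                          \
    {                                                               \
        va_list args;                                               \
        va_start(args, fmt);                                        \
        fputs(prefix ": ", stderr);                                 \
        vfprintf(stderr, fmt, args);                                \
        fputc('\n', stderr);                                        \
        va_end(args);                                               \
    }

    DEFINE_LOG_LEVEL(log_alert, "ALERT")
    DEFINE_LOG_LEVEL(log_warn,  "WARN")

    /* A no-op debug stub that still gets format checking from the compiler. */
    static inline void
    __attribute__((format(printf, 1, 2)))
    log_debug(const char *fmt, ...)
    {
        (void)fmt;
    }

    int main(void)
    {
        log_alert("Transforming an alert into a BUG.");
        log_warn("free space %d blocks", 42);
        log_debug("value is %d", 7);   /* compiled out, still type-checked */
        return 0;
    }
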
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index b38e58d02299..b0aa59e51fd0 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1787,10 +1787,6 @@ init_xfs_fs(void)
1787 if (error) 1787 if (error)
1788 goto out_cleanup_procfs; 1788 goto out_cleanup_procfs;
1789 1789
1790 error = xfs_init_workqueues();
1791 if (error)
1792 goto out_sysctl_unregister;
1793
1794 vfs_initquota(); 1790 vfs_initquota();
1795 1791
1796 error = register_filesystem(&xfs_fs_type); 1792 error = register_filesystem(&xfs_fs_type);
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 3e898a48122d..cb1bb2080e44 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -267,6 +267,16 @@ xfs_sync_inode_attr(
267 267
268 error = xfs_iflush(ip, flags); 268 error = xfs_iflush(ip, flags);
269 269
270 /*
271 * We don't want to try again on non-blocking flushes that can't run
272 * again immediately. If an inode really must be written, then that's
273 * what the SYNC_WAIT flag is for.
274 */
275 if (error == EAGAIN) {
276 ASSERT(!(flags & SYNC_WAIT));
277 error = 0;
278 }
279
270 out_unlock: 280 out_unlock:
271 xfs_iunlock(ip, XFS_ILOCK_SHARED); 281 xfs_iunlock(ip, XFS_ILOCK_SHARED);
272 return error; 282 return error;
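
The hunk above treats EAGAIN from a non-blocking inode flush as success, on the grounds that the next sync pass will retry it, while SYNC_WAIT callers are asserted never to see it. A generic sketch of that convention; like the patch it compares against a positive EAGAIN value, and the names are illustrative:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Pretend flush: cannot make progress unless the caller agrees to wait. */
    static int flush_object(bool wait)
    {
        return wait ? 0 : EAGAIN;   /* positive errno, as in the patch */
    }

    static int sync_one(bool wait)
    {
        int error = flush_object(wait);

        /*
         * A non-blocking flush that can't run right now is not a failure;
         * the next sync pass will simply retry it.  Callers that must
         * succeed pass wait=true and never take this branch.
         */
        if (error == EAGAIN && !wait)
            error = 0;

        return error;
    }

    int main(void)
    {
        printf("non-blocking: %d, blocking: %d\n",
               sync_one(false), sync_one(true));
        return 0;
    }
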
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 2d0bcb479075..d48b7a579ae1 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -1151,44 +1151,7 @@ TRACE_EVENT(xfs_bunmap,
1151 1151
1152); 1152);
1153 1153
1154#define XFS_BUSY_SYNC \ 1154DECLARE_EVENT_CLASS(xfs_busy_class,
1155 { 0, "async" }, \
1156 { 1, "sync" }
1157
1158TRACE_EVENT(xfs_alloc_busy,
1159 TP_PROTO(struct xfs_trans *trans, xfs_agnumber_t agno,
1160 xfs_agblock_t agbno, xfs_extlen_t len, int sync),
1161 TP_ARGS(trans, agno, agbno, len, sync),
1162 TP_STRUCT__entry(
1163 __field(dev_t, dev)
1164 __field(struct xfs_trans *, tp)
1165 __field(int, tid)
1166 __field(xfs_agnumber_t, agno)
1167 __field(xfs_agblock_t, agbno)
1168 __field(xfs_extlen_t, len)
1169 __field(int, sync)
1170 ),
1171 TP_fast_assign(
1172 __entry->dev = trans->t_mountp->m_super->s_dev;
1173 __entry->tp = trans;
1174 __entry->tid = trans->t_ticket->t_tid;
1175 __entry->agno = agno;
1176 __entry->agbno = agbno;
1177 __entry->len = len;
1178 __entry->sync = sync;
1179 ),
1180 TP_printk("dev %d:%d trans 0x%p tid 0x%x agno %u agbno %u len %u %s",
1181 MAJOR(__entry->dev), MINOR(__entry->dev),
1182 __entry->tp,
1183 __entry->tid,
1184 __entry->agno,
1185 __entry->agbno,
1186 __entry->len,
1187 __print_symbolic(__entry->sync, XFS_BUSY_SYNC))
1188
1189);
1190
1191TRACE_EVENT(xfs_alloc_unbusy,
1192 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 1155 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
1193 xfs_agblock_t agbno, xfs_extlen_t len), 1156 xfs_agblock_t agbno, xfs_extlen_t len),
1194 TP_ARGS(mp, agno, agbno, len), 1157 TP_ARGS(mp, agno, agbno, len),
@@ -1210,35 +1173,45 @@ TRACE_EVENT(xfs_alloc_unbusy,
1210 __entry->agbno, 1173 __entry->agbno,
1211 __entry->len) 1174 __entry->len)
1212); 1175);
1176#define DEFINE_BUSY_EVENT(name) \
1177DEFINE_EVENT(xfs_busy_class, name, \
1178 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
1179 xfs_agblock_t agbno, xfs_extlen_t len), \
1180 TP_ARGS(mp, agno, agbno, len))
1181DEFINE_BUSY_EVENT(xfs_alloc_busy);
1182DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem);
1183DEFINE_BUSY_EVENT(xfs_alloc_busy_force);
1184DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse);
1185DEFINE_BUSY_EVENT(xfs_alloc_busy_clear);
1213 1186
1214#define XFS_BUSY_STATES \ 1187TRACE_EVENT(xfs_alloc_busy_trim,
1215 { 0, "missing" }, \
1216 { 1, "found" }
1217
1218TRACE_EVENT(xfs_alloc_busysearch,
1219 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, 1188 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
1220 xfs_agblock_t agbno, xfs_extlen_t len, int found), 1189 xfs_agblock_t agbno, xfs_extlen_t len,
1221 TP_ARGS(mp, agno, agbno, len, found), 1190 xfs_agblock_t tbno, xfs_extlen_t tlen),
1191 TP_ARGS(mp, agno, agbno, len, tbno, tlen),
1222 TP_STRUCT__entry( 1192 TP_STRUCT__entry(
1223 __field(dev_t, dev) 1193 __field(dev_t, dev)
1224 __field(xfs_agnumber_t, agno) 1194 __field(xfs_agnumber_t, agno)
1225 __field(xfs_agblock_t, agbno) 1195 __field(xfs_agblock_t, agbno)
1226 __field(xfs_extlen_t, len) 1196 __field(xfs_extlen_t, len)
1227 __field(int, found) 1197 __field(xfs_agblock_t, tbno)
1198 __field(xfs_extlen_t, tlen)
1228 ), 1199 ),
1229 TP_fast_assign( 1200 TP_fast_assign(
1230 __entry->dev = mp->m_super->s_dev; 1201 __entry->dev = mp->m_super->s_dev;
1231 __entry->agno = agno; 1202 __entry->agno = agno;
1232 __entry->agbno = agbno; 1203 __entry->agbno = agbno;
1233 __entry->len = len; 1204 __entry->len = len;
1234 __entry->found = found; 1205 __entry->tbno = tbno;
1206 __entry->tlen = tlen;
1235 ), 1207 ),
1236 TP_printk("dev %d:%d agno %u agbno %u len %u %s", 1208 TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u",
1237 MAJOR(__entry->dev), MINOR(__entry->dev), 1209 MAJOR(__entry->dev), MINOR(__entry->dev),
1238 __entry->agno, 1210 __entry->agno,
1239 __entry->agbno, 1211 __entry->agbno,
1240 __entry->len, 1212 __entry->len,
1241 __print_symbolic(__entry->found, XFS_BUSY_STATES)) 1213 __entry->tbno,
1214 __entry->tlen)
1242); 1215);
1243 1216
1244TRACE_EVENT(xfs_trans_commit_lsn, 1217TRACE_EVENT(xfs_trans_commit_lsn,
@@ -1418,7 +1391,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
1418 __entry->wasfromfl, 1391 __entry->wasfromfl,
1419 __entry->isfl, 1392 __entry->isfl,
1420 __entry->userdata, 1393 __entry->userdata,
1421 __entry->firstblock) 1394 (unsigned long long)__entry->firstblock)
1422) 1395)
1423 1396
1424#define DEFINE_ALLOC_EVENT(name) \ 1397#define DEFINE_ALLOC_EVENT(name) \
@@ -1433,11 +1406,14 @@ DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
1433DEFINE_ALLOC_EVENT(xfs_alloc_near_greater); 1406DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
1434DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser); 1407DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
1435DEFINE_ALLOC_EVENT(xfs_alloc_near_error); 1408DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
1409DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry);
1410DEFINE_ALLOC_EVENT(xfs_alloc_near_busy);
1436DEFINE_ALLOC_EVENT(xfs_alloc_size_neither); 1411DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
1437DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry); 1412DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
1438DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft); 1413DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
1439DEFINE_ALLOC_EVENT(xfs_alloc_size_done); 1414DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
1440DEFINE_ALLOC_EVENT(xfs_alloc_size_error); 1415DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
1416DEFINE_ALLOC_EVENT(xfs_alloc_size_busy);
1441DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist); 1417DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
1442DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough); 1418DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
1443DEFINE_ALLOC_EVENT(xfs_alloc_small_done); 1419DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
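
The trace changes fold several nearly identical TRACE_EVENT definitions into one DECLARE_EVENT_CLASS plus thin DEFINE_EVENT stubs, so the TP_STRUCT__entry/TP_fast_assign/TP_printk body exists only once. The same one-body-many-entry-points shape in plain, runnable C for readers who do not live in the tracepoint macros (purely illustrative):

    #include <stdio.h>

    struct busy_args {
        unsigned agno;
        unsigned agbno;
        unsigned len;
    };

    /* The shared "class": one formatting body used by every event. */
    static void busy_class(const char *event, const struct busy_args *a)
    {
        printf("%s: agno %u agbno %u len %u\n", event, a->agno, a->agbno, a->len);
    }

    /* Each DEFINE_BUSY_EVENT-style stub is a named entry into the class. */
    #define DEFINE_BUSY_EVENT(name)                                 \
    static void name(unsigned agno, unsigned agbno, unsigned len)   \
    {                                                               \
        struct busy_args a = { agno, agbno, len };                  \
        busy_class(#name, &a);                                      \
    }

    DEFINE_BUSY_EVENT(trace_alloc_busy)
    DEFINE_BUSY_EVENT(trace_alloc_busy_reuse)
    DEFINE_BUSY_EVENT(trace_alloc_busy_clear)

    int main(void)
    {
        trace_alloc_busy(1, 100, 8);
        trace_alloc_busy_reuse(1, 100, 8);
        trace_alloc_busy_clear(1, 100, 8);
        return 0;
    }
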
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 58632cc17f2d..da0a561ffba2 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -187,7 +187,6 @@ struct xfs_busy_extent {
187 xfs_agnumber_t agno; 187 xfs_agnumber_t agno;
188 xfs_agblock_t bno; 188 xfs_agblock_t bno;
189 xfs_extlen_t length; 189 xfs_extlen_t length;
190 xlog_tid_t tid; /* transaction that created this */
191}; 190};
192 191
193/* 192/*
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 27d64d752eab..acdced86413c 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -41,19 +41,13 @@
41#define XFSA_FIXUP_BNO_OK 1 41#define XFSA_FIXUP_BNO_OK 1
42#define XFSA_FIXUP_CNT_OK 2 42#define XFSA_FIXUP_CNT_OK 2
43 43
44/*
45 * Prototypes for per-ag allocation routines
46 */
47
48STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); 44STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
49STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); 45STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
50STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); 46STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
51STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, 47STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
52 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); 48 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
53 49STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *,
54/* 50 xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *);
55 * Internal functions.
56 */
57 51
58/* 52/*
59 * Lookup the record equal to [bno, len] in the btree given by cur. 53 * Lookup the record equal to [bno, len] in the btree given by cur.
@@ -154,19 +148,21 @@ xfs_alloc_compute_aligned(
154 xfs_extlen_t *reslen) /* result length */ 148 xfs_extlen_t *reslen) /* result length */
155{ 149{
156 xfs_agblock_t bno; 150 xfs_agblock_t bno;
157 xfs_extlen_t diff;
158 xfs_extlen_t len; 151 xfs_extlen_t len;
159 152
160 if (args->alignment > 1 && foundlen >= args->minlen) { 153 /* Trim busy sections out of found extent */
161 bno = roundup(foundbno, args->alignment); 154 xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len);
162 diff = bno - foundbno; 155
163 len = diff >= foundlen ? 0 : foundlen - diff; 156 if (args->alignment > 1 && len >= args->minlen) {
157 xfs_agblock_t aligned_bno = roundup(bno, args->alignment);
158 xfs_extlen_t diff = aligned_bno - bno;
159
160 *resbno = aligned_bno;
161 *reslen = diff >= len ? 0 : len - diff;
164 } else { 162 } else {
165 bno = foundbno; 163 *resbno = bno;
166 len = foundlen; 164 *reslen = len;
167 } 165 }
168 *resbno = bno;
169 *reslen = len;
170} 166}
171 167
172/* 168/*
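
xfs_alloc_compute_aligned() now trims busy ranges out of the candidate extent before applying alignment, so an allocation never hands out blocks whose freeing transaction is still in flight. A standalone sketch of the trim-then-align arithmetic; the busy-trim step below is a stub that treats a fixed prefix as busy, whereas the real xfs_alloc_busy_trim() walks the per-AG rbtree:

    #include <stdio.h>

    /* Stand-in for xfs_alloc_busy_trim(): pretend blocks 0..2 are busy. */
    static void busy_trim(unsigned fbno, unsigned flen,
                          unsigned *tbno, unsigned *tlen)
    {
        unsigned busy_end = 3;          /* first non-busy block */

        if (fbno >= busy_end) {
            *tbno = fbno;
            *tlen = flen;
        } else if (fbno + flen <= busy_end) {
            *tbno = fbno;
            *tlen = 0;                  /* fully busy */
        } else {
            *tbno = busy_end;
            *tlen = fbno + flen - busy_end;
        }
    }

    /* Trim busy blocks, then round the start up to the alignment. */
    static void compute_aligned(unsigned fbno, unsigned flen,
                                unsigned alignment, unsigned minlen,
                                unsigned *resbno, unsigned *reslen)
    {
        unsigned bno, len;

        busy_trim(fbno, flen, &bno, &len);

        if (alignment > 1 && len >= minlen) {
            unsigned aligned_bno = (bno + alignment - 1) / alignment * alignment;
            unsigned diff = aligned_bno - bno;

            *resbno = aligned_bno;
            *reslen = diff >= len ? 0 : len - diff;
        } else {
            *resbno = bno;
            *reslen = len;
        }
    }

    int main(void)
    {
        unsigned bno, len;

        compute_aligned(1, 10, 4, 2, &bno, &len);
        printf("usable extent: bno %u len %u\n", bno, len);   /* 4, 7 */
        return 0;
    }
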
@@ -280,7 +276,6 @@ xfs_alloc_fix_minleft(
280 return 1; 276 return 1;
281 agf = XFS_BUF_TO_AGF(args->agbp); 277 agf = XFS_BUF_TO_AGF(args->agbp);
282 diff = be32_to_cpu(agf->agf_freeblks) 278 diff = be32_to_cpu(agf->agf_freeblks)
283 + be32_to_cpu(agf->agf_flcount)
284 - args->len - args->minleft; 279 - args->len - args->minleft;
285 if (diff >= 0) 280 if (diff >= 0)
286 return 1; 281 return 1;
@@ -541,16 +536,8 @@ xfs_alloc_ag_vextent(
541 if (error) 536 if (error)
542 return error; 537 return error;
543 538
544 /* 539 ASSERT(!xfs_alloc_busy_search(args->mp, args->agno,
545 * Search the busylist for these blocks and mark the 540 args->agbno, args->len));
546 * transaction as synchronous if blocks are found. This
547 * avoids the need to block due to a synchronous log
548 * force to ensure correct ordering as the synchronous
549 * transaction will guarantee that for us.
550 */
551 if (xfs_alloc_busy_search(args->mp, args->agno,
552 args->agbno, args->len))
553 xfs_trans_set_sync(args->tp);
554 } 541 }
555 542
556 if (!args->isfl) { 543 if (!args->isfl) {
@@ -577,14 +564,14 @@ xfs_alloc_ag_vextent_exact(
577{ 564{
578 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ 565 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
579 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ 566 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
580 xfs_agblock_t end; /* end of allocated extent */
581 int error; 567 int error;
582 xfs_agblock_t fbno; /* start block of found extent */ 568 xfs_agblock_t fbno; /* start block of found extent */
583 xfs_agblock_t fend; /* end block of found extent */
584 xfs_extlen_t flen; /* length of found extent */ 569 xfs_extlen_t flen; /* length of found extent */
570 xfs_agblock_t tbno; /* start block of trimmed extent */
571 xfs_extlen_t tlen; /* length of trimmed extent */
572 xfs_agblock_t tend; /* end block of trimmed extent */
573 xfs_agblock_t end; /* end of allocated extent */
585 int i; /* success/failure of operation */ 574 int i; /* success/failure of operation */
586 xfs_agblock_t maxend; /* end of maximal extent */
587 xfs_agblock_t minend; /* end of minimal extent */
588 xfs_extlen_t rlen; /* length of returned extent */ 575 xfs_extlen_t rlen; /* length of returned extent */
589 576
590 ASSERT(args->alignment == 1); 577 ASSERT(args->alignment == 1);
@@ -614,14 +601,22 @@ xfs_alloc_ag_vextent_exact(
614 goto error0; 601 goto error0;
615 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 602 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
616 ASSERT(fbno <= args->agbno); 603 ASSERT(fbno <= args->agbno);
617 minend = args->agbno + args->minlen;
618 maxend = args->agbno + args->maxlen;
619 fend = fbno + flen;
620 604
621 /* 605 /*
622 * Give up if the freespace isn't long enough for the minimum request. 606 * Check for overlapping busy extents.
607 */
608 xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen);
609
610 /*
611 * Give up if the start of the extent is busy, or the freespace isn't
612 * long enough for the minimum request.
623 */ 613 */
624 if (fend < minend) 614 if (tbno > args->agbno)
615 goto not_found;
616 if (tlen < args->minlen)
617 goto not_found;
618 tend = tbno + tlen;
619 if (tend < args->agbno + args->minlen)
625 goto not_found; 620 goto not_found;
626 621
627 /* 622 /*
@@ -630,14 +625,14 @@ xfs_alloc_ag_vextent_exact(
630 * 625 *
631 * Fix the length according to mod and prod if given. 626 * Fix the length according to mod and prod if given.
632 */ 627 */
633 end = XFS_AGBLOCK_MIN(fend, maxend); 628 end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen);
634 args->len = end - args->agbno; 629 args->len = end - args->agbno;
635 xfs_alloc_fix_len(args); 630 xfs_alloc_fix_len(args);
636 if (!xfs_alloc_fix_minleft(args)) 631 if (!xfs_alloc_fix_minleft(args))
637 goto not_found; 632 goto not_found;
638 633
639 rlen = args->len; 634 rlen = args->len;
640 ASSERT(args->agbno + rlen <= fend); 635 ASSERT(args->agbno + rlen <= tend);
641 end = args->agbno + rlen; 636 end = args->agbno + rlen;
642 637
643 /* 638 /*
@@ -686,11 +681,11 @@ xfs_alloc_find_best_extent(
686 struct xfs_btree_cur **scur, /* searching cursor */ 681 struct xfs_btree_cur **scur, /* searching cursor */
687 xfs_agblock_t gdiff, /* difference for search comparison */ 682 xfs_agblock_t gdiff, /* difference for search comparison */
688 xfs_agblock_t *sbno, /* extent found by search */ 683 xfs_agblock_t *sbno, /* extent found by search */
689 xfs_extlen_t *slen, 684 xfs_extlen_t *slen, /* extent length */
690 xfs_extlen_t *slena, /* aligned length */ 685 xfs_agblock_t *sbnoa, /* aligned extent found by search */
686 xfs_extlen_t *slena, /* aligned extent length */
691 int dir) /* 0 = search right, 1 = search left */ 687 int dir) /* 0 = search right, 1 = search left */
692{ 688{
693 xfs_agblock_t bno;
694 xfs_agblock_t new; 689 xfs_agblock_t new;
695 xfs_agblock_t sdiff; 690 xfs_agblock_t sdiff;
696 int error; 691 int error;
@@ -708,16 +703,16 @@ xfs_alloc_find_best_extent(
708 if (error) 703 if (error)
709 goto error0; 704 goto error0;
710 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 705 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
711 xfs_alloc_compute_aligned(args, *sbno, *slen, &bno, slena); 706 xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
712 707
713 /* 708 /*
714 * The good extent is closer than this one. 709 * The good extent is closer than this one.
715 */ 710 */
716 if (!dir) { 711 if (!dir) {
717 if (bno >= args->agbno + gdiff) 712 if (*sbnoa >= args->agbno + gdiff)
718 goto out_use_good; 713 goto out_use_good;
719 } else { 714 } else {
720 if (bno <= args->agbno - gdiff) 715 if (*sbnoa <= args->agbno - gdiff)
721 goto out_use_good; 716 goto out_use_good;
722 } 717 }
723 718
@@ -729,8 +724,8 @@ xfs_alloc_find_best_extent(
729 xfs_alloc_fix_len(args); 724 xfs_alloc_fix_len(args);
730 725
731 sdiff = xfs_alloc_compute_diff(args->agbno, args->len, 726 sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
732 args->alignment, *sbno, 727 args->alignment, *sbnoa,
733 *slen, &new); 728 *slena, &new);
734 729
735 /* 730 /*
736 * Choose closer size and invalidate other cursor. 731 * Choose closer size and invalidate other cursor.
@@ -780,7 +775,7 @@ xfs_alloc_ag_vextent_near(
780 xfs_agblock_t gtbnoa; /* aligned ... */ 775 xfs_agblock_t gtbnoa; /* aligned ... */
781 xfs_extlen_t gtdiff; /* difference to right side entry */ 776 xfs_extlen_t gtdiff; /* difference to right side entry */
782 xfs_extlen_t gtlen; /* length of right side entry */ 777 xfs_extlen_t gtlen; /* length of right side entry */
783 xfs_extlen_t gtlena = 0; /* aligned ... */ 778 xfs_extlen_t gtlena; /* aligned ... */
784 xfs_agblock_t gtnew; /* useful start bno of right side */ 779 xfs_agblock_t gtnew; /* useful start bno of right side */
785 int error; /* error code */ 780 int error; /* error code */
786 int i; /* result code, temporary */ 781 int i; /* result code, temporary */
@@ -789,9 +784,10 @@ xfs_alloc_ag_vextent_near(
789 xfs_agblock_t ltbnoa; /* aligned ... */ 784 xfs_agblock_t ltbnoa; /* aligned ... */
790 xfs_extlen_t ltdiff; /* difference to left side entry */ 785 xfs_extlen_t ltdiff; /* difference to left side entry */
791 xfs_extlen_t ltlen; /* length of left side entry */ 786 xfs_extlen_t ltlen; /* length of left side entry */
792 xfs_extlen_t ltlena = 0; /* aligned ... */ 787 xfs_extlen_t ltlena; /* aligned ... */
793 xfs_agblock_t ltnew; /* useful start bno of left side */ 788 xfs_agblock_t ltnew; /* useful start bno of left side */
794 xfs_extlen_t rlen; /* length of returned extent */ 789 xfs_extlen_t rlen; /* length of returned extent */
790 int forced = 0;
795#if defined(DEBUG) && defined(__KERNEL__) 791#if defined(DEBUG) && defined(__KERNEL__)
796 /* 792 /*
797 * Randomly don't execute the first algorithm. 793 * Randomly don't execute the first algorithm.
@@ -800,13 +796,20 @@ xfs_alloc_ag_vextent_near(
800 796
801 dofirst = random32() & 1; 797 dofirst = random32() & 1;
802#endif 798#endif
799
800restart:
801 bno_cur_lt = NULL;
802 bno_cur_gt = NULL;
803 ltlen = 0;
804 gtlena = 0;
805 ltlena = 0;
806
803 /* 807 /*
804 * Get a cursor for the by-size btree. 808 * Get a cursor for the by-size btree.
805 */ 809 */
806 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 810 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
807 args->agno, XFS_BTNUM_CNT); 811 args->agno, XFS_BTNUM_CNT);
808 ltlen = 0; 812
809 bno_cur_lt = bno_cur_gt = NULL;
810 /* 813 /*
811 * See if there are any free extents as big as maxlen. 814 * See if there are any free extents as big as maxlen.
812 */ 815 */
@@ -822,11 +825,13 @@ xfs_alloc_ag_vextent_near(
822 goto error0; 825 goto error0;
823 if (i == 0 || ltlen == 0) { 826 if (i == 0 || ltlen == 0) {
824 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 827 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
828 trace_xfs_alloc_near_noentry(args);
825 return 0; 829 return 0;
826 } 830 }
827 ASSERT(i == 1); 831 ASSERT(i == 1);
828 } 832 }
829 args->wasfromfl = 0; 833 args->wasfromfl = 0;
834
830 /* 835 /*
831 * First algorithm. 836 * First algorithm.
832 * If the requested extent is large wrt the freespaces available 837 * If the requested extent is large wrt the freespaces available
@@ -890,7 +895,7 @@ xfs_alloc_ag_vextent_near(
890 if (args->len < blen) 895 if (args->len < blen)
891 continue; 896 continue;
892 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, 897 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
893 args->alignment, ltbno, ltlen, &ltnew); 898 args->alignment, ltbnoa, ltlena, &ltnew);
894 if (ltnew != NULLAGBLOCK && 899 if (ltnew != NULLAGBLOCK &&
895 (args->len > blen || ltdiff < bdiff)) { 900 (args->len > blen || ltdiff < bdiff)) {
896 bdiff = ltdiff; 901 bdiff = ltdiff;
@@ -1042,11 +1047,12 @@ xfs_alloc_ag_vextent_near(
1042 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1047 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1043 xfs_alloc_fix_len(args); 1048 xfs_alloc_fix_len(args);
1044 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, 1049 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1045 args->alignment, ltbno, ltlen, &ltnew); 1050 args->alignment, ltbnoa, ltlena, &ltnew);
1046 1051
1047 error = xfs_alloc_find_best_extent(args, 1052 error = xfs_alloc_find_best_extent(args,
1048 &bno_cur_lt, &bno_cur_gt, 1053 &bno_cur_lt, &bno_cur_gt,
1049 ltdiff, &gtbno, &gtlen, &gtlena, 1054 ltdiff, &gtbno, &gtlen,
1055 &gtbnoa, &gtlena,
1050 0 /* search right */); 1056 0 /* search right */);
1051 } else { 1057 } else {
1052 ASSERT(gtlena >= args->minlen); 1058 ASSERT(gtlena >= args->minlen);
@@ -1057,11 +1063,12 @@ xfs_alloc_ag_vextent_near(
1057 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); 1063 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1058 xfs_alloc_fix_len(args); 1064 xfs_alloc_fix_len(args);
1059 gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, 1065 gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
1060 args->alignment, gtbno, gtlen, &gtnew); 1066 args->alignment, gtbnoa, gtlena, &gtnew);
1061 1067
1062 error = xfs_alloc_find_best_extent(args, 1068 error = xfs_alloc_find_best_extent(args,
1063 &bno_cur_gt, &bno_cur_lt, 1069 &bno_cur_gt, &bno_cur_lt,
1064 gtdiff, &ltbno, &ltlen, &ltlena, 1070 gtdiff, &ltbno, &ltlen,
1071 &ltbnoa, &ltlena,
1065 1 /* search left */); 1072 1 /* search left */);
1066 } 1073 }
1067 1074
@@ -1073,6 +1080,12 @@ xfs_alloc_ag_vextent_near(
1073 * If we couldn't get anything, give up. 1080 * If we couldn't get anything, give up.
1074 */ 1081 */
1075 if (bno_cur_lt == NULL && bno_cur_gt == NULL) { 1082 if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1083 if (!forced++) {
1084 trace_xfs_alloc_near_busy(args);
1085 xfs_log_force(args->mp, XFS_LOG_SYNC);
1086 goto restart;
1087 }
1088
1076 trace_xfs_alloc_size_neither(args); 1089 trace_xfs_alloc_size_neither(args);
1077 args->agbno = NULLAGBLOCK; 1090 args->agbno = NULLAGBLOCK;
1078 return 0; 1091 return 0;
@@ -1107,12 +1120,13 @@ xfs_alloc_ag_vextent_near(
1107 return 0; 1120 return 0;
1108 } 1121 }
1109 rlen = args->len; 1122 rlen = args->len;
1110 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, 1123 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
1111 ltlen, &ltnew); 1124 ltbnoa, ltlena, &ltnew);
1112 ASSERT(ltnew >= ltbno); 1125 ASSERT(ltnew >= ltbno);
1113 ASSERT(ltnew + rlen <= ltbno + ltlen); 1126 ASSERT(ltnew + rlen <= ltbnoa + ltlena);
1114 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 1127 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
1115 args->agbno = ltnew; 1128 args->agbno = ltnew;
1129
1116 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, 1130 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1117 ltnew, rlen, XFSA_FIXUP_BNO_OK))) 1131 ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1118 goto error0; 1132 goto error0;
@@ -1155,26 +1169,35 @@ xfs_alloc_ag_vextent_size(
1155 int i; /* temp status variable */ 1169 int i; /* temp status variable */
1156 xfs_agblock_t rbno; /* returned block number */ 1170 xfs_agblock_t rbno; /* returned block number */
1157 xfs_extlen_t rlen; /* length of returned extent */ 1171 xfs_extlen_t rlen; /* length of returned extent */
1172 int forced = 0;
1158 1173
1174restart:
1159 /* 1175 /*
1160 * Allocate and initialize a cursor for the by-size btree. 1176 * Allocate and initialize a cursor for the by-size btree.
1161 */ 1177 */
1162 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1178 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1163 args->agno, XFS_BTNUM_CNT); 1179 args->agno, XFS_BTNUM_CNT);
1164 bno_cur = NULL; 1180 bno_cur = NULL;
1181
1165 /* 1182 /*
1166 * Look for an entry >= maxlen+alignment-1 blocks. 1183 * Look for an entry >= maxlen+alignment-1 blocks.
1167 */ 1184 */
1168 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, 1185 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1169 args->maxlen + args->alignment - 1, &i))) 1186 args->maxlen + args->alignment - 1, &i)))
1170 goto error0; 1187 goto error0;
1188
1171 /* 1189 /*
1172 * If none, then pick up the last entry in the tree unless the 1190 * If none or we have busy extents that we cannot allocate from, then
1173 * tree is empty. 1191 * we have to settle for a smaller extent. In the case that there are
1192 * no large extents, this will return the last entry in the tree unless
1193 * the tree is empty. In the case that there are only busy large
1194 * extents, this will return the largest small extent unless there
1195 * are no smaller extents available.
1174 */ 1196 */
1175 if (!i) { 1197 if (!i || forced > 1) {
1176 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno, 1198 error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1177 &flen, &i))) 1199 &fbno, &flen, &i);
1200 if (error)
1178 goto error0; 1201 goto error0;
1179 if (i == 0 || flen == 0) { 1202 if (i == 0 || flen == 0) {
1180 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1203 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
@@ -1182,22 +1205,56 @@ xfs_alloc_ag_vextent_size(
1182 return 0; 1205 return 0;
1183 } 1206 }
1184 ASSERT(i == 1); 1207 ASSERT(i == 1);
1208 xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1209 } else {
1210 /*
1211 * Search for a non-busy extent that is large enough.
1212 * If we are at low space, don't check, or if we fall of
1213 * the end of the btree, turn off the busy check and
1214 * restart.
1215 */
1216 for (;;) {
1217 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1218 if (error)
1219 goto error0;
1220 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1221
1222 xfs_alloc_compute_aligned(args, fbno, flen,
1223 &rbno, &rlen);
1224
1225 if (rlen >= args->maxlen)
1226 break;
1227
1228 error = xfs_btree_increment(cnt_cur, 0, &i);
1229 if (error)
1230 goto error0;
1231 if (i == 0) {
1232 /*
1233 * Our only valid extents must have been busy.
1234 * Make it unbusy by forcing the log out and
1235 * retrying. If we've been here before, forcing
1236 * the log isn't making the extents available,
1237 * which means they have probably been freed in
1238 * this transaction. In that case, we have to
1239 * give up on them and we'll attempt a minlen
1240 * allocation the next time around.
1241 */
1242 xfs_btree_del_cursor(cnt_cur,
1243 XFS_BTREE_NOERROR);
1244 trace_xfs_alloc_size_busy(args);
1245 if (!forced++)
1246 xfs_log_force(args->mp, XFS_LOG_SYNC);
1247 goto restart;
1248 }
1249 }
1185 } 1250 }
1186 /* 1251
1187 * There's a freespace as big as maxlen+alignment-1, get it.
1188 */
1189 else {
1190 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i)))
1191 goto error0;
1192 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1193 }
1194 /* 1252 /*
1195 * In the first case above, we got the last entry in the 1253 * In the first case above, we got the last entry in the
1196 * by-size btree. Now we check to see if the space hits maxlen 1254 * by-size btree. Now we check to see if the space hits maxlen
1197 * once aligned; if not, we search left for something better. 1255 * once aligned; if not, we search left for something better.
1198 * This can't happen in the second case above. 1256 * This can't happen in the second case above.
1199 */ 1257 */
1200 xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1201 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1258 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1202 XFS_WANT_CORRUPTED_GOTO(rlen == 0 || 1259 XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1203 (rlen <= flen && rbno + rlen <= fbno + flen), error0); 1260 (rlen <= flen && rbno + rlen <= fbno + flen), error0);
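
When every large-enough extent turns out to be busy, the by-size search above forces the log once, which lets the pending frees complete, and restarts; if a second pass still finds nothing it settles for a smaller allocation. The escalation pattern in miniature (self-contained; the log force is simulated by clearing a flag):

    #include <stdbool.h>
    #include <stdio.h>

    static bool extents_busy = true;   /* pretend the only big extent is busy */

    static void log_force(void)
    {
        /* In XFS this pushes the log so pending frees reach disk; here we
         * just pretend the busy extent has now become reusable. */
        extents_busy = false;
    }

    static int alloc_by_size(unsigned maxlen, unsigned *out)
    {
        int forced = 0;

    restart:
        if (!extents_busy) {
            *out = maxlen;             /* got the big extent */
            return 0;
        }
        if (!forced++) {
            log_force();               /* first failure: force and retry */
            goto restart;
        }
        *out = 1;                      /* second failure: settle for minlen */
        return 0;
    }

    int main(void)
    {
        unsigned got;

        alloc_by_size(64, &got);
        printf("allocated %u blocks\n", got);
        return 0;
    }
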
@@ -1251,13 +1308,19 @@ xfs_alloc_ag_vextent_size(
1251 * Fix up the length. 1308 * Fix up the length.
1252 */ 1309 */
1253 args->len = rlen; 1310 args->len = rlen;
1254 xfs_alloc_fix_len(args); 1311 if (rlen < args->minlen) {
1255 if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) { 1312 if (!forced++) {
1256 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1313 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1257 trace_xfs_alloc_size_nominleft(args); 1314 trace_xfs_alloc_size_busy(args);
1258 args->agbno = NULLAGBLOCK; 1315 xfs_log_force(args->mp, XFS_LOG_SYNC);
1259 return 0; 1316 goto restart;
1317 }
1318 goto out_nominleft;
1260 } 1319 }
1320 xfs_alloc_fix_len(args);
1321
1322 if (!xfs_alloc_fix_minleft(args))
1323 goto out_nominleft;
1261 rlen = args->len; 1324 rlen = args->len;
1262 XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); 1325 XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
1263 /* 1326 /*
@@ -1287,6 +1350,12 @@ error0:
1287 if (bno_cur) 1350 if (bno_cur)
1288 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 1351 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1289 return error; 1352 return error;
1353
1354out_nominleft:
1355 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1356 trace_xfs_alloc_size_nominleft(args);
1357 args->agbno = NULLAGBLOCK;
1358 return 0;
1290} 1359}
1291 1360
1292/* 1361/*
@@ -1326,6 +1395,9 @@ xfs_alloc_ag_vextent_small(
1326 if (error) 1395 if (error)
1327 goto error0; 1396 goto error0;
1328 if (fbno != NULLAGBLOCK) { 1397 if (fbno != NULLAGBLOCK) {
1398 xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1,
1399 args->userdata);
1400
1329 if (args->userdata) { 1401 if (args->userdata) {
1330 xfs_buf_t *bp; 1402 xfs_buf_t *bp;
1331 1403
@@ -1617,18 +1689,6 @@ xfs_free_ag_extent(
1617 1689
1618 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); 1690 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
1619 1691
1620 /*
1621 * Since blocks move to the free list without the coordination
1622 * used in xfs_bmap_finish, we can't allow block to be available
1623 * for reallocation and non-transaction writing (user data)
1624 * until we know that the transaction that moved it to the free
1625 * list is permanently on disk. We track the blocks by declaring
1626 * these blocks as "busy"; the busy list is maintained on a per-ag
1627 * basis and each transaction records which entries should be removed
1628 * when the iclog commits to disk. If a busy block is allocated,
1629 * the iclog is pushed up to the LSN that freed the block.
1630 */
1631 xfs_alloc_busy_insert(tp, agno, bno, len);
1632 return 0; 1692 return 0;
1633 1693
1634 error0: 1694 error0:
@@ -1923,21 +1983,6 @@ xfs_alloc_get_freelist(
1923 xfs_alloc_log_agf(tp, agbp, logflags); 1983 xfs_alloc_log_agf(tp, agbp, logflags);
1924 *bnop = bno; 1984 *bnop = bno;
1925 1985
1926 /*
1927 * As blocks are freed, they are added to the per-ag busy list and
1928 * remain there until the freeing transaction is committed to disk.
1929 * Now that we have allocated blocks, this list must be searched to see
1930 * if a block is being reused. If one is, then the freeing transaction
1931 * must be pushed to disk before this transaction.
1932 *
1933 * We do this by setting the current transaction to a sync transaction
1934 * which guarantees that the freeing transaction is on disk before this
1935 * transaction. This is done instead of a synchronous log force here so
1936 * that we don't sit and wait with the AGF locked in the transaction
1937 * during the log force.
1938 */
1939 if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1))
1940 xfs_trans_set_sync(tp);
1941 return 0; 1986 return 0;
1942} 1987}
1943 1988
@@ -2423,105 +2468,13 @@ xfs_free_extent(
2423 } 2468 }
2424 2469
2425 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); 2470 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
2471 if (!error)
2472 xfs_alloc_busy_insert(tp, args.agno, args.agbno, len);
2426error0: 2473error0:
2427 xfs_perag_put(args.pag); 2474 xfs_perag_put(args.pag);
2428 return error; 2475 return error;
2429} 2476}
2430 2477
2431
2432/*
2433 * AG Busy list management
2434 * The busy list contains block ranges that have been freed but whose
2435 * transactions have not yet hit disk. If any block listed in a busy
2436 * list is reused, the transaction that freed it must be forced to disk
2437 * before continuing to use the block.
2438 *
2439 * xfs_alloc_busy_insert - add to the per-ag busy list
2440 * xfs_alloc_busy_clear - remove an item from the per-ag busy list
2441 * xfs_alloc_busy_search - search for a busy extent
2442 */
2443
2444/*
2445 * Insert a new extent into the busy tree.
2446 *
2447 * The busy extent tree is indexed by the start block of the busy extent.
2448 * there can be multiple overlapping ranges in the busy extent tree but only
2449 * ever one entry at a given start block. The reason for this is that
2450 * multi-block extents can be freed, then smaller chunks of that extent
2451 * allocated and freed again before the first transaction commit is on disk.
2452 * If the exact same start block is freed a second time, we have to wait for
2453 * that busy extent to pass out of the tree before the new extent is inserted.
2454 * There are two main cases we have to handle here.
2455 *
2456 * The first case is a transaction that triggers a "free - allocate - free"
2457 * cycle. This can occur during btree manipulations as a btree block is freed
2458 * to the freelist, then allocated from the free list, then freed again. In
2459 * this case, the second extent free is what triggers the duplicate and as
2460 * such the transaction IDs should match. Because the extent was allocated in
2461 * this transaction, the transaction must be marked as synchronous. This is
2462 * true for all cases where the free/alloc/free occurs in the one transaction,
2463 * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case.
2464 * This serves to catch violations of the second case quite effectively.
2465 *
2466 * The second case is where the free/alloc/free occur in different
2467 * transactions. In this case, the thread freeing the extent the second time
2468 * can't mark the extent busy immediately because it is already tracked in a
2469 * transaction that may be committing. When the log commit for the existing
2470 * busy extent completes, the busy extent will be removed from the tree. If we
2471 * allow the second busy insert to continue using that busy extent structure,
2472 * it can be freed before this transaction is safely in the log. Hence our
2473 * only option in this case is to force the log to remove the existing busy
2474 * extent from the list before we insert the new one with the current
2475 * transaction ID.
2476 *
2477 * The problem we are trying to avoid in the free-alloc-free in separate
2478 * transactions is most easily described with a timeline:
2479 *
2480 * Thread 1 Thread 2 Thread 3 xfslogd
2481 * xact alloc
2482 * free X
2483 * mark busy
2484 * commit xact
2485 * free xact
2486 * xact alloc
2487 * alloc X
2488 * busy search
2489 * mark xact sync
2490 * commit xact
2491 * free xact
2492 * force log
2493 * checkpoint starts
2494 * ....
2495 * xact alloc
2496 * free X
2497 * mark busy
2498 * finds match
2499 * *** KABOOM! ***
2500 * ....
2501 * log IO completes
2502 * unbusy X
2503 * checkpoint completes
2504 *
2505 * By issuing a log force in thread 3 @ "KABOOM", the thread will block until
2506 * the checkpoint completes, and the busy extent it matched will have been
2507 * removed from the tree when it is woken. Hence it can then continue safely.
2508 *
2509 * However, to ensure this matching process is robust, we need to use the
2510 * transaction ID for identifying transaction, as delayed logging results in
2511 * the busy extent and transaction lifecycles being different. i.e. the busy
2512 * extent is active for a lot longer than the transaction. Hence the
2513 * transaction structure can be freed and reallocated, then mark the same
2514 * extent busy again in the new transaction. In this case the new transaction
2515 * will have a different tid but can have the same address, and hence we need
2516 * to check against the tid.
2517 *
2518 * Future: for delayed logging, we could avoid the log force if the extent was
2519 * first freed in the current checkpoint sequence. This, however, requires the
2520 * ability to pin the current checkpoint in memory until this transaction
2521 * commits to ensure that both the original free and the current one combine
2522 * logically into the one checkpoint. If the checkpoint sequences are
2523 * different, however, we still need to wait on a log force.
2524 */
2525void 2478void
2526xfs_alloc_busy_insert( 2479xfs_alloc_busy_insert(
2527 struct xfs_trans *tp, 2480 struct xfs_trans *tp,
@@ -2533,9 +2486,7 @@ xfs_alloc_busy_insert(
2533 struct xfs_busy_extent *busyp; 2486 struct xfs_busy_extent *busyp;
2534 struct xfs_perag *pag; 2487 struct xfs_perag *pag;
2535 struct rb_node **rbp; 2488 struct rb_node **rbp;
2536 struct rb_node *parent; 2489 struct rb_node *parent = NULL;
2537 int match;
2538
2539 2490
2540 new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL); 2491 new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
2541 if (!new) { 2492 if (!new) {
@@ -2544,7 +2495,7 @@ xfs_alloc_busy_insert(
2544 * block, make this a synchronous transaction to insure that 2495 * block, make this a synchronous transaction to insure that
2545 * the block is not reused before this transaction commits. 2496 * the block is not reused before this transaction commits.
2546 */ 2497 */
2547 trace_xfs_alloc_busy(tp, agno, bno, len, 1); 2498 trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len);
2548 xfs_trans_set_sync(tp); 2499 xfs_trans_set_sync(tp);
2549 return; 2500 return;
2550 } 2501 }
@@ -2552,66 +2503,28 @@ xfs_alloc_busy_insert(
2552 new->agno = agno; 2503 new->agno = agno;
2553 new->bno = bno; 2504 new->bno = bno;
2554 new->length = len; 2505 new->length = len;
2555 new->tid = xfs_log_get_trans_ident(tp);
2556
2557 INIT_LIST_HEAD(&new->list); 2506 INIT_LIST_HEAD(&new->list);
2558 2507
2559 /* trace before insert to be able to see failed inserts */ 2508 /* trace before insert to be able to see failed inserts */
2560 trace_xfs_alloc_busy(tp, agno, bno, len, 0); 2509 trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len);
2561 2510
2562 pag = xfs_perag_get(tp->t_mountp, new->agno); 2511 pag = xfs_perag_get(tp->t_mountp, new->agno);
2563restart:
2564 spin_lock(&pag->pagb_lock); 2512 spin_lock(&pag->pagb_lock);
2565 rbp = &pag->pagb_tree.rb_node; 2513 rbp = &pag->pagb_tree.rb_node;
2566 parent = NULL; 2514 while (*rbp) {
2567 busyp = NULL;
2568 match = 0;
2569 while (*rbp && match >= 0) {
2570 parent = *rbp; 2515 parent = *rbp;
2571 busyp = rb_entry(parent, struct xfs_busy_extent, rb_node); 2516 busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);
2572 2517
2573 if (new->bno < busyp->bno) { 2518 if (new->bno < busyp->bno) {
2574 /* may overlap, but exact start block is lower */
2575 rbp = &(*rbp)->rb_left; 2519 rbp = &(*rbp)->rb_left;
2576 if (new->bno + new->length > busyp->bno) 2520 ASSERT(new->bno + new->length <= busyp->bno);
2577 match = busyp->tid == new->tid ? 1 : -1;
2578 } else if (new->bno > busyp->bno) { 2521 } else if (new->bno > busyp->bno) {
2579 /* may overlap, but exact start block is higher */
2580 rbp = &(*rbp)->rb_right; 2522 rbp = &(*rbp)->rb_right;
2581 if (bno < busyp->bno + busyp->length) 2523 ASSERT(bno >= busyp->bno + busyp->length);
2582 match = busyp->tid == new->tid ? 1 : -1;
2583 } else { 2524 } else {
2584 match = busyp->tid == new->tid ? 1 : -1; 2525 ASSERT(0);
2585 break;
2586 } 2526 }
2587 } 2527 }
2588 if (match < 0) {
2589 /* overlap marked busy in different transaction */
2590 spin_unlock(&pag->pagb_lock);
2591 xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
2592 goto restart;
2593 }
2594 if (match > 0) {
2595 /*
2596 * overlap marked busy in same transaction. Update if exact
2597 * start block match, otherwise combine the busy extents into
2598 * a single range.
2599 */
2600 if (busyp->bno == new->bno) {
2601 busyp->length = max(busyp->length, new->length);
2602 spin_unlock(&pag->pagb_lock);
2603 ASSERT(tp->t_flags & XFS_TRANS_SYNC);
2604 xfs_perag_put(pag);
2605 kmem_free(new);
2606 return;
2607 }
2608 rb_erase(&busyp->rb_node, &pag->pagb_tree);
2609 new->length = max(busyp->bno + busyp->length,
2610 new->bno + new->length) -
2611 min(busyp->bno, new->bno);
2612 new->bno = min(busyp->bno, new->bno);
2613 } else
2614 busyp = NULL;
2615 2528
2616 rb_link_node(&new->rb_node, parent, rbp); 2529 rb_link_node(&new->rb_node, parent, rbp);
2617 rb_insert_color(&new->rb_node, &pag->pagb_tree); 2530 rb_insert_color(&new->rb_node, &pag->pagb_tree);
@@ -2619,7 +2532,6 @@ restart:
2619 list_add(&new->list, &tp->t_busy); 2532 list_add(&new->list, &tp->t_busy);
2620 spin_unlock(&pag->pagb_lock); 2533 spin_unlock(&pag->pagb_lock);
2621 xfs_perag_put(pag); 2534 xfs_perag_put(pag);
2622 kmem_free(busyp);
2623} 2535}
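The simplified insert loop above replaces the tid matching and log-force retry that the removed comment describes: under the new scheme a busy extent is trimmed or invalidated at allocation time, so entries in the per-AG rbtree can never overlap and the descent only has to compare start blocks. A minimal standalone sketch of that invariant, using a plain binary tree and hypothetical types rather than the kernel's rbtree API:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

/* hypothetical stand-in for struct xfs_busy_extent in a plain BST */
struct busy_node {
	uint32_t bno;			/* start block */
	uint32_t len;			/* length in blocks */
	struct busy_node *left, *right;
};

/*
 * Walk a bno-sorted tree and return the link where a new busy extent
 * [bno, bno + len) would be attached.  Overlaps are a bug under the
 * new scheme, hence the asserts instead of a merge or log-force path.
 */
static struct busy_node **busy_find_link(struct busy_node **rbp,
					 uint32_t bno, uint32_t len)
{
	while (*rbp) {
		struct busy_node *busyp = *rbp;

		if (bno < busyp->bno) {
			assert(bno + len <= busyp->bno);
			rbp = &busyp->left;
		} else if (bno > busyp->bno) {
			assert(bno >= busyp->bno + busyp->len);
			rbp = &busyp->right;
		} else {
			/* duplicate start block: invariant violated */
			assert(0);
			abort();
		}
	}
	return rbp;
}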
2624 2536
2625/* 2537/*
@@ -2668,31 +2580,443 @@ xfs_alloc_busy_search(
2668 } 2580 }
2669 } 2581 }
2670 spin_unlock(&pag->pagb_lock); 2582 spin_unlock(&pag->pagb_lock);
2671 trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match);
2672 xfs_perag_put(pag); 2583 xfs_perag_put(pag);
2673 return match; 2584 return match;
2674} 2585}
2675 2586
2587/*
2588 * The found free extent [fbno, fend] overlaps part or all of the given busy
2589 * extent. If the overlap covers the beginning, the end, or all of the busy
2590 * extent, the overlapping portion can be made unbusy and used for the
2591 * allocation. We can't split a busy extent because we can't modify a
2592 * transaction/CIL context busy list, but we can update an entry's block
2593 * number or length.
2594 *
2595 * Returns true if the extent can safely be reused, or false if the search
2596 * needs to be restarted.
2597 */
2598STATIC bool
2599xfs_alloc_busy_update_extent(
2600 struct xfs_mount *mp,
2601 struct xfs_perag *pag,
2602 struct xfs_busy_extent *busyp,
2603 xfs_agblock_t fbno,
2604 xfs_extlen_t flen,
2605 bool userdata)
2606{
2607 xfs_agblock_t fend = fbno + flen;
2608 xfs_agblock_t bbno = busyp->bno;
2609 xfs_agblock_t bend = bbno + busyp->length;
2610
2611 /*
2612 * If there is a busy extent overlapping a user allocation, we have
2613 * no choice but to force the log and retry the search.
2614 *
2615 * Fortunately this does not happen during normal operation, but
2616 * only if the filesystem is very low on space and has to dip into
2617 * the AGFL for normal allocations.
2618 */
2619 if (userdata)
2620 goto out_force_log;
2621
2622 if (bbno < fbno && bend > fend) {
2623 /*
2624 * Case 1:
2625 * bbno bend
2626 * +BBBBBBBBBBBBBBBBB+
2627 * +---------+
2628 * fbno fend
2629 */
2630
2631 /*
2632 * We would have to split the busy extent to be able to track
2633 * it correctly, which we cannot do because we would have to
2634 * modify the list of busy extents attached to the transaction
2635 * or CIL context, which is immutable.
2636 *
2637 * Force out the log to clear the busy extent and retry the
2638 * search.
2639 */
2640 goto out_force_log;
2641 } else if (bbno >= fbno && bend <= fend) {
2642 /*
2643 * Case 2:
2644 * bbno bend
2645 * +BBBBBBBBBBBBBBBBB+
2646 * +-----------------+
2647 * fbno fend
2648 *
2649 * Case 3:
2650 * bbno bend
2651 * +BBBBBBBBBBBBBBBBB+
2652 * +--------------------------+
2653 * fbno fend
2654 *
2655 * Case 4:
2656 * bbno bend
2657 * +BBBBBBBBBBBBBBBBB+
2658 * +--------------------------+
2659 * fbno fend
2660 *
2661 * Case 5:
2662 * bbno bend
2663 * +BBBBBBBBBBBBBBBBB+
2664 * +-----------------------------------+
2665 * fbno fend
2666 *
2667 */
2668
2669 /*
2670 * The busy extent is fully covered by the extent we are
2671 * allocating, and can simply be removed from the rbtree.
2672 * However we cannot remove it from the immutable list
2673 * tracking busy extents in the transaction or CIL context,
2674 * so set the length to zero to mark it invalid.
2675 *
2676 * We also need to restart the busy extent search from the
2677 * tree root, because erasing the node can rearrange the
2678 * tree topology.
2679 */
2680 rb_erase(&busyp->rb_node, &pag->pagb_tree);
2681 busyp->length = 0;
2682 return false;
2683 } else if (fend < bend) {
2684 /*
2685 * Case 6:
2686 * bbno bend
2687 * +BBBBBBBBBBBBBBBBB+
2688 * +---------+
2689 * fbno fend
2690 *
2691 * Case 7:
2692 * bbno bend
2693 * +BBBBBBBBBBBBBBBBB+
2694 * +------------------+
2695 * fbno fend
2696 *
2697 */
2698 busyp->bno = fend;
2699 } else if (bbno < fbno) {
2700 /*
2701 * Case 8:
2702 * bbno bend
2703 * +BBBBBBBBBBBBBBBBB+
2704 * +-------------+
2705 * fbno fend
2706 *
2707 * Case 9:
2708 * bbno bend
2709 * +BBBBBBBBBBBBBBBBB+
2710 * +----------------------+
2711 * fbno fend
2712 */
2713 busyp->length = fbno - busyp->bno;
2714 } else {
2715 ASSERT(0);
2716 }
2717
2718 trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen);
2719 return true;
2720
2721out_force_log:
2722 spin_unlock(&pag->pagb_lock);
2723 xfs_log_force(mp, XFS_LOG_SYNC);
2724 trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen);
2725 spin_lock(&pag->pagb_lock);
2726 return false;
2727}
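The case diagrams above boil down to four outcomes. A small self-contained restatement of that classification (userspace C with simplified types and names; the kernel function applies the action directly instead of returning it):

#include <stdbool.h>
#include <stdint.h>

enum busy_action {
	BUSY_FORCE_LOG,		/* can't reuse: force the log and retry */
	BUSY_REMOVE,		/* busy extent fully covered: erase it */
	BUSY_TRIM_FRONT,	/* busyp->bno becomes fend */
	BUSY_TRIM_BACK,		/* busyp->length becomes fbno - bbno */
};

/*
 * Classify how a busy extent [bbno, bend) must be adjusted so the free
 * extent [fbno, fend) can be reused.  The caller has already verified
 * that the two ranges overlap.
 */
static enum busy_action busy_classify(uint32_t fbno, uint32_t fend,
				      uint32_t bbno, uint32_t bend,
				      bool userdata)
{
	if (userdata)
		return BUSY_FORCE_LOG;	/* never hand busy blocks to user data */
	if (bbno < fbno && bend > fend)
		return BUSY_FORCE_LOG;	/* case 1: would have to split the busy extent */
	if (bbno >= fbno && bend <= fend)
		return BUSY_REMOVE;	/* cases 2-5: fully covered */
	if (fend < bend)
		return BUSY_TRIM_FRONT;	/* cases 6-7 */
	return BUSY_TRIM_BACK;		/* cases 8-9 */
}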
2728
2729
2730/*
2731 * For a given extent [fbno, flen], make sure we can reuse it safely.
2732 */
2676void 2733void
2677xfs_alloc_busy_clear( 2734xfs_alloc_busy_reuse(
2678 struct xfs_mount *mp, 2735 struct xfs_mount *mp,
2679 struct xfs_busy_extent *busyp) 2736 xfs_agnumber_t agno,
2737 xfs_agblock_t fbno,
2738 xfs_extlen_t flen,
2739 bool userdata)
2680{ 2740{
2681 struct xfs_perag *pag; 2741 struct xfs_perag *pag;
2742 struct rb_node *rbp;
2682 2743
2683 trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno, 2744 ASSERT(flen > 0);
2684 busyp->length);
2685 2745
2686 ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno, 2746 pag = xfs_perag_get(mp, agno);
2687 busyp->length) == 1); 2747 spin_lock(&pag->pagb_lock);
2748restart:
2749 rbp = pag->pagb_tree.rb_node;
2750 while (rbp) {
2751 struct xfs_busy_extent *busyp =
2752 rb_entry(rbp, struct xfs_busy_extent, rb_node);
2753 xfs_agblock_t bbno = busyp->bno;
2754 xfs_agblock_t bend = bbno + busyp->length;
2688 2755
2689 list_del_init(&busyp->list); 2756 if (fbno + flen <= bbno) {
2757 rbp = rbp->rb_left;
2758 continue;
2759 } else if (fbno >= bend) {
2760 rbp = rbp->rb_right;
2761 continue;
2762 }
2690 2763
2691 pag = xfs_perag_get(mp, busyp->agno); 2764 if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen,
2692 spin_lock(&pag->pagb_lock); 2765 userdata))
2693 rb_erase(&busyp->rb_node, &pag->pagb_tree); 2766 goto restart;
2767 }
2694 spin_unlock(&pag->pagb_lock); 2768 spin_unlock(&pag->pagb_lock);
2695 xfs_perag_put(pag); 2769 xfs_perag_put(pag);
2770}
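The walk in xfs_alloc_busy_reuse() uses the usual interval rule to descend the bno-sorted tree: go left when the free range ends at or before the busy extent starts, go right when it starts at or after the busy extent ends, otherwise the two half-open ranges overlap and the busy extent has to be updated. A sketch of the overlap predicate this implies (hypothetical helper, not part of the patch):

#include <stdbool.h>
#include <stdint.h>

/* [fbno, fend) and [bbno, bend) are half-open block ranges */
static bool busy_ranges_overlap(uint32_t fbno, uint32_t fend,
				uint32_t bbno, uint32_t bend)
{
	return fbno < bend && bbno < fend;
}

Note that the loop restarts from the tree root whenever xfs_alloc_busy_update_extent() erases a node, since rb_erase() may rebalance the tree underneath the search.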
2771
2772/*
2773 * For a given extent [fbno, flen], search the busy extent list to find a
2774 * subset of the extent that is not busy. If *rlen is smaller than
2775 * args->minlen, no suitable extent could be found, and the higher level
2776 * code needs to force out the log and retry the allocation.
2777 */
2778STATIC void
2779xfs_alloc_busy_trim(
2780 struct xfs_alloc_arg *args,
2781 xfs_agblock_t bno,
2782 xfs_extlen_t len,
2783 xfs_agblock_t *rbno,
2784 xfs_extlen_t *rlen)
2785{
2786 xfs_agblock_t fbno;
2787 xfs_extlen_t flen;
2788 struct rb_node *rbp;
2789
2790 ASSERT(len > 0);
2696 2791
2792 spin_lock(&args->pag->pagb_lock);
2793restart:
2794 fbno = bno;
2795 flen = len;
2796 rbp = args->pag->pagb_tree.rb_node;
2797 while (rbp && flen >= args->minlen) {
2798 struct xfs_busy_extent *busyp =
2799 rb_entry(rbp, struct xfs_busy_extent, rb_node);
2800 xfs_agblock_t fend = fbno + flen;
2801 xfs_agblock_t bbno = busyp->bno;
2802 xfs_agblock_t bend = bbno + busyp->length;
2803
2804 if (fend <= bbno) {
2805 rbp = rbp->rb_left;
2806 continue;
2807 } else if (fbno >= bend) {
2808 rbp = rbp->rb_right;
2809 continue;
2810 }
2811
2812 /*
2813 * If this is a metadata allocation, try to reuse the busy
2814 * extent instead of trimming the allocation.
2815 */
2816 if (!args->userdata) {
2817 if (!xfs_alloc_busy_update_extent(args->mp, args->pag,
2818 busyp, fbno, flen,
2819 false))
2820 goto restart;
2821 continue;
2822 }
2823
2824 if (bbno <= fbno) {
2825 /* start overlap */
2826
2827 /*
2828 * Case 1:
2829 * bbno bend
2830 * +BBBBBBBBBBBBBBBBB+
2831 * +---------+
2832 * fbno fend
2833 *
2834 * Case 2:
2835 * bbno bend
2836 * +BBBBBBBBBBBBBBBBB+
2837 * +-------------+
2838 * fbno fend
2839 *
2840 * Case 3:
2841 * bbno bend
2842 * +BBBBBBBBBBBBBBBBB+
2843 * +-------------+
2844 * fbno fend
2845 *
2846 * Case 4:
2847 * bbno bend
2848 * +BBBBBBBBBBBBBBBBB+
2849 * +-----------------+
2850 * fbno fend
2851 *
2852 * No unbusy region in extent, return failure.
2853 */
2854 if (fend <= bend)
2855 goto fail;
2856
2857 /*
2858 * Case 5:
2859 * bbno bend
2860 * +BBBBBBBBBBBBBBBBB+
2861 * +----------------------+
2862 * fbno fend
2863 *
2864 * Case 6:
2865 * bbno bend
2866 * +BBBBBBBBBBBBBBBBB+
2867 * +--------------------------+
2868 * fbno fend
2869 *
2870 * Needs to be trimmed to:
2871 * +-------+
2872 * fbno fend
2873 */
2874 fbno = bend;
2875 } else if (bend >= fend) {
2876 /* end overlap */
2877
2878 /*
2879 * Case 7:
2880 * bbno bend
2881 * +BBBBBBBBBBBBBBBBB+
2882 * +------------------+
2883 * fbno fend
2884 *
2885 * Case 8:
2886 * bbno bend
2887 * +BBBBBBBBBBBBBBBBB+
2888 * +--------------------------+
2889 * fbno fend
2890 *
2891 * Needs to be trimmed to:
2892 * +-------+
2893 * fbno fend
2894 */
2895 fend = bbno;
2896 } else {
2897 /* middle overlap */
2898
2899 /*
2900 * Case 9:
2901 * bbno bend
2902 * +BBBBBBBBBBBBBBBBB+
2903 * +-----------------------------------+
2904 * fbno fend
2905 *
2906 * Can be trimmed to:
2907 * +-------+ OR +-------+
2908 * fbno fend fbno fend
2909 *
2910 * Backward allocation leads to significant
2911 * fragmentation of directories, which degrades
2912 * directory performance, so we always want to
2913 * choose the option that produces forward allocation
2914 * patterns.
2915 * Preferring the lower bno extent will make the next
2916 * request use "fend" as the start of the next
2917 * allocation; if the segment is no longer busy at
2918 * that point, we'll get a contiguous allocation, but
2919 * even if it is still busy, we will get a forward
2920 * allocation.
2921 * We try to avoid choosing the segment at "bend",
2922 * because that can lead to the next allocation
2923 * taking the segment at "fbno", which would be a
2924 * backward allocation. We only use the segment at
2925 * "fbno" if it is much larger than the current
2926 * requested size, because in that case there's a
2927 * good chance subsequent allocations will be
2928 * contiguous.
2929 */
2930 if (bbno - fbno >= args->maxlen) {
2931 /* left candidate fits perfectly */
2932 fend = bbno;
2933 } else if (fend - bend >= args->maxlen * 4) {
2934 /* right candidate has enough free space */
2935 fbno = bend;
2936 } else if (bbno - fbno >= args->minlen) {
2937 /* left candidate fits minimum requirement */
2938 fend = bbno;
2939 } else {
2940 goto fail;
2941 }
2942 }
2943
2944 flen = fend - fbno;
2945 }
2946 spin_unlock(&args->pag->pagb_lock);
2947
2948 if (fbno != bno || flen != len) {
2949 trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len,
2950 fbno, flen);
2951 }
2952 *rbno = fbno;
2953 *rlen = flen;
2954 return;
2955fail:
2956 /*
2957 * Return a zero extent length as a failure indication. All callers
2958 * re-check if the trimmed extent satisfies the minlen requirement.
2959 */
2960 spin_unlock(&args->pag->pagb_lock);
2961 trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
2962 *rbno = fbno;
2963 *rlen = 0;
2964}
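The subtle branch in the trim loop is the middle overlap (case 9), where the free extent is split in two by a busy extent and one remainder has to be chosen. A minimal userspace sketch of that preference order, with simplified names and types standing in for the xfs_alloc_arg fields:

#include <stdbool.h>
#include <stdint.h>

/*
 * Free extent [fbno, fbno + flen) contains a busy extent
 * [bbno, bbno + blen) strictly inside it.  Choose the left or right
 * remainder using the same order as xfs_alloc_busy_trim(): forward
 * (lower block) allocation wins unless the right side is much larger
 * than the request.  Returns false when neither side is usable, in
 * which case the caller forces the log and retries.
 */
static bool trim_middle_overlap(uint32_t fbno, uint32_t flen,
				uint32_t bbno, uint32_t blen,
				uint32_t minlen, uint32_t maxlen,
				uint32_t *rbno, uint32_t *rlen)
{
	uint32_t fend = fbno + flen;
	uint32_t bend = bbno + blen;

	if (bbno - fbno >= maxlen) {
		/* left remainder satisfies the full request */
		*rbno = fbno;
		*rlen = bbno - fbno;
	} else if (fend - bend >= maxlen * 4) {
		/* right remainder has plenty of free space */
		*rbno = bend;
		*rlen = fend - bend;
	} else if (bbno - fbno >= minlen) {
		/* left remainder at least meets the minimum */
		*rbno = fbno;
		*rlen = bbno - fbno;
	} else {
		return false;
	}
	return true;
}

The left remainder is preferred so allocations keep moving forward on disk; the right remainder is only taken when it dwarfs the request (four times maxlen), since such a large run makes contiguous follow-on allocations likely.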
2965
2966static void
2967xfs_alloc_busy_clear_one(
2968 struct xfs_mount *mp,
2969 struct xfs_perag *pag,
2970 struct xfs_busy_extent *busyp)
2971{
2972 if (busyp->length) {
2973 trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno,
2974 busyp->length);
2975 rb_erase(&busyp->rb_node, &pag->pagb_tree);
2976 }
2977
2978 list_del_init(&busyp->list);
2697 kmem_free(busyp); 2979 kmem_free(busyp);
2698} 2980}
2981
2982void
2983xfs_alloc_busy_clear(
2984 struct xfs_mount *mp,
2985 struct list_head *list)
2986{
2987 struct xfs_busy_extent *busyp, *n;
2988 struct xfs_perag *pag = NULL;
2989 xfs_agnumber_t agno = NULLAGNUMBER;
2990
2991 list_for_each_entry_safe(busyp, n, list, list) {
2992 if (busyp->agno != agno) {
2993 if (pag) {
2994 spin_unlock(&pag->pagb_lock);
2995 xfs_perag_put(pag);
2996 }
2997 pag = xfs_perag_get(mp, busyp->agno);
2998 spin_lock(&pag->pagb_lock);
2999 agno = busyp->agno;
3000 }
3001
3002 xfs_alloc_busy_clear_one(mp, pag, busyp);
3003 }
3004
3005 if (pag) {
3006 spin_unlock(&pag->pagb_lock);
3007 xfs_perag_put(pag);
3008 }
3009}
3010
3011/*
3012 * Callback for list_sort to sort busy extents by the AG they reside in.
3013 */
3014int
3015xfs_busy_extent_ag_cmp(
3016 void *priv,
3017 struct list_head *a,
3018 struct list_head *b)
3019{
3020 return container_of(a, struct xfs_busy_extent, list)->agno -
3021 container_of(b, struct xfs_busy_extent, list)->agno;
3022}
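The comparator above exists so that completion paths can sort a busy list by AG before clearing it, which lets xfs_alloc_busy_clear() take each per-AG pagb_lock only once per run of entries instead of once per extent. A standalone sketch of that sort-then-batch pattern with hypothetical userspace types (the comparator below uses the overflow-safe form rather than the plain subtraction in the patch):

#include <stdio.h>
#include <stdlib.h>

/* hypothetical flattened busy-extent record */
struct busy_rec {
	unsigned int agno;
	unsigned int bno;
	unsigned int len;
};

static int busy_ag_cmp(const void *a, const void *b)
{
	const struct busy_rec *ba = a, *bb = b;

	return (ba->agno > bb->agno) - (ba->agno < bb->agno);
}

/* clear all records, switching (and "locking") each AG only once */
static void busy_clear_all(struct busy_rec *recs, size_t n)
{
	unsigned int cur_agno = (unsigned int)-1;

	qsort(recs, n, sizeof(*recs), busy_ag_cmp);
	for (size_t i = 0; i < n; i++) {
		if (recs[i].agno != cur_agno) {
			/* unlock previous AG, lock recs[i].agno here */
			cur_agno = recs[i].agno;
			printf("switching to AG %u\n", cur_agno);
		}
		/* erase recs[i] from this AG's busy tree */
	}
	/* unlock the final AG */
}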
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index d0b3bc72005b..240ad288f2f9 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -140,11 +140,24 @@ xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno,
140 xfs_agblock_t bno, xfs_extlen_t len); 140 xfs_agblock_t bno, xfs_extlen_t len);
141 141
142void 142void
143xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp); 143xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list);
144 144
145int 145int
146xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, 146xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
147 xfs_agblock_t bno, xfs_extlen_t len); 147 xfs_agblock_t bno, xfs_extlen_t len);
148
149void
150xfs_alloc_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno,
151 xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata);
152
153int
154xfs_busy_extent_ag_cmp(void *priv, struct list_head *a, struct list_head *b);
155
156static inline void xfs_alloc_busy_sort(struct list_head *list)
157{
158 list_sort(NULL, list, xfs_busy_extent_ag_cmp);
159}
160
148#endif /* __KERNEL__ */ 161#endif /* __KERNEL__ */
149 162
150/* 163/*
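Usage note: the new xfs_alloc_busy_sort()/xfs_alloc_busy_clear() pair is meant to be called back to back on a transaction's or checkpoint's busy list, exactly as the later hunks to xfs_trans_free() and xlog_cil_committed() do, for example:

	xfs_alloc_busy_sort(&tp->t_busy);
	xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy);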
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 3916925e2584..8b469d53599f 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -95,6 +95,8 @@ xfs_allocbt_alloc_block(
95 return 0; 95 return 0;
96 } 96 }
97 97
98 xfs_alloc_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
99
98 xfs_trans_agbtree_delta(cur->bc_tp, 1); 100 xfs_trans_agbtree_delta(cur->bc_tp, 1);
99 new->s = cpu_to_be32(bno); 101 new->s = cpu_to_be32(bno);
100 102
@@ -118,17 +120,6 @@ xfs_allocbt_free_block(
118 if (error) 120 if (error)
119 return error; 121 return error;
120 122
121 /*
122 * Since blocks move to the free list without the coordination used in
123 * xfs_bmap_finish, we can't allow block to be available for
124 * reallocation and non-transaction writing (user data) until we know
125 * that the transaction that moved it to the free list is permanently
126 * on disk. We track the blocks by declaring these blocks as "busy";
127 * the busy list is maintained on a per-ag basis and each transaction
128 * records which entries should be removed when the iclog commits to
129 * disk. If a busy block is allocated, the iclog is pushed up to the
130 * LSN that freed the block.
131 */
132 xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); 123 xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
133 xfs_trans_agbtree_delta(cur->bc_tp, -1); 124 xfs_trans_agbtree_delta(cur->bc_tp, -1);
134 return 0; 125 return 0;
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index be628677c288..9a84a85c03b1 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -202,7 +202,7 @@ xfs_swap_extents(
202 xfs_inode_t *tip, /* tmp inode */ 202 xfs_inode_t *tip, /* tmp inode */
203 xfs_swapext_t *sxp) 203 xfs_swapext_t *sxp)
204{ 204{
205 xfs_mount_t *mp; 205 xfs_mount_t *mp = ip->i_mount;
206 xfs_trans_t *tp; 206 xfs_trans_t *tp;
207 xfs_bstat_t *sbp = &sxp->sx_stat; 207 xfs_bstat_t *sbp = &sxp->sx_stat;
208 xfs_ifork_t *tempifp, *ifp, *tifp; 208 xfs_ifork_t *tempifp, *ifp, *tifp;
@@ -212,16 +212,12 @@ xfs_swap_extents(
212 int taforkblks = 0; 212 int taforkblks = 0;
213 __uint64_t tmp; 213 __uint64_t tmp;
214 214
215 mp = ip->i_mount;
216
217 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); 215 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
218 if (!tempifp) { 216 if (!tempifp) {
219 error = XFS_ERROR(ENOMEM); 217 error = XFS_ERROR(ENOMEM);
220 goto out; 218 goto out;
221 } 219 }
222 220
223 sbp = &sxp->sx_stat;
224
225 /* 221 /*
226 * we have to do two separate lock calls here to keep lockdep 222 * we have to do two separate lock calls here to keep lockdep
227 * happy. If we try to get all the locks in one call, lock will 223 * happy. If we try to get all the locks in one call, lock will
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a37480a6e023..c8e3349c287c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1354,7 +1354,7 @@ xfs_itruncate_start(
1354 return 0; 1354 return 0;
1355 } 1355 }
1356 last_byte = xfs_file_last_byte(ip); 1356 last_byte = xfs_file_last_byte(ip);
1357 trace_xfs_itruncate_start(ip, flags, new_size, toss_start, last_byte); 1357 trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte);
1358 if (last_byte > toss_start) { 1358 if (last_byte > toss_start) {
1359 if (flags & XFS_ITRUNC_DEFINITE) { 1359 if (flags & XFS_ITRUNC_DEFINITE) {
1360 xfs_tosspages(ip, toss_start, 1360 xfs_tosspages(ip, toss_start,
@@ -1470,7 +1470,7 @@ xfs_itruncate_finish(
1470 * file but the log buffers containing the free and reallocation 1470 * file but the log buffers containing the free and reallocation
1471 * don't, then we'd end up with garbage in the blocks being freed. 1471 * don't, then we'd end up with garbage in the blocks being freed.
1472 * As long as we make the new_size permanent before actually 1472 * As long as we make the new_size permanent before actually
1473 * freeing any blocks it doesn't matter if they get writtten to. 1473 * freeing any blocks it doesn't matter if they get written to.
1474 * 1474 *
1475 * The callers must signal into us whether or not the size 1475 * The callers must signal into us whether or not the size
1476 * setting here must be synchronous. There are a few cases 1476 * setting here must be synchronous. There are a few cases
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 576fdfe81d60..09983a3344a5 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -970,7 +970,6 @@ xfs_iflush_abort(
970{ 970{
971 xfs_inode_log_item_t *iip = ip->i_itemp; 971 xfs_inode_log_item_t *iip = ip->i_itemp;
972 972
973 iip = ip->i_itemp;
974 if (iip) { 973 if (iip) {
975 struct xfs_ail *ailp = iip->ili_item.li_ailp; 974 struct xfs_ail *ailp = iip->ili_item.li_ailp;
976 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { 975 if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index b612ce4520ae..211930246f20 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1449,6 +1449,13 @@ xlog_dealloc_log(xlog_t *log)
1449 1449
1450 xlog_cil_destroy(log); 1450 xlog_cil_destroy(log);
1451 1451
1452 /*
1453 * always need to ensure that the extra buffer does not point to memory
1454 * owned by another log buffer before we free it.
1455 */
1456 xfs_buf_set_empty(log->l_xbuf, log->l_iclog_size);
1457 xfs_buf_free(log->l_xbuf);
1458
1452 iclog = log->l_iclog; 1459 iclog = log->l_iclog;
1453 for (i=0; i<log->l_iclog_bufs; i++) { 1460 for (i=0; i<log->l_iclog_bufs; i++) {
1454 xfs_buf_free(iclog->ic_bp); 1461 xfs_buf_free(iclog->ic_bp);
@@ -1458,7 +1465,6 @@ xlog_dealloc_log(xlog_t *log)
1458 } 1465 }
1459 spinlock_destroy(&log->l_icloglock); 1466 spinlock_destroy(&log->l_icloglock);
1460 1467
1461 xfs_buf_free(log->l_xbuf);
1462 log->l_mp->m_log = NULL; 1468 log->l_mp->m_log = NULL;
1463 kmem_free(log); 1469 kmem_free(log);
1464} /* xlog_dealloc_log */ 1470} /* xlog_dealloc_log */
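The comment added above encodes an ordering rule for teardown: l_xbuf may still map memory owned by one of the iclog buffers, so it is reset to an empty state and freed before the iclog buffers are. A rough sketch of the same pattern with hypothetical stand-in types (the real helpers are xfs_buf_set_empty() and xfs_buf_free()):

#include <stdlib.h>

struct fake_buf {
	void *mem;		/* may alias memory owned by another buffer */
	int owns_mem;
};

static void fake_buf_set_empty(struct fake_buf *bp)
{
	bp->mem = NULL;		/* drop the aliased mapping */
	bp->owns_mem = 0;
}

static void fake_buf_free(struct fake_buf *bp)
{
	if (bp->owns_mem)
		free(bp->mem);
	free(bp);
}

/* teardown order: detach and free the aliasing buffer first */
static void teardown(struct fake_buf *extra, struct fake_buf **iclogs, int n)
{
	fake_buf_set_empty(extra);
	fake_buf_free(extra);

	for (int i = 0; i < n; i++)
		fake_buf_free(iclogs[i]);
}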
@@ -3248,13 +3254,6 @@ xfs_log_ticket_get(
3248 return ticket; 3254 return ticket;
3249} 3255}
3250 3256
3251xlog_tid_t
3252xfs_log_get_trans_ident(
3253 struct xfs_trans *tp)
3254{
3255 return tp->t_ticket->t_tid;
3256}
3257
3258/* 3257/*
3259 * Allocate and initialise a new log ticket. 3258 * Allocate and initialise a new log ticket.
3260 */ 3259 */
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 3bd3291ef8d2..78c9039994af 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -189,8 +189,6 @@ void xlog_iodone(struct xfs_buf *);
189struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); 189struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
190void xfs_log_ticket_put(struct xlog_ticket *ticket); 190void xfs_log_ticket_put(struct xlog_ticket *ticket);
191 191
192xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp);
193
194void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, 192void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
195 struct xfs_log_vec *log_vector, 193 struct xfs_log_vec *log_vector,
196 xfs_lsn_t *commit_lsn, int flags); 194 xfs_lsn_t *commit_lsn, int flags);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 9ca59be08977..7d56e88a3f0e 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -361,13 +361,12 @@ xlog_cil_committed(
361 int abort) 361 int abort)
362{ 362{
363 struct xfs_cil_ctx *ctx = args; 363 struct xfs_cil_ctx *ctx = args;
364 struct xfs_busy_extent *busyp, *n;
365 364
366 xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, 365 xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
367 ctx->start_lsn, abort); 366 ctx->start_lsn, abort);
368 367
369 list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) 368 xfs_alloc_busy_sort(&ctx->busy_extents);
370 xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); 369 xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, &ctx->busy_extents);
371 370
372 spin_lock(&ctx->cil->xc_cil_lock); 371 spin_lock(&ctx->cil->xc_cil_lock);
373 list_del(&ctx->committing); 372 list_del(&ctx->committing);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 5864850e9e34..2d3b6a498d63 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -146,6 +146,8 @@ static inline uint xlog_get_client_id(__be32 i)
146 shutdown */ 146 shutdown */
147#define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */ 147#define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */
148 148
149typedef __uint32_t xlog_tid_t;
150
149#ifdef __KERNEL__ 151#ifdef __KERNEL__
150/* 152/*
151 * Below are states for covering allocation transactions. 153 * Below are states for covering allocation transactions.
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 5cc464a17c93..04142caedb2b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -205,6 +205,35 @@ xlog_bread(
205} 205}
206 206
207/* 207/*
208 * Read at an offset into the buffer. Returns with the buffer in its original
209 * state regardless of the result of the read.
210 */
211STATIC int
212xlog_bread_offset(
213 xlog_t *log,
214 xfs_daddr_t blk_no, /* block to read from */
215 int nbblks, /* blocks to read */
216 xfs_buf_t *bp,
217 xfs_caddr_t offset)
218{
219 xfs_caddr_t orig_offset = XFS_BUF_PTR(bp);
220 int orig_len = bp->b_buffer_length;
221 int error, error2;
222
223 error = XFS_BUF_SET_PTR(bp, offset, BBTOB(nbblks));
224 if (error)
225 return error;
226
227 error = xlog_bread_noalign(log, blk_no, nbblks, bp);
228
229 /* must reset buffer pointer even on error */
230 error2 = XFS_BUF_SET_PTR(bp, orig_offset, orig_len);
231 if (error)
232 return error;
233 return error2;
234}
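xlog_bread_offset() factors out a save/redirect/restore shape that the recovery-pass hunks below previously open-coded: repoint the buffer, do the read, then restore the original mapping even if the read failed, returning the first error seen. A standalone restatement with hypothetical stand-in types:

/* hypothetical stand-ins for xfs_buf and its accessors */
struct xbuf {
	char *ptr;
	int len;
};

static int xbuf_set_ptr(struct xbuf *bp, char *ptr, int len)
{
	bp->ptr = ptr;
	bp->len = len;
	return 0;
}

static int do_read(struct xbuf *bp)
{
	(void)bp;		/* the real helper issues block I/O here */
	return 0;
}

static int read_at_offset(struct xbuf *bp, char *offset, int len)
{
	char *orig_ptr = bp->ptr;
	int orig_len = bp->len;
	int error, error2;

	error = xbuf_set_ptr(bp, offset, len);
	if (error)
		return error;

	error = do_read(bp);

	/* must restore the original mapping even on error */
	error2 = xbuf_set_ptr(bp, orig_ptr, orig_len);
	if (error)
		return error;
	return error2;
}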
235
236/*
208 * Write out the buffer at the given block for the given number of blocks. 237 * Write out the buffer at the given block for the given number of blocks.
209 * The buffer is kept locked across the write and is returned locked. 238 * The buffer is kept locked across the write and is returned locked.
210 * This can only be used for synchronous log writes. 239 * This can only be used for synchronous log writes.
@@ -1229,20 +1258,12 @@ xlog_write_log_records(
1229 */ 1258 */
1230 ealign = round_down(end_block, sectbb); 1259 ealign = round_down(end_block, sectbb);
1231 if (j == 0 && (start_block + endcount > ealign)) { 1260 if (j == 0 && (start_block + endcount > ealign)) {
1232 offset = XFS_BUF_PTR(bp); 1261 offset = XFS_BUF_PTR(bp) + BBTOB(ealign - start_block);
1233 balign = BBTOB(ealign - start_block); 1262 error = xlog_bread_offset(log, ealign, sectbb,
1234 error = XFS_BUF_SET_PTR(bp, offset + balign, 1263 bp, offset);
1235 BBTOB(sectbb));
1236 if (error) 1264 if (error)
1237 break; 1265 break;
1238 1266
1239 error = xlog_bread_noalign(log, ealign, sectbb, bp);
1240 if (error)
1241 break;
1242
1243 error = XFS_BUF_SET_PTR(bp, offset, bufblks);
1244 if (error)
1245 break;
1246 } 1267 }
1247 1268
1248 offset = xlog_align(log, start_block, endcount, bp); 1269 offset = xlog_align(log, start_block, endcount, bp);
@@ -3448,19 +3469,9 @@ xlog_do_recovery_pass(
3448 * - order is important. 3469 * - order is important.
3449 */ 3470 */
3450 wrapped_hblks = hblks - split_hblks; 3471 wrapped_hblks = hblks - split_hblks;
3451 error = XFS_BUF_SET_PTR(hbp, 3472 error = xlog_bread_offset(log, 0,
3452 offset + BBTOB(split_hblks), 3473 wrapped_hblks, hbp,
3453 BBTOB(hblks - split_hblks)); 3474 offset + BBTOB(split_hblks));
3454 if (error)
3455 goto bread_err2;
3456
3457 error = xlog_bread_noalign(log, 0,
3458 wrapped_hblks, hbp);
3459 if (error)
3460 goto bread_err2;
3461
3462 error = XFS_BUF_SET_PTR(hbp, offset,
3463 BBTOB(hblks));
3464 if (error) 3475 if (error)
3465 goto bread_err2; 3476 goto bread_err2;
3466 } 3477 }
@@ -3511,19 +3522,9 @@ xlog_do_recovery_pass(
3511 * _first_, then the log start (LR header end) 3522 * _first_, then the log start (LR header end)
3512 * - order is important. 3523 * - order is important.
3513 */ 3524 */
3514 error = XFS_BUF_SET_PTR(dbp, 3525 error = xlog_bread_offset(log, 0,
3515 offset + BBTOB(split_bblks), 3526 bblks - split_bblks, hbp,
3516 BBTOB(bblks - split_bblks)); 3527 offset + BBTOB(split_bblks));
3517 if (error)
3518 goto bread_err2;
3519
3520 error = xlog_bread_noalign(log, wrapped_hblks,
3521 bblks - split_bblks,
3522 dbp);
3523 if (error)
3524 goto bread_err2;
3525
3526 error = XFS_BUF_SET_PTR(dbp, offset, h_size);
3527 if (error) 3528 if (error)
3528 goto bread_err2; 3529 goto bread_err2;
3529 } 3530 }
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index bb3f9a7b24ed..b49b82363d20 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1900,7 +1900,7 @@ xfs_mod_incore_sb_batch(
1900 uint nmsb, 1900 uint nmsb,
1901 int rsvd) 1901 int rsvd)
1902{ 1902{
1903 xfs_mod_sb_t *msbp = &msb[0]; 1903 xfs_mod_sb_t *msbp;
1904 int error = 0; 1904 int error = 0;
1905 1905
1906 /* 1906 /*
@@ -1910,7 +1910,7 @@ xfs_mod_incore_sb_batch(
1910 * changes will be atomic. 1910 * changes will be atomic.
1911 */ 1911 */
1912 spin_lock(&mp->m_sb_lock); 1912 spin_lock(&mp->m_sb_lock);
1913 for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) { 1913 for (msbp = msb; msbp < (msb + nmsb); msbp++) {
1914 ASSERT(msbp->msb_field < XFS_SBS_ICOUNT || 1914 ASSERT(msbp->msb_field < XFS_SBS_ICOUNT ||
1915 msbp->msb_field > XFS_SBS_FDBLOCKS); 1915 msbp->msb_field > XFS_SBS_FDBLOCKS);
1916 1916
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 76922793f64f..d1f24858ccc4 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -608,10 +608,8 @@ STATIC void
608xfs_trans_free( 608xfs_trans_free(
609 struct xfs_trans *tp) 609 struct xfs_trans *tp)
610{ 610{
611 struct xfs_busy_extent *busyp, *n; 611 xfs_alloc_busy_sort(&tp->t_busy);
612 612 xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy);
613 list_for_each_entry_safe(busyp, n, &tp->t_busy, list)
614 xfs_alloc_busy_clear(tp->t_mountp, busyp);
615 613
616 atomic_dec(&tp->t_mountp->m_active_trans); 614 atomic_dec(&tp->t_mountp->m_active_trans);
617 xfs_trans_free_dqinfo(tp); 615 xfs_trans_free_dqinfo(tp);
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 26d1867d8156..65584b55607d 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -73,8 +73,6 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */
73typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ 73typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
74typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ 74typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */
75 75
76typedef __uint32_t xlog_tid_t; /* transaction ID type */
77
78/* 76/*
79 * These types are 64 bits on disk but are either 32 or 64 bits in memory. 77 * These types are 64 bits on disk but are either 32 or 64 bits in memory.
80 * Disk based types: 78 * Disk based types: