author     Ingo Molnar <mingo@elte.hu>   2008-12-18 15:54:49 -0500
committer  Ingo Molnar <mingo@elte.hu>   2008-12-18 15:54:49 -0500
commit     d110ec3a1e1f522e2e9dfceb9c36d6590c26d2d4 (patch)
tree       86b2f8f1d22b74b05239525c55bd42e3db6afc03 /fs
parent     343e9099c8152daff20e10d6269edec21da44fc0 (diff)
parent     55dac3a5553b13891f0ae4bbd11920619b5436d4 (diff)
Merge branch 'linus' into core/rcu
Diffstat (limited to 'fs')
-rw-r--r--  fs/Makefile | 2
-rw-r--r--  fs/autofs4/dev-ioctl.c | 5
-rw-r--r--  fs/autofs4/expire.c | 19
-rw-r--r--  fs/block_dev.c | 44
-rw-r--r--  fs/buffer.c | 1
-rw-r--r--  fs/cifs/CHANGES | 10
-rw-r--r--  fs/cifs/cifs_debug.c | 277
-rw-r--r--  fs/cifs/cifs_dfs_ref.c | 71
-rw-r--r--  fs/cifs/cifs_spnego.c | 4
-rw-r--r--  fs/cifs/cifsfs.c | 30
-rw-r--r--  fs/cifs/cifsglob.h | 51
-rw-r--r--  fs/cifs/cifsproto.h | 2
-rw-r--r--  fs/cifs/cifssmb.c | 137
-rw-r--r--  fs/cifs/connect.c | 873
-rw-r--r--  fs/cifs/file.c | 107
-rw-r--r--  fs/cifs/inode.c | 4
-rw-r--r--  fs/cifs/misc.c | 93
-rw-r--r--  fs/cifs/readdir.c | 5
-rw-r--r--  fs/cifs/transport.c | 48
-rw-r--r--  fs/dlm/lockspace.c | 2
-rw-r--r--  fs/ecryptfs/keystore.c | 31
-rw-r--r--  fs/eventpoll.c | 85
-rw-r--r--  fs/exec.c | 10
-rw-r--r--  fs/exportfs/expfs.c | 4
-rw-r--r--  fs/ext3/super.c | 18
-rw-r--r--  fs/ext4/balloc.c | 4
-rw-r--r--  fs/ext4/ialloc.c | 2
-rw-r--r--  fs/ext4/inode.c | 7
-rw-r--r--  fs/ext4/mballoc.c | 1
-rw-r--r--  fs/ext4/super.c | 24
-rw-r--r--  fs/fat/Makefile | 6
-rw-r--r--  fs/fat/cache.c | 25
-rw-r--r--  fs/fat/dir.c | 20
-rw-r--r--  fs/fat/fat.h | 329
-rw-r--r--  fs/fat/fatent.c | 24
-rw-r--r--  fs/fat/file.c | 49
-rw-r--r--  fs/fat/inode.c | 131
-rw-r--r--  fs/fat/misc.c | 155
-rw-r--r--  fs/fat/namei_msdos.c (renamed from fs/msdos/namei.c) | 42
-rw-r--r--  fs/fat/namei_vfat.c (renamed from fs/vfat/namei.c) | 161
-rw-r--r--  fs/fcntl.c | 7
-rw-r--r--  fs/hostfs/hostfs.h | 2
-rw-r--r--  fs/hostfs/hostfs_kern.c | 4
-rw-r--r--  fs/hostfs/hostfs_user.c | 2
-rw-r--r--  fs/inotify.c | 152
-rw-r--r--  fs/ioctl.c | 12
-rw-r--r--  fs/jbd/checkpoint.c | 31
-rw-r--r--  fs/jbd2/checkpoint.c | 32
-rw-r--r--  fs/jbd2/journal.c | 2
-rw-r--r--  fs/jffs2/background.c | 10
-rw-r--r--  fs/jffs2/compr_lzo.c | 15
-rw-r--r--  fs/jffs2/nodemgmt.c | 2
-rw-r--r--  fs/lockd/host.c | 3
-rw-r--r--  fs/lockd/svc.c | 1
-rw-r--r--  fs/msdos/Makefile | 7
-rw-r--r--  fs/namei.c | 2
-rw-r--r--  fs/namespace.c | 4
-rw-r--r--  fs/nfsd/nfs4recover.c | 2
-rw-r--r--  fs/nfsd/nfs4state.c | 1
-rw-r--r--  fs/nfsd/vfs.c | 5
-rw-r--r--  fs/ntfs/debug.h | 8
-rw-r--r--  fs/ocfs2/buffer_head_io.c | 15
-rw-r--r--  fs/ocfs2/dlm/dlmfs.c | 4
-rw-r--r--  fs/ocfs2/dlm/userdlm.h | 2
-rw-r--r--  fs/ocfs2/dlmglue.c | 3
-rw-r--r--  fs/ocfs2/file.c | 27
-rw-r--r--  fs/ocfs2/inode.c | 6
-rw-r--r--  fs/ocfs2/journal.c | 1
-rw-r--r--  fs/ocfs2/mmap.c | 6
-rw-r--r--  fs/ocfs2/namei.c | 8
-rw-r--r--  fs/ocfs2/ocfs2.h | 5
-rw-r--r--  fs/ocfs2/ocfs2_fs.h | 25
-rw-r--r--  fs/ocfs2/stack_user.c | 3
-rw-r--r--  fs/ocfs2/xattr.c | 376
-rw-r--r--  fs/ocfs2/xattr.h | 38
-rw-r--r--  fs/partitions/check.c | 31
-rw-r--r--  fs/proc/base.c | 2
-rw-r--r--  fs/proc/proc_sysctl.c | 1
-rw-r--r--  fs/proc/task_mmu.c | 4
-rw-r--r--  fs/proc/uptime.c | 38
-rw-r--r--  fs/ubifs/commit.c | 4
-rw-r--r--  fs/ubifs/debug.c | 66
-rw-r--r--  fs/ubifs/dir.c | 5
-rw-r--r--  fs/ubifs/file.c | 91
-rw-r--r--  fs/ubifs/journal.c | 8
-rw-r--r--  fs/ubifs/key.h | 4
-rw-r--r--  fs/ubifs/lpt_commit.c | 2
-rw-r--r--  fs/ubifs/orphan.c | 28
-rw-r--r--  fs/ubifs/recovery.c | 17
-rw-r--r--  fs/ubifs/replay.c | 2
-rw-r--r--  fs/ubifs/sb.c | 9
-rw-r--r--  fs/ubifs/super.c | 70
-rw-r--r--  fs/ubifs/tnc.c | 12
-rw-r--r--  fs/ubifs/ubifs.h | 12
-rw-r--r--  fs/udf/inode.c | 1
-rw-r--r--  fs/vfat/Makefile | 7
-rw-r--r--  fs/xfs/xfs_da_btree.c | 5
-rw-r--r--  fs/xfs/xfs_dir2.c | 6
-rw-r--r--  fs/xfs/xfs_inode.c | 2
-rw-r--r--  fs/xfs/xfs_log.c | 39
-rw-r--r--  fs/xfs/xfs_log_recover.c | 8
-rw-r--r--  fs/xfs/xfs_mount.c | 5
-rw-r--r--  fs/xfs/xfs_rename.c | 2
103 files changed, 2676 insertions(+), 1541 deletions(-)
diff --git a/fs/Makefile b/fs/Makefile
index 2168c902d5ca..d9f8afe6f0c4 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -81,8 +81,6 @@ obj-$(CONFIG_HUGETLBFS) += hugetlbfs/
 obj-$(CONFIG_CODA_FS)		+= coda/
 obj-$(CONFIG_MINIX_FS)		+= minix/
 obj-$(CONFIG_FAT_FS)		+= fat/
-obj-$(CONFIG_MSDOS_FS)		+= msdos/
-obj-$(CONFIG_VFAT_FS)		+= vfat/
 obj-$(CONFIG_BFS_FS)		+= bfs/
 obj-$(CONFIG_ISO9660_FS)	+= isofs/
 obj-$(CONFIG_HFSPLUS_FS)	+= hfsplus/ # Before hfs to find wrapped HFS+
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 625abf5422e2..33bf8cbfd051 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -128,9 +128,10 @@ static inline void free_dev_ioctl(struct autofs_dev_ioctl *param)
  */
 static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
 {
-	int err = -EINVAL;
+	int err;
 
-	if (check_dev_ioctl_version(cmd, param)) {
+	err = check_dev_ioctl_version(cmd, param);
+	if (err) {
 		AUTOFS_WARN("invalid device control module version "
 			    "supplied for cmd(0x%08x)", cmd);
 		goto out;
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index cde2f8e8935a..4b6fb3f628c0 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -56,12 +56,23 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
 	mntget(mnt);
 	dget(dentry);
 
-	if (!autofs4_follow_mount(&mnt, &dentry))
+	if (!follow_down(&mnt, &dentry))
 		goto done;
 
-	/* This is an autofs submount, we can't expire it */
-	if (is_autofs4_dentry(dentry))
-		goto done;
+	if (is_autofs4_dentry(dentry)) {
+		struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+
+		/* This is an autofs submount, we can't expire it */
+		if (sbi->type == AUTOFS_TYPE_INDIRECT)
+			goto done;
+
+		/*
+		 * Otherwise it's an offset mount and we need to check
+		 * if we can umount its mount, if there is one.
+		 */
+		if (!d_mountpoint(dentry))
+			goto done;
+	}
 
 	/* Update the expiry counter if fs is busy */
 	if (!may_umount_tree(mnt)) {
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 88a776fa0ef6..99e0ae1a4c78 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -986,7 +986,6 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
 static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 {
 	struct gendisk *disk;
-	struct hd_struct *part = NULL;
 	int ret;
 	int partno;
 	int perm = 0;
@@ -1004,24 +1003,25 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 		return ret;
 	}
 
-	ret = -ENXIO;
-
 	lock_kernel();
 
+	ret = -ENXIO;
 	disk = get_gendisk(bdev->bd_dev, &partno);
 	if (!disk)
 		goto out_unlock_kernel;
-	part = disk_get_part(disk, partno);
-	if (!part)
-		goto out_unlock_kernel;
 
 	mutex_lock_nested(&bdev->bd_mutex, for_part);
 	if (!bdev->bd_openers) {
 		bdev->bd_disk = disk;
-		bdev->bd_part = part;
 		bdev->bd_contains = bdev;
 		if (!partno) {
 			struct backing_dev_info *bdi;
+
+			ret = -ENXIO;
+			bdev->bd_part = disk_get_part(disk, partno);
+			if (!bdev->bd_part)
+				goto out_clear;
+
 			if (disk->fops->open) {
 				ret = disk->fops->open(bdev, mode);
 				if (ret)
@@ -1049,18 +1049,17 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			bdev->bd_contains = whole;
 			bdev->bd_inode->i_data.backing_dev_info =
 			   whole->bd_inode->i_data.backing_dev_info;
+			bdev->bd_part = disk_get_part(disk, partno);
 			if (!(disk->flags & GENHD_FL_UP) ||
-			    !part || !part->nr_sects) {
+			    !bdev->bd_part || !bdev->bd_part->nr_sects) {
 				ret = -ENXIO;
 				goto out_clear;
 			}
-			bd_set_size(bdev, (loff_t)part->nr_sects << 9);
+			bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
 		}
 	} else {
-		disk_put_part(part);
 		put_disk(disk);
 		module_put(disk->fops->owner);
-		part = NULL;
 		disk = NULL;
 		if (bdev->bd_contains == bdev) {
 			if (bdev->bd_disk->fops->open) {
@@ -1080,6 +1079,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	return 0;
 
  out_clear:
+	disk_put_part(bdev->bd_part);
 	bdev->bd_disk = NULL;
 	bdev->bd_part = NULL;
 	bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
@@ -1091,7 +1091,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
  out_unlock_kernel:
 	unlock_kernel();
 
-	disk_put_part(part);
 	if (disk)
 		module_put(disk->fops->owner);
 	put_disk(disk);
@@ -1136,12 +1135,15 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 	if (res)
 		return res;
 
-	if (!(filp->f_mode & FMODE_EXCL))
-		return 0;
+	if (filp->f_mode & FMODE_EXCL) {
+		res = bd_claim(bdev, filp);
+		if (res)
+			goto out_blkdev_put;
+	}
 
-	if (!(res = bd_claim(bdev, filp)))
-		return 0;
+	return 0;
 
+ out_blkdev_put:
 	blkdev_put(bdev, filp->f_mode);
 	return res;
 }
@@ -1204,8 +1206,16 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 {
 	struct block_device *bdev = I_BDEV(file->f_mapping->host);
 	fmode_t mode = file->f_mode;
+
+	/*
+	 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
+	 * to updated it before every ioctl.
+	 */
 	if (file->f_flags & O_NDELAY)
-		mode |= FMODE_NDELAY_NOW;
+		mode |= FMODE_NDELAY;
+	else
+		mode &= ~FMODE_NDELAY;
+
 	return blkdev_ioctl(bdev, mode, cmd, arg);
 }
 
diff --git a/fs/buffer.c b/fs/buffer.c
index 6569fda5cfed..10179cfa1152 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -878,6 +878,7 @@ void invalidate_inode_buffers(struct inode *inode)
 		spin_unlock(&buffer_mapping->private_lock);
 	}
 }
+EXPORT_SYMBOL(invalidate_inode_buffers);
 
 /*
  * Remove any clean buffers from the inode's buffer list. This is called
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 8f528ea24c48..e078b7aea143 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -4,7 +4,15 @@ Various fixes to make delete of open files behavior more predictable
 (when delete of an open file fails we mark the file as "delete-on-close"
 in a way that more servers accept, but only if we can first rename the
 file to a temporary name). Add experimental support for more safely
-handling fcntl(F_SETLEASE).
+handling fcntl(F_SETLEASE). Convert cifs to using blocking tcp
+sends, and also let tcp autotune the socket send and receive buffers.
+This reduces the number of EAGAIN errors returned by TCP/IP in
+high stress workloads (and the number of retries on socket writes
+when sending large SMBWriteX requests). Fix case in which a portion of
+data can in some cases not get written to the file on the server before the
+file is closed. Fix DFS parsing to properly handle path consumed field,
+and to handle certain codepage conversions better. Fix mount and
+umount race that can cause oops in mount or umount or reconnect.
 
 Version 1.54
 ------------
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 69a12aae91d3..490e34bbf27a 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -107,12 +107,13 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
107#ifdef CONFIG_PROC_FS 107#ifdef CONFIG_PROC_FS
108static int cifs_debug_data_proc_show(struct seq_file *m, void *v) 108static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
109{ 109{
110 struct list_head *tmp; 110 struct list_head *tmp1, *tmp2, *tmp3;
111 struct list_head *tmp1;
112 struct mid_q_entry *mid_entry; 111 struct mid_q_entry *mid_entry;
112 struct TCP_Server_Info *server;
113 struct cifsSesInfo *ses; 113 struct cifsSesInfo *ses;
114 struct cifsTconInfo *tcon; 114 struct cifsTconInfo *tcon;
115 int i; 115 int i, j;
116 __u32 dev_type;
116 117
117 seq_puts(m, 118 seq_puts(m,
118 "Display Internal CIFS Data Structures for Debugging\n" 119 "Display Internal CIFS Data Structures for Debugging\n"
@@ -122,46 +123,78 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
122 seq_printf(m, "Servers:"); 123 seq_printf(m, "Servers:");
123 124
124 i = 0; 125 i = 0;
125 read_lock(&GlobalSMBSeslock); 126 read_lock(&cifs_tcp_ses_lock);
126 list_for_each(tmp, &GlobalSMBSessionList) { 127 list_for_each(tmp1, &cifs_tcp_ses_list) {
128 server = list_entry(tmp1, struct TCP_Server_Info,
129 tcp_ses_list);
127 i++; 130 i++;
128 ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); 131 list_for_each(tmp2, &server->smb_ses_list) {
129 if ((ses->serverDomain == NULL) || (ses->serverOS == NULL) || 132 ses = list_entry(tmp2, struct cifsSesInfo,
130 (ses->serverNOS == NULL)) { 133 smb_ses_list);
131 seq_printf(m, "\nentry for %s not fully " 134 if ((ses->serverDomain == NULL) ||
132 "displayed\n\t", ses->serverName); 135 (ses->serverOS == NULL) ||
133 } else { 136 (ses->serverNOS == NULL)) {
134 seq_printf(m, 137 seq_printf(m, "\n%d) entry for %s not fully "
135 "\n%d) Name: %s Domain: %s Mounts: %d OS:" 138 "displayed\n\t", i, ses->serverName);
136 " %s \n\tNOS: %s\tCapability: 0x%x\n\tSMB" 139 } else {
140 seq_printf(m,
141 "\n%d) Name: %s Domain: %s Uses: %d OS:"
142 " %s\n\tNOS: %s\tCapability: 0x%x\n\tSMB"
137 " session status: %d\t", 143 " session status: %d\t",
138 i, ses->serverName, ses->serverDomain, 144 i, ses->serverName, ses->serverDomain,
139 atomic_read(&ses->inUse), 145 ses->ses_count, ses->serverOS, ses->serverNOS,
140 ses->serverOS, ses->serverNOS,
141 ses->capabilities, ses->status); 146 ses->capabilities, ses->status);
142 } 147 }
143 if (ses->server) {
144 seq_printf(m, "TCP status: %d\n\tLocal Users To " 148 seq_printf(m, "TCP status: %d\n\tLocal Users To "
145 "Server: %d SecMode: 0x%x Req On Wire: %d", 149 "Server: %d SecMode: 0x%x Req On Wire: %d",
146 ses->server->tcpStatus, 150 server->tcpStatus, server->srv_count,
147 atomic_read(&ses->server->socketUseCount), 151 server->secMode,
148 ses->server->secMode, 152 atomic_read(&server->inFlight));
149 atomic_read(&ses->server->inFlight));
150 153
151#ifdef CONFIG_CIFS_STATS2 154#ifdef CONFIG_CIFS_STATS2
152 seq_printf(m, " In Send: %d In MaxReq Wait: %d", 155 seq_printf(m, " In Send: %d In MaxReq Wait: %d",
153 atomic_read(&ses->server->inSend), 156 atomic_read(&server->inSend),
154 atomic_read(&ses->server->num_waiters)); 157 atomic_read(&server->num_waiters));
155#endif 158#endif
156 159
157 seq_puts(m, "\nMIDs:\n"); 160 seq_puts(m, "\n\tShares:");
161 j = 0;
162 list_for_each(tmp3, &ses->tcon_list) {
163 tcon = list_entry(tmp3, struct cifsTconInfo,
164 tcon_list);
165 ++j;
166 dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType);
167 seq_printf(m, "\n\t%d) %s Mounts: %d ", j,
168 tcon->treeName, tcon->tc_count);
169 if (tcon->nativeFileSystem) {
170 seq_printf(m, "Type: %s ",
171 tcon->nativeFileSystem);
172 }
173 seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x"
174 "\nPathComponentMax: %d Status: 0x%d",
175 le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics),
176 le32_to_cpu(tcon->fsAttrInfo.Attributes),
177 le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength),
178 tcon->tidStatus);
179 if (dev_type == FILE_DEVICE_DISK)
180 seq_puts(m, " type: DISK ");
181 else if (dev_type == FILE_DEVICE_CD_ROM)
182 seq_puts(m, " type: CDROM ");
183 else
184 seq_printf(m, " type: %d ", dev_type);
185
186 if (tcon->need_reconnect)
187 seq_puts(m, "\tDISCONNECTED ");
188 seq_putc(m, '\n');
189 }
190
191 seq_puts(m, "\n\tMIDs:\n");
158 192
159 spin_lock(&GlobalMid_Lock); 193 spin_lock(&GlobalMid_Lock);
160 list_for_each(tmp1, &ses->server->pending_mid_q) { 194 list_for_each(tmp3, &server->pending_mid_q) {
161 mid_entry = list_entry(tmp1, struct 195 mid_entry = list_entry(tmp3, struct mid_q_entry,
162 mid_q_entry,
163 qhead); 196 qhead);
164 seq_printf(m, "State: %d com: %d pid:" 197 seq_printf(m, "\tState: %d com: %d pid:"
165 " %d tsk: %p mid %d\n", 198 " %d tsk: %p mid %d\n",
166 mid_entry->midState, 199 mid_entry->midState,
167 (int)mid_entry->command, 200 (int)mid_entry->command,
@@ -171,44 +204,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
171 } 204 }
172 spin_unlock(&GlobalMid_Lock); 205 spin_unlock(&GlobalMid_Lock);
173 } 206 }
174
175 }
176 read_unlock(&GlobalSMBSeslock);
177 seq_putc(m, '\n');
178
179 seq_puts(m, "Shares:");
180
181 i = 0;
182 read_lock(&GlobalSMBSeslock);
183 list_for_each(tmp, &GlobalTreeConnectionList) {
184 __u32 dev_type;
185 i++;
186 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
187 dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType);
188 seq_printf(m, "\n%d) %s Uses: %d ", i,
189 tcon->treeName, atomic_read(&tcon->useCount));
190 if (tcon->nativeFileSystem) {
191 seq_printf(m, "Type: %s ",
192 tcon->nativeFileSystem);
193 }
194 seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x"
195 "\nPathComponentMax: %d Status: %d",
196 le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics),
197 le32_to_cpu(tcon->fsAttrInfo.Attributes),
198 le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength),
199 tcon->tidStatus);
200 if (dev_type == FILE_DEVICE_DISK)
201 seq_puts(m, " type: DISK ");
202 else if (dev_type == FILE_DEVICE_CD_ROM)
203 seq_puts(m, " type: CDROM ");
204 else
205 seq_printf(m, " type: %d ", dev_type);
206
207 if (tcon->tidStatus == CifsNeedReconnect)
208 seq_puts(m, "\tDISCONNECTED ");
209 } 207 }
210 read_unlock(&GlobalSMBSeslock); 208 read_unlock(&cifs_tcp_ses_lock);
211
212 seq_putc(m, '\n'); 209 seq_putc(m, '\n');
213 210
214 /* BB add code to dump additional info such as TCP session info now */ 211 /* BB add code to dump additional info such as TCP session info now */
@@ -234,7 +231,9 @@ static ssize_t cifs_stats_proc_write(struct file *file,
234{ 231{
235 char c; 232 char c;
236 int rc; 233 int rc;
237 struct list_head *tmp; 234 struct list_head *tmp1, *tmp2, *tmp3;
235 struct TCP_Server_Info *server;
236 struct cifsSesInfo *ses;
238 struct cifsTconInfo *tcon; 237 struct cifsTconInfo *tcon;
239 238
240 rc = get_user(c, buffer); 239 rc = get_user(c, buffer);
@@ -242,33 +241,42 @@ static ssize_t cifs_stats_proc_write(struct file *file,
242 return rc; 241 return rc;
243 242
244 if (c == '1' || c == 'y' || c == 'Y' || c == '0') { 243 if (c == '1' || c == 'y' || c == 'Y' || c == '0') {
245 read_lock(&GlobalSMBSeslock);
246#ifdef CONFIG_CIFS_STATS2 244#ifdef CONFIG_CIFS_STATS2
247 atomic_set(&totBufAllocCount, 0); 245 atomic_set(&totBufAllocCount, 0);
248 atomic_set(&totSmBufAllocCount, 0); 246 atomic_set(&totSmBufAllocCount, 0);
249#endif /* CONFIG_CIFS_STATS2 */ 247#endif /* CONFIG_CIFS_STATS2 */
250 list_for_each(tmp, &GlobalTreeConnectionList) { 248 read_lock(&cifs_tcp_ses_lock);
251 tcon = list_entry(tmp, struct cifsTconInfo, 249 list_for_each(tmp1, &cifs_tcp_ses_list) {
252 cifsConnectionList); 250 server = list_entry(tmp1, struct TCP_Server_Info,
253 atomic_set(&tcon->num_smbs_sent, 0); 251 tcp_ses_list);
254 atomic_set(&tcon->num_writes, 0); 252 list_for_each(tmp2, &server->smb_ses_list) {
255 atomic_set(&tcon->num_reads, 0); 253 ses = list_entry(tmp2, struct cifsSesInfo,
256 atomic_set(&tcon->num_oplock_brks, 0); 254 smb_ses_list);
257 atomic_set(&tcon->num_opens, 0); 255 list_for_each(tmp3, &ses->tcon_list) {
258 atomic_set(&tcon->num_closes, 0); 256 tcon = list_entry(tmp3,
259 atomic_set(&tcon->num_deletes, 0); 257 struct cifsTconInfo,
260 atomic_set(&tcon->num_mkdirs, 0); 258 tcon_list);
261 atomic_set(&tcon->num_rmdirs, 0); 259 atomic_set(&tcon->num_smbs_sent, 0);
262 atomic_set(&tcon->num_renames, 0); 260 atomic_set(&tcon->num_writes, 0);
263 atomic_set(&tcon->num_t2renames, 0); 261 atomic_set(&tcon->num_reads, 0);
264 atomic_set(&tcon->num_ffirst, 0); 262 atomic_set(&tcon->num_oplock_brks, 0);
265 atomic_set(&tcon->num_fnext, 0); 263 atomic_set(&tcon->num_opens, 0);
266 atomic_set(&tcon->num_fclose, 0); 264 atomic_set(&tcon->num_closes, 0);
267 atomic_set(&tcon->num_hardlinks, 0); 265 atomic_set(&tcon->num_deletes, 0);
268 atomic_set(&tcon->num_symlinks, 0); 266 atomic_set(&tcon->num_mkdirs, 0);
269 atomic_set(&tcon->num_locks, 0); 267 atomic_set(&tcon->num_rmdirs, 0);
268 atomic_set(&tcon->num_renames, 0);
269 atomic_set(&tcon->num_t2renames, 0);
270 atomic_set(&tcon->num_ffirst, 0);
271 atomic_set(&tcon->num_fnext, 0);
272 atomic_set(&tcon->num_fclose, 0);
273 atomic_set(&tcon->num_hardlinks, 0);
274 atomic_set(&tcon->num_symlinks, 0);
275 atomic_set(&tcon->num_locks, 0);
276 }
277 }
270 } 278 }
271 read_unlock(&GlobalSMBSeslock); 279 read_unlock(&cifs_tcp_ses_lock);
272 } 280 }
273 281
274 return count; 282 return count;
@@ -277,7 +285,9 @@ static ssize_t cifs_stats_proc_write(struct file *file,
277static int cifs_stats_proc_show(struct seq_file *m, void *v) 285static int cifs_stats_proc_show(struct seq_file *m, void *v)
278{ 286{
279 int i; 287 int i;
280 struct list_head *tmp; 288 struct list_head *tmp1, *tmp2, *tmp3;
289 struct TCP_Server_Info *server;
290 struct cifsSesInfo *ses;
281 struct cifsTconInfo *tcon; 291 struct cifsTconInfo *tcon;
282 292
283 seq_printf(m, 293 seq_printf(m,
@@ -306,44 +316,55 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v)
306 GlobalCurrentXid, GlobalMaxActiveXid); 316 GlobalCurrentXid, GlobalMaxActiveXid);
307 317
308 i = 0; 318 i = 0;
309 read_lock(&GlobalSMBSeslock); 319 read_lock(&cifs_tcp_ses_lock);
310 list_for_each(tmp, &GlobalTreeConnectionList) { 320 list_for_each(tmp1, &cifs_tcp_ses_list) {
311 i++; 321 server = list_entry(tmp1, struct TCP_Server_Info,
312 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); 322 tcp_ses_list);
313 seq_printf(m, "\n%d) %s", i, tcon->treeName); 323 list_for_each(tmp2, &server->smb_ses_list) {
314 if (tcon->tidStatus == CifsNeedReconnect) 324 ses = list_entry(tmp2, struct cifsSesInfo,
315 seq_puts(m, "\tDISCONNECTED "); 325 smb_ses_list);
316 seq_printf(m, "\nSMBs: %d Oplock Breaks: %d", 326 list_for_each(tmp3, &ses->tcon_list) {
317 atomic_read(&tcon->num_smbs_sent), 327 tcon = list_entry(tmp3,
318 atomic_read(&tcon->num_oplock_brks)); 328 struct cifsTconInfo,
319 seq_printf(m, "\nReads: %d Bytes: %lld", 329 tcon_list);
320 atomic_read(&tcon->num_reads), 330 i++;
321 (long long)(tcon->bytes_read)); 331 seq_printf(m, "\n%d) %s", i, tcon->treeName);
322 seq_printf(m, "\nWrites: %d Bytes: %lld", 332 if (tcon->need_reconnect)
323 atomic_read(&tcon->num_writes), 333 seq_puts(m, "\tDISCONNECTED ");
324 (long long)(tcon->bytes_written)); 334 seq_printf(m, "\nSMBs: %d Oplock Breaks: %d",
325 seq_printf(m, 335 atomic_read(&tcon->num_smbs_sent),
326 "\nLocks: %d HardLinks: %d Symlinks: %d", 336 atomic_read(&tcon->num_oplock_brks));
327 atomic_read(&tcon->num_locks), 337 seq_printf(m, "\nReads: %d Bytes: %lld",
328 atomic_read(&tcon->num_hardlinks), 338 atomic_read(&tcon->num_reads),
329 atomic_read(&tcon->num_symlinks)); 339 (long long)(tcon->bytes_read));
330 340 seq_printf(m, "\nWrites: %d Bytes: %lld",
331 seq_printf(m, "\nOpens: %d Closes: %d Deletes: %d", 341 atomic_read(&tcon->num_writes),
332 atomic_read(&tcon->num_opens), 342 (long long)(tcon->bytes_written));
333 atomic_read(&tcon->num_closes), 343 seq_printf(m, "\nLocks: %d HardLinks: %d "
334 atomic_read(&tcon->num_deletes)); 344 "Symlinks: %d",
335 seq_printf(m, "\nMkdirs: %d Rmdirs: %d", 345 atomic_read(&tcon->num_locks),
336 atomic_read(&tcon->num_mkdirs), 346 atomic_read(&tcon->num_hardlinks),
337 atomic_read(&tcon->num_rmdirs)); 347 atomic_read(&tcon->num_symlinks));
338 seq_printf(m, "\nRenames: %d T2 Renames %d", 348 seq_printf(m, "\nOpens: %d Closes: %d"
339 atomic_read(&tcon->num_renames), 349 "Deletes: %d",
340 atomic_read(&tcon->num_t2renames)); 350 atomic_read(&tcon->num_opens),
341 seq_printf(m, "\nFindFirst: %d FNext %d FClose %d", 351 atomic_read(&tcon->num_closes),
342 atomic_read(&tcon->num_ffirst), 352 atomic_read(&tcon->num_deletes));
343 atomic_read(&tcon->num_fnext), 353 seq_printf(m, "\nMkdirs: %d Rmdirs: %d",
344 atomic_read(&tcon->num_fclose)); 354 atomic_read(&tcon->num_mkdirs),
355 atomic_read(&tcon->num_rmdirs));
356 seq_printf(m, "\nRenames: %d T2 Renames %d",
357 atomic_read(&tcon->num_renames),
358 atomic_read(&tcon->num_t2renames));
359 seq_printf(m, "\nFindFirst: %d FNext %d "
360 "FClose %d",
361 atomic_read(&tcon->num_ffirst),
362 atomic_read(&tcon->num_fnext),
363 atomic_read(&tcon->num_fclose));
364 }
365 }
345 } 366 }
346 read_unlock(&GlobalSMBSeslock); 367 read_unlock(&cifs_tcp_ses_lock);
347 368
348 seq_putc(m, '\n'); 369 seq_putc(m, '\n');
349 return 0; 370 return 0;
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index d2c8eef84f3c..e1c18362ba46 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -106,7 +106,8 @@ static char *cifs_get_share_name(const char *node_name)
106/** 106/**
107 * compose_mount_options - creates mount options for refferral 107 * compose_mount_options - creates mount options for refferral
108 * @sb_mountdata: parent/root DFS mount options (template) 108 * @sb_mountdata: parent/root DFS mount options (template)
109 * @ref_unc: refferral server UNC 109 * @dentry: point where we are going to mount
110 * @ref: server's referral
110 * @devname: pointer for saving device name 111 * @devname: pointer for saving device name
111 * 112 *
112 * creates mount options for submount based on template options sb_mountdata 113 * creates mount options for submount based on template options sb_mountdata
@@ -116,7 +117,8 @@ static char *cifs_get_share_name(const char *node_name)
116 * Caller is responcible for freeing retunrned value if it is not error. 117 * Caller is responcible for freeing retunrned value if it is not error.
117 */ 118 */
118static char *compose_mount_options(const char *sb_mountdata, 119static char *compose_mount_options(const char *sb_mountdata,
119 const char *ref_unc, 120 struct dentry *dentry,
121 const struct dfs_info3_param *ref,
120 char **devname) 122 char **devname)
121{ 123{
122 int rc; 124 int rc;
@@ -126,11 +128,12 @@ static char *compose_mount_options(const char *sb_mountdata,
126 char *srvIP = NULL; 128 char *srvIP = NULL;
127 char sep = ','; 129 char sep = ',';
128 int off, noff; 130 int off, noff;
131 char *fullpath;
129 132
130 if (sb_mountdata == NULL) 133 if (sb_mountdata == NULL)
131 return ERR_PTR(-EINVAL); 134 return ERR_PTR(-EINVAL);
132 135
133 *devname = cifs_get_share_name(ref_unc); 136 *devname = cifs_get_share_name(ref->node_name);
134 rc = dns_resolve_server_name_to_ip(*devname, &srvIP); 137 rc = dns_resolve_server_name_to_ip(*devname, &srvIP);
135 if (rc != 0) { 138 if (rc != 0) {
136 cERROR(1, ("%s: Failed to resolve server part of %s to IP", 139 cERROR(1, ("%s: Failed to resolve server part of %s to IP",
@@ -138,7 +141,12 @@ static char *compose_mount_options(const char *sb_mountdata,
138 mountdata = ERR_PTR(rc); 141 mountdata = ERR_PTR(rc);
139 goto compose_mount_options_out; 142 goto compose_mount_options_out;
140 } 143 }
141 md_len = strlen(sb_mountdata) + strlen(srvIP) + strlen(ref_unc) + 3; 144 /* md_len = strlen(...) + 12 for 'sep+prefixpath='
145 * assuming that we have 'unc=' and 'ip=' in
146 * the original sb_mountdata
147 */
148 md_len = strlen(sb_mountdata) + strlen(srvIP) +
149 strlen(ref->node_name) + 12;
142 mountdata = kzalloc(md_len+1, GFP_KERNEL); 150 mountdata = kzalloc(md_len+1, GFP_KERNEL);
143 if (mountdata == NULL) { 151 if (mountdata == NULL) {
144 mountdata = ERR_PTR(-ENOMEM); 152 mountdata = ERR_PTR(-ENOMEM);
@@ -152,41 +160,56 @@ static char *compose_mount_options(const char *sb_mountdata,
152 strncpy(mountdata, sb_mountdata, 5); 160 strncpy(mountdata, sb_mountdata, 5);
153 off += 5; 161 off += 5;
154 } 162 }
155 while ((tkn_e = strchr(sb_mountdata+off, sep))) { 163
156 noff = (tkn_e - (sb_mountdata+off)) + 1; 164 do {
157 if (strnicmp(sb_mountdata+off, "unc=", 4) == 0) { 165 tkn_e = strchr(sb_mountdata + off, sep);
166 if (tkn_e == NULL)
167 noff = strlen(sb_mountdata + off);
168 else
169 noff = tkn_e - (sb_mountdata + off) + 1;
170
171 if (strnicmp(sb_mountdata + off, "unc=", 4) == 0) {
158 off += noff; 172 off += noff;
159 continue; 173 continue;
160 } 174 }
161 if (strnicmp(sb_mountdata+off, "ip=", 3) == 0) { 175 if (strnicmp(sb_mountdata + off, "ip=", 3) == 0) {
162 off += noff; 176 off += noff;
163 continue; 177 continue;
164 } 178 }
165 if (strnicmp(sb_mountdata+off, "prefixpath=", 3) == 0) { 179 if (strnicmp(sb_mountdata + off, "prefixpath=", 11) == 0) {
166 off += noff; 180 off += noff;
167 continue; 181 continue;
168 } 182 }
169 strncat(mountdata, sb_mountdata+off, noff); 183 strncat(mountdata, sb_mountdata + off, noff);
170 off += noff; 184 off += noff;
171 } 185 } while (tkn_e);
172 strcat(mountdata, sb_mountdata+off); 186 strcat(mountdata, sb_mountdata + off);
173 mountdata[md_len] = '\0'; 187 mountdata[md_len] = '\0';
174 188
175 /* copy new IP and ref share name */ 189 /* copy new IP and ref share name */
176 strcat(mountdata, ",ip="); 190 if (mountdata[strlen(mountdata) - 1] != sep)
191 strncat(mountdata, &sep, 1);
192 strcat(mountdata, "ip=");
177 strcat(mountdata, srvIP); 193 strcat(mountdata, srvIP);
178 strcat(mountdata, ",unc="); 194 strncat(mountdata, &sep, 1);
195 strcat(mountdata, "unc=");
179 strcat(mountdata, *devname); 196 strcat(mountdata, *devname);
180 197
181 /* find & copy prefixpath */ 198 /* find & copy prefixpath */
182 tkn_e = strchr(ref_unc+2, '\\'); 199 tkn_e = strchr(ref->node_name + 2, '\\');
183 if (tkn_e) { 200 if (tkn_e == NULL) /* invalid unc, missing share name*/
184 tkn_e = strchr(tkn_e+1, '\\'); 201 goto compose_mount_options_out;
185 if (tkn_e) { 202
186 strcat(mountdata, ",prefixpath="); 203 fullpath = build_path_from_dentry(dentry);
187 strcat(mountdata, tkn_e+1); 204 tkn_e = strchr(tkn_e + 1, '\\');
188 } 205 if (tkn_e || strlen(fullpath) - (ref->path_consumed)) {
206 strncat(mountdata, &sep, 1);
207 strcat(mountdata, "prefixpath=");
208 if (tkn_e)
209 strcat(mountdata, tkn_e + 1);
210 strcat(mountdata, fullpath + (ref->path_consumed));
189 } 211 }
212 kfree(fullpath);
190 213
191 /*cFYI(1,("%s: parent mountdata: %s", __func__,sb_mountdata));*/ 214 /*cFYI(1,("%s: parent mountdata: %s", __func__,sb_mountdata));*/
192 /*cFYI(1, ("%s: submount mountdata: %s", __func__, mountdata ));*/ 215 /*cFYI(1, ("%s: submount mountdata: %s", __func__, mountdata ));*/
@@ -198,7 +221,7 @@ compose_mount_options_out:
198 221
199 222
200static struct vfsmount *cifs_dfs_do_refmount(const struct vfsmount *mnt_parent, 223static struct vfsmount *cifs_dfs_do_refmount(const struct vfsmount *mnt_parent,
201 struct dentry *dentry, char *ref_unc) 224 struct dentry *dentry, const struct dfs_info3_param *ref)
202{ 225{
203 struct cifs_sb_info *cifs_sb; 226 struct cifs_sb_info *cifs_sb;
204 struct vfsmount *mnt; 227 struct vfsmount *mnt;
@@ -207,7 +230,7 @@ static struct vfsmount *cifs_dfs_do_refmount(const struct vfsmount *mnt_parent,
207 230
208 cifs_sb = CIFS_SB(dentry->d_inode->i_sb); 231 cifs_sb = CIFS_SB(dentry->d_inode->i_sb);
209 mountdata = compose_mount_options(cifs_sb->mountdata, 232 mountdata = compose_mount_options(cifs_sb->mountdata,
210 ref_unc, &devname); 233 dentry, ref, &devname);
211 234
212 if (IS_ERR(mountdata)) 235 if (IS_ERR(mountdata))
213 return (struct vfsmount *)mountdata; 236 return (struct vfsmount *)mountdata;
@@ -310,7 +333,7 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
310 } 333 }
311 mnt = cifs_dfs_do_refmount(nd->path.mnt, 334 mnt = cifs_dfs_do_refmount(nd->path.mnt,
312 nd->path.dentry, 335 nd->path.dentry,
313 referrals[i].node_name); 336 referrals + i);
314 cFYI(1, ("%s: cifs_dfs_do_refmount:%s , mnt:%p", 337 cFYI(1, ("%s: cifs_dfs_do_refmount:%s , mnt:%p",
315 __func__, 338 __func__,
316 referrals[i].node_name, mnt)); 339 referrals[i].node_name, mnt));
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index fcee9298b620..0ab2fb5afef1 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -73,8 +73,8 @@ struct key_type cifs_spnego_key_type = {
  * strlen(";sec=ntlmsspi") */
 #define MAX_MECH_STR_LEN	13
 
-/* max possible addr len eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */
-#define MAX_IPV6_ADDR_LEN	42
+/* max possible addr len eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/128 */
+#define MAX_IPV6_ADDR_LEN	43
 
 /* strlen of "host=" */
 #define HOST_KEY_LEN		5
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index ac5915d61dca..d9cf467309e8 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -514,10 +514,11 @@ static void cifs_umount_begin(struct super_block *sb)
 	tcon = cifs_sb->tcon;
 	if (tcon == NULL)
 		return;
-	down(&tcon->tconSem);
-	if (atomic_read(&tcon->useCount) == 1)
+
+	read_lock(&cifs_tcp_ses_lock);
+	if (tcon->tc_count == 1)
 		tcon->tidStatus = CifsExiting;
-	up(&tcon->tconSem);
+	read_unlock(&cifs_tcp_ses_lock);
 
 	/* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */
 	/* cancel_notify_requests(tcon); */
@@ -1013,7 +1014,7 @@ static int cifs_oplock_thread(void *dummyarg)
 			   not bother sending an oplock release if session
 			   to server still is disconnected since oplock
 			   already released by the server in that case */
-			if (pTcon->tidStatus != CifsNeedReconnect) {
+			if (!pTcon->need_reconnect) {
 				rc = CIFSSMBLock(0, pTcon, netfid,
 				    0 /* len */ , 0 /* offset */, 0,
 				    0, LOCKING_ANDX_OPLOCK_RELEASE,
@@ -1031,24 +1032,24 @@ static int cifs_oplock_thread(void *dummyarg)
 static int cifs_dnotify_thread(void *dummyarg)
 {
 	struct list_head *tmp;
-	struct cifsSesInfo *ses;
+	struct TCP_Server_Info *server;
 
 	do {
 		if (try_to_freeze())
 			continue;
 		set_current_state(TASK_INTERRUPTIBLE);
 		schedule_timeout(15*HZ);
-		read_lock(&GlobalSMBSeslock);
 		/* check if any stuck requests that need
 		   to be woken up and wakeq so the
 		   thread can wake up and error out */
-		list_for_each(tmp, &GlobalSMBSessionList) {
-			ses = list_entry(tmp, struct cifsSesInfo,
-				cifsSessionList);
-			if (ses->server && atomic_read(&ses->server->inFlight))
-				wake_up_all(&ses->server->response_q);
+		read_lock(&cifs_tcp_ses_lock);
+		list_for_each(tmp, &cifs_tcp_ses_list) {
+			server = list_entry(tmp, struct TCP_Server_Info,
+					    tcp_ses_list);
+			if (atomic_read(&server->inFlight))
+				wake_up_all(&server->response_q);
 		}
-		read_unlock(&GlobalSMBSeslock);
+		read_unlock(&cifs_tcp_ses_lock);
 	} while (!kthread_should_stop());
 
 	return 0;
@@ -1059,9 +1060,7 @@ init_cifs(void)
 {
 	int rc = 0;
 	cifs_proc_init();
-/* INIT_LIST_HEAD(&GlobalServerList);*/	/* BB not implemented yet */
-	INIT_LIST_HEAD(&GlobalSMBSessionList);
-	INIT_LIST_HEAD(&GlobalTreeConnectionList);
+	INIT_LIST_HEAD(&cifs_tcp_ses_list);
 	INIT_LIST_HEAD(&GlobalOplock_Q);
 #ifdef CONFIG_CIFS_EXPERIMENTAL
 	INIT_LIST_HEAD(&GlobalDnotifyReqList);
@@ -1089,6 +1088,7 @@ init_cifs(void)
 	GlobalMaxActiveXid = 0;
 	memset(Local_System_Name, 0, 15);
 	rwlock_init(&GlobalSMBSeslock);
+	rwlock_init(&cifs_tcp_ses_lock);
 	spin_lock_init(&GlobalMid_Lock);
 
 	if (cifs_max_pending < 2) {
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index c791e5b5a914..c57c0565547f 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -85,8 +85,7 @@ enum securityEnum {
 };
 
 enum protocolEnum {
-	IPV4 = 0,
-	IPV6,
+	TCP = 0,
 	SCTP
 	/* Netbios frames protocol not supported at this time */
 };
@@ -122,6 +121,9 @@ struct cifs_cred {
  */
 
 struct TCP_Server_Info {
+	struct list_head tcp_ses_list;
+	struct list_head smb_ses_list;
+	int srv_count; /* reference counter */
 	/* 15 character server name + 0x20 16th byte indicating type = srv */
 	char server_RFC1001_name[SERVER_NAME_LEN_WITH_NULL];
 	char unicode_server_Name[SERVER_NAME_LEN_WITH_NULL * 2];
@@ -141,7 +143,8 @@ struct TCP_Server_Info {
 	char versionMajor;
 	char versionMinor;
 	bool svlocal:1;			/* local server or remote */
-	atomic_t socketUseCount; /* number of open cifs sessions on socket */
+	bool noblocksnd;		/* use blocking sendmsg */
+	bool noautotune;		/* do not autotune send buf sizes */
 	atomic_t inFlight;  /* number of requests on the wire to server */
 #ifdef CONFIG_CIFS_STATS2
 	atomic_t inSend; /* requests trying to send */
@@ -192,13 +195,14 @@ struct cifsUidInfo {
  * Session structure. One of these for each uid session with a particular host
  */
 struct cifsSesInfo {
-	struct list_head cifsSessionList;
+	struct list_head smb_ses_list;
+	struct list_head tcon_list;
 	struct semaphore sesSem;
 #if 0
 	struct cifsUidInfo *uidInfo;	/* pointer to user info */
 #endif
 	struct TCP_Server_Info *server;	/* pointer to server info */
-	atomic_t inUse; /* # of mounts (tree connections) on this ses */
+	int ses_count;		/* reference counter */
 	enum statusEnum status;
 	unsigned overrideSecFlg;  /* if non-zero override global sec flags */
 	__u16 ipc_tid;		/* special tid for connection to IPC share */
@@ -214,6 +218,7 @@ struct cifsSesInfo {
 	char userName[MAX_USERNAME_SIZE + 1];
 	char *domainName;
 	char *password;
+	bool need_reconnect:1; /* connection reset, uid now invalid */
 };
 /* no more than one of the following three session flags may be set */
 #define CIFS_SES_NT4 1
@@ -228,16 +233,15 @@ struct cifsSesInfo {
  * session
  */
 struct cifsTconInfo {
-	struct list_head cifsConnectionList;
+	struct list_head tcon_list;
+	int tc_count;
 	struct list_head openFileList;
-	struct semaphore tconSem;
 	struct cifsSesInfo *ses;	/* pointer to session associated with */
 	char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */
 	char *nativeFileSystem;
 	__u16 tid;		/* The 2 byte tree id */
 	__u16 Flags;		/* optional support bits */
 	enum statusEnum tidStatus;
-	atomic_t useCount;	/* how many explicit/implicit mounts to share */
 #ifdef CONFIG_CIFS_STATS
 	atomic_t num_smbs_sent;
 	atomic_t num_writes;
286 bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol 290 bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol
287 for this mount even if server would support */ 291 for this mount even if server would support */
288 bool local_lease:1; /* check leases (only) on local system not remote */ 292 bool local_lease:1; /* check leases (only) on local system not remote */
293 bool need_reconnect:1; /* connection reset, tid now invalid */
289 /* BB add field for back pointer to sb struct(s)? */ 294 /* BB add field for back pointer to sb struct(s)? */
290}; 295};
291 296
@@ -586,22 +591,30 @@ require use of the stronger protocol */
 #endif
 
 /*
- * The list of servers that did not respond with NT LM 0.12.
- * This list helps improve performance and eliminate the messages indicating
- * that we had a communications error talking to the server in this list.
+ * the list of TCP_Server_Info structures, ie each of the sockets
+ * connecting our client to a distinct server (ip address), is
+ * chained together by cifs_tcp_ses_list. The list of all our SMB
+ * sessions (and from that the tree connections) can be found
+ * by iterating over cifs_tcp_ses_list
  */
-/* Feature not supported */
-/* GLOBAL_EXTERN struct servers_not_supported *NotSuppList; */
+GLOBAL_EXTERN struct list_head cifs_tcp_ses_list;
 
 /*
- * The following is a hash table of all the users we know about.
+ * This lock protects the cifs_tcp_ses_list, the list of smb sessions per
+ * tcp session, and the list of tcon's per smb session. It also protects
+ * the reference counters for the server, smb session, and tcon. Finally,
+ * changes to the tcon->tidStatus should be done while holding this lock.
  */
-GLOBAL_EXTERN struct smbUidInfo *GlobalUidList[UID_HASH];
+GLOBAL_EXTERN rwlock_t cifs_tcp_ses_lock;
 
-/* GLOBAL_EXTERN struct list_head GlobalServerList; BB not implemented yet */
-GLOBAL_EXTERN struct list_head GlobalSMBSessionList;
-GLOBAL_EXTERN struct list_head GlobalTreeConnectionList;
-GLOBAL_EXTERN rwlock_t GlobalSMBSeslock;  /* protects list inserts on 3 above */
+/*
+ * This lock protects the cifs_file->llist and cifs_file->flist
+ * list operations, and updates to some flags (cifs_file->invalidHandle)
+ * It will be moved to either use the tcon->stat_lock or equivalent later.
+ * If cifs_tcp_ses_lock and the lock below are both needed to be held, then
+ * the cifs_tcp_ses_lock must be grabbed first and released last.
+ */
+GLOBAL_EXTERN rwlock_t GlobalSMBSeslock;
 
 GLOBAL_EXTERN struct list_head GlobalOplock_Q;
 
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 0cff7fe986e8..6f21ecb85ce5 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -36,7 +36,7 @@ extern void cifs_buf_release(void *);
 extern struct smb_hdr *cifs_small_buf_get(void);
 extern void cifs_small_buf_release(void *);
 extern int smb_send(struct socket *, struct smb_hdr *,
-			unsigned int /* length */ , struct sockaddr *);
+			unsigned int /* length */ , struct sockaddr *, bool);
 extern unsigned int _GetXid(void);
 extern void _FreeXid(unsigned int);
 #define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current->fsuid));
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 843a85fb8b9a..6d51696dc762 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -190,10 +190,10 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
190 /* need to prevent multiple threads trying to 190 /* need to prevent multiple threads trying to
191 simultaneously reconnect the same SMB session */ 191 simultaneously reconnect the same SMB session */
192 down(&tcon->ses->sesSem); 192 down(&tcon->ses->sesSem);
193 if (tcon->ses->status == CifsNeedReconnect) 193 if (tcon->ses->need_reconnect)
194 rc = cifs_setup_session(0, tcon->ses, 194 rc = cifs_setup_session(0, tcon->ses,
195 nls_codepage); 195 nls_codepage);
196 if (!rc && (tcon->tidStatus == CifsNeedReconnect)) { 196 if (!rc && (tcon->need_reconnect)) {
197 mark_open_files_invalid(tcon); 197 mark_open_files_invalid(tcon);
198 rc = CIFSTCon(0, tcon->ses, tcon->treeName, 198 rc = CIFSTCon(0, tcon->ses, tcon->treeName,
199 tcon, nls_codepage); 199 tcon, nls_codepage);
@@ -337,10 +337,10 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
337 /* need to prevent multiple threads trying to 337 /* need to prevent multiple threads trying to
338 simultaneously reconnect the same SMB session */ 338 simultaneously reconnect the same SMB session */
339 down(&tcon->ses->sesSem); 339 down(&tcon->ses->sesSem);
340 if (tcon->ses->status == CifsNeedReconnect) 340 if (tcon->ses->need_reconnect)
341 rc = cifs_setup_session(0, tcon->ses, 341 rc = cifs_setup_session(0, tcon->ses,
342 nls_codepage); 342 nls_codepage);
343 if (!rc && (tcon->tidStatus == CifsNeedReconnect)) { 343 if (!rc && (tcon->need_reconnect)) {
344 mark_open_files_invalid(tcon); 344 mark_open_files_invalid(tcon);
345 rc = CIFSTCon(0, tcon->ses, tcon->treeName, 345 rc = CIFSTCon(0, tcon->ses, tcon->treeName,
346 tcon, nls_codepage); 346 tcon, nls_codepage);
@@ -664,8 +664,9 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
664 rc = -EIO; 664 rc = -EIO;
665 goto neg_err_exit; 665 goto neg_err_exit;
666 } 666 }
667 667 read_lock(&cifs_tcp_ses_lock);
668 if (server->socketUseCount.counter > 1) { 668 if (server->srv_count > 1) {
669 read_unlock(&cifs_tcp_ses_lock);
669 if (memcmp(server->server_GUID, 670 if (memcmp(server->server_GUID,
670 pSMBr->u.extended_response. 671 pSMBr->u.extended_response.
671 GUID, 16) != 0) { 672 GUID, 16) != 0) {
@@ -674,9 +675,11 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
674 pSMBr->u.extended_response.GUID, 675 pSMBr->u.extended_response.GUID,
675 16); 676 16);
676 } 677 }
677 } else 678 } else {
679 read_unlock(&cifs_tcp_ses_lock);
678 memcpy(server->server_GUID, 680 memcpy(server->server_GUID,
679 pSMBr->u.extended_response.GUID, 16); 681 pSMBr->u.extended_response.GUID, 16);
682 }
680 683
681 if (count == 16) { 684 if (count == 16) {
682 server->secType = RawNTLMSSP; 685 server->secType = RawNTLMSSP;
@@ -739,50 +742,31 @@ CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon)
739 int rc = 0; 742 int rc = 0;
740 743
741 cFYI(1, ("In tree disconnect")); 744 cFYI(1, ("In tree disconnect"));
742 /*
743 * If last user of the connection and
744 * connection alive - disconnect it
745 * If this is the last connection on the server session disconnect it
746 * (and inside session disconnect we should check if tcp socket needs
747 * to be freed and kernel thread woken up).
748 */
749 if (tcon)
750 down(&tcon->tconSem);
751 else
752 return -EIO;
753 745
754 atomic_dec(&tcon->useCount); 746 /* BB: do we need to check this? These should never be NULL. */
755 if (atomic_read(&tcon->useCount) > 0) { 747 if ((tcon->ses == NULL) || (tcon->ses->server == NULL))
756 up(&tcon->tconSem); 748 return -EIO;
757 return -EBUSY;
758 }
759 749
760 /* No need to return error on this operation if tid invalidated and 750 /*
761 closed on server already e.g. due to tcp session crashing */ 751 * No need to return error on this operation if tid invalidated and
762 if (tcon->tidStatus == CifsNeedReconnect) { 752 * closed on server already e.g. due to tcp session crashing. Also,
763 up(&tcon->tconSem); 753 * the tcon is no longer on the list, so no need to take lock before
754 * checking this.
755 */
756 if (tcon->need_reconnect)
764 return 0; 757 return 0;
765 }
766 758
767 if ((tcon->ses == NULL) || (tcon->ses->server == NULL)) {
768 up(&tcon->tconSem);
769 return -EIO;
770 }
771 rc = small_smb_init(SMB_COM_TREE_DISCONNECT, 0, tcon, 759 rc = small_smb_init(SMB_COM_TREE_DISCONNECT, 0, tcon,
772 (void **)&smb_buffer); 760 (void **)&smb_buffer);
773 if (rc) { 761 if (rc)
774 up(&tcon->tconSem);
775 return rc; 762 return rc;
776 }
777 763
778 rc = SendReceiveNoRsp(xid, tcon->ses, smb_buffer, 0); 764 rc = SendReceiveNoRsp(xid, tcon->ses, smb_buffer, 0);
779 if (rc) 765 if (rc)
780 cFYI(1, ("Tree disconnect failed %d", rc)); 766 cFYI(1, ("Tree disconnect failed %d", rc));
781 767
782 up(&tcon->tconSem);
783
784 /* No need to return error on this operation if tid invalidated and 768 /* No need to return error on this operation if tid invalidated and
785 closed on server already e.g. due to tcp session crashing */ 769 closed on server already e.g. due to tcp session crashing */
786 if (rc == -EAGAIN) 770 if (rc == -EAGAIN)
787 rc = 0; 771 rc = 0;
788 772
@@ -796,43 +780,36 @@ CIFSSMBLogoff(const int xid, struct cifsSesInfo *ses)
796 int rc = 0; 780 int rc = 0;
797 781
798 cFYI(1, ("In SMBLogoff for session disconnect")); 782 cFYI(1, ("In SMBLogoff for session disconnect"));
799 if (ses) 783
800 down(&ses->sesSem); 784 /*
801 else 785 * BB: do we need to check validity of ses and server? They should
786 * always be valid since we have an active reference. If not, that
787 * should probably be a BUG()
788 */
789 if (!ses || !ses->server)
802 return -EIO; 790 return -EIO;
803 791
804 atomic_dec(&ses->inUse); 792 down(&ses->sesSem);
805 if (atomic_read(&ses->inUse) > 0) { 793 if (ses->need_reconnect)
806 up(&ses->sesSem); 794 goto session_already_dead; /* no need to send SMBlogoff if uid
807 return -EBUSY; 795 already closed due to reconnect */
808 }
809 rc = small_smb_init(SMB_COM_LOGOFF_ANDX, 2, NULL, (void **)&pSMB); 796 rc = small_smb_init(SMB_COM_LOGOFF_ANDX, 2, NULL, (void **)&pSMB);
810 if (rc) { 797 if (rc) {
811 up(&ses->sesSem); 798 up(&ses->sesSem);
812 return rc; 799 return rc;
813 } 800 }
814 801
815 if (ses->server) { 802 pSMB->hdr.Mid = GetNextMid(ses->server);
816 pSMB->hdr.Mid = GetNextMid(ses->server);
817 803
818 if (ses->server->secMode & 804 if (ses->server->secMode &
819 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) 805 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
820 pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; 806 pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
821 }
822 807
823 pSMB->hdr.Uid = ses->Suid; 808 pSMB->hdr.Uid = ses->Suid;
824 809
825 pSMB->AndXCommand = 0xFF; 810 pSMB->AndXCommand = 0xFF;
826 rc = SendReceiveNoRsp(xid, ses, (struct smb_hdr *) pSMB, 0); 811 rc = SendReceiveNoRsp(xid, ses, (struct smb_hdr *) pSMB, 0);
827 if (ses->server) { 812session_already_dead:
828 atomic_dec(&ses->server->socketUseCount);
829 if (atomic_read(&ses->server->socketUseCount) == 0) {
830 spin_lock(&GlobalMid_Lock);
831 ses->server->tcpStatus = CifsExiting;
832 spin_unlock(&GlobalMid_Lock);
833 rc = -ESHUTDOWN;
834 }
835 }
836 up(&ses->sesSem); 813 up(&ses->sesSem);
837 814
838 /* if session dead then we do not need to do ulogoff, 815 /* if session dead then we do not need to do ulogoff,
@@ -1536,7 +1513,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon,
1536 __u32 bytes_sent; 1513 __u32 bytes_sent;
1537 __u16 byte_count; 1514 __u16 byte_count;
1538 1515
1539 /* cFYI(1,("write at %lld %d bytes",offset,count));*/ 1516 /* cFYI(1, ("write at %lld %d bytes", offset, count));*/
1540 if (tcon->ses == NULL) 1517 if (tcon->ses == NULL)
1541 return -ECONNABORTED; 1518 return -ECONNABORTED;
1542 1519
@@ -3922,6 +3899,27 @@ GetInodeNumOut:
3922 return rc; 3899 return rc;
3923} 3900}
3924 3901
3902/* computes length of UCS string converted to host codepage
3903 * @src: UCS string
3904 * @maxlen: length of the input string in UCS characters
3905 * (not in bytes)
3906 *
3907 * return: size of input string in host codepage
3908 */
3909static int hostlen_fromUCS(const __le16 *src, const int maxlen,
3910 const struct nls_table *nls_codepage) {
3911 int i;
3912 int hostlen = 0;
3913 char to[4];
3914 int charlen;
3915 for (i = 0; (i < maxlen) && src[i]; ++i) {
3916 charlen = nls_codepage->uni2char(le16_to_cpu(src[i]),
3917 to, NLS_MAX_CHARSET_SIZE);
3918 hostlen += charlen > 0 ? charlen : 1;
3919 }
3920 return hostlen;
3921}
3922
3925/* parses DFS refferal V3 structure 3923/* parses DFS refferal V3 structure
3926 * caller is responsible for freeing target_nodes 3924 * caller is responsible for freeing target_nodes
3927 * returns: 3925 * returns:
@@ -3932,7 +3930,8 @@ static int
3932parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, 3930parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
3933 unsigned int *num_of_nodes, 3931 unsigned int *num_of_nodes,
3934 struct dfs_info3_param **target_nodes, 3932 struct dfs_info3_param **target_nodes,
3935 const struct nls_table *nls_codepage) 3933 const struct nls_table *nls_codepage, int remap,
3934 const char *searchName)
3936{ 3935{
3937 int i, rc = 0; 3936 int i, rc = 0;
3938 char *data_end; 3937 char *data_end;
@@ -3983,7 +3982,18 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
3983 struct dfs_info3_param *node = (*target_nodes)+i; 3982 struct dfs_info3_param *node = (*target_nodes)+i;
3984 3983
3985 node->flags = le16_to_cpu(pSMBr->DFSFlags); 3984 node->flags = le16_to_cpu(pSMBr->DFSFlags);
3986 node->path_consumed = le16_to_cpu(pSMBr->PathConsumed); 3985 if (is_unicode) {
3986 __le16 *tmp = kmalloc(strlen(searchName)*2 + 2,
3987 GFP_KERNEL);
3988 cifsConvertToUCS((__le16 *) tmp, searchName,
3989 PATH_MAX, nls_codepage, remap);
3990 node->path_consumed = hostlen_fromUCS(tmp,
3991 le16_to_cpu(pSMBr->PathConsumed)/2,
3992 nls_codepage);
3993 kfree(tmp);
3994 } else
3995 node->path_consumed = le16_to_cpu(pSMBr->PathConsumed);
3996
3987 node->server_type = le16_to_cpu(ref->ServerType); 3997 node->server_type = le16_to_cpu(ref->ServerType);
3988 node->ref_flag = le16_to_cpu(ref->ReferralEntryFlags); 3998 node->ref_flag = le16_to_cpu(ref->ReferralEntryFlags);
3989 3999
@@ -4116,7 +4126,8 @@ getDFSRetry:
4116 4126
4117 /* parse returned result into more usable form */ 4127 /* parse returned result into more usable form */
4118 rc = parse_DFS_referrals(pSMBr, num_of_nodes, 4128 rc = parse_DFS_referrals(pSMBr, num_of_nodes,
4119 target_nodes, nls_codepage); 4129 target_nodes, nls_codepage, remap,
4130 searchName);
4120 4131
4121GetDFSRefExit: 4132GetDFSRefExit:
4122 cifs_buf_release(pSMB); 4133 cifs_buf_release(pSMB);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 71b7661e2260..c7d341714586 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -92,6 +92,8 @@ struct smb_vol {
92 bool seal:1; /* request transport encryption on share */ 92 bool seal:1; /* request transport encryption on share */
93 bool nodfs:1; /* Do not request DFS, even if available */ 93 bool nodfs:1; /* Do not request DFS, even if available */
94 bool local_lease:1; /* check leases only on local system, not remote */ 94 bool local_lease:1; /* check leases only on local system, not remote */
95 bool noblocksnd:1;
96 bool noautotune:1;
95 unsigned int rsize; 97 unsigned int rsize;
96 unsigned int wsize; 98 unsigned int wsize;
97 unsigned int sockopt; 99 unsigned int sockopt;
@@ -102,9 +104,11 @@ struct smb_vol {
102static int ipv4_connect(struct sockaddr_in *psin_server, 104static int ipv4_connect(struct sockaddr_in *psin_server,
103 struct socket **csocket, 105 struct socket **csocket,
104 char *netb_name, 106 char *netb_name,
105 char *server_netb_name); 107 char *server_netb_name,
108 bool noblocksnd,
109 bool nosndbuf); /* ipv6 never set sndbuf size */
106static int ipv6_connect(struct sockaddr_in6 *psin_server, 110static int ipv6_connect(struct sockaddr_in6 *psin_server,
107 struct socket **csocket); 111 struct socket **csocket, bool noblocksnd);
108 112
109 113
110 /* 114 /*
@@ -120,7 +124,7 @@ static int
120cifs_reconnect(struct TCP_Server_Info *server) 124cifs_reconnect(struct TCP_Server_Info *server)
121{ 125{
122 int rc = 0; 126 int rc = 0;
123 struct list_head *tmp; 127 struct list_head *tmp, *tmp2;
124 struct cifsSesInfo *ses; 128 struct cifsSesInfo *ses;
125 struct cifsTconInfo *tcon; 129 struct cifsTconInfo *tcon;
126 struct mid_q_entry *mid_entry; 130 struct mid_q_entry *mid_entry;
@@ -140,23 +144,17 @@ cifs_reconnect(struct TCP_Server_Info *server)
140 144
141 /* before reconnecting the tcp session, mark the smb session (uid) 145 /* before reconnecting the tcp session, mark the smb session (uid)
142 and the tid bad so they are not used until reconnected */ 146 and the tid bad so they are not used until reconnected */
143 read_lock(&GlobalSMBSeslock); 147 read_lock(&cifs_tcp_ses_lock);
144 list_for_each(tmp, &GlobalSMBSessionList) { 148 list_for_each(tmp, &server->smb_ses_list) {
145 ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); 149 ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list);
146 if (ses->server) { 150 ses->need_reconnect = true;
147 if (ses->server == server) { 151 ses->ipc_tid = 0;
148 ses->status = CifsNeedReconnect; 152 list_for_each(tmp2, &ses->tcon_list) {
149 ses->ipc_tid = 0; 153 tcon = list_entry(tmp2, struct cifsTconInfo, tcon_list);
150 } 154 tcon->need_reconnect = true;
151 } 155 }
152 /* else tcp and smb sessions need reconnection */
153 }
154 list_for_each(tmp, &GlobalTreeConnectionList) {
155 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
156 if ((tcon->ses) && (tcon->ses->server == server))
157 tcon->tidStatus = CifsNeedReconnect;
158 } 156 }
159 read_unlock(&GlobalSMBSeslock); 157 read_unlock(&cifs_tcp_ses_lock);
160 /* do not want to be sending data on a socket we are freeing */ 158 /* do not want to be sending data on a socket we are freeing */
161 down(&server->tcpSem); 159 down(&server->tcpSem);
162 if (server->ssocket) { 160 if (server->ssocket) {
@@ -189,14 +187,15 @@ cifs_reconnect(struct TCP_Server_Info *server)
189 while ((server->tcpStatus != CifsExiting) && 187 while ((server->tcpStatus != CifsExiting) &&
190 (server->tcpStatus != CifsGood)) { 188 (server->tcpStatus != CifsGood)) {
191 try_to_freeze(); 189 try_to_freeze();
192 if (server->protocolType == IPV6) { 190 if (server->addr.sockAddr6.sin6_family == AF_INET6) {
193 rc = ipv6_connect(&server->addr.sockAddr6, 191 rc = ipv6_connect(&server->addr.sockAddr6,
194 &server->ssocket); 192 &server->ssocket, server->noautotune);
195 } else { 193 } else {
196 rc = ipv4_connect(&server->addr.sockAddr, 194 rc = ipv4_connect(&server->addr.sockAddr,
197 &server->ssocket, 195 &server->ssocket,
198 server->workstation_RFC1001_name, 196 server->workstation_RFC1001_name,
199 server->server_RFC1001_name); 197 server->server_RFC1001_name,
198 server->noblocksnd, server->noautotune);
200 } 199 }
201 if (rc) { 200 if (rc) {
202 cFYI(1, ("reconnect error %d", rc)); 201 cFYI(1, ("reconnect error %d", rc));
@@ -412,9 +411,14 @@ incomplete_rcv:
412 msleep(1); /* minimum sleep to prevent looping 411 msleep(1); /* minimum sleep to prevent looping
413 allowing socket to clear and app threads to set 412 allowing socket to clear and app threads to set
414 tcpStatus CifsNeedReconnect if server hung */ 413 tcpStatus CifsNeedReconnect if server hung */
415 if (pdu_length < 4) 414 if (pdu_length < 4) {
415 iov.iov_base = (4 - pdu_length) +
416 (char *)smb_buffer;
417 iov.iov_len = pdu_length;
418 smb_msg.msg_control = NULL;
419 smb_msg.msg_controllen = 0;
416 goto incomplete_rcv; 420 goto incomplete_rcv;
417 else 421 } else
418 continue; 422 continue;
419 } else if (length <= 0) { 423 } else if (length <= 0) {
420 if (server->tcpStatus == CifsNew) { 424 if (server->tcpStatus == CifsNew) {
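The lines added above re-arm the receive iovec when fewer than the four RFC1002 length bytes have arrived, so the next kernel_recvmsg() reads only the missing tail of the header. An illustrative restatement of that bookkeeping as a helper (hypothetical name, not part of the patch; "remaining" plays the role of pdu_length at that point):

static void cifs_rearm_hdr_iov(struct kvec *iov, struct msghdr *msg,
			       char *smb_buffer, unsigned int remaining)
{
	/* point past the header bytes already received ... */
	iov->iov_base = (4 - remaining) + smb_buffer;
	/* ... and ask only for the bytes of the length field still missing */
	iov->iov_len = remaining;
	msg->msg_control = NULL;
	msg->msg_controllen = 0;
}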
@@ -649,6 +653,11 @@ multi_t2_fnd:
649 } 653 }
650 } /* end while !EXITING */ 654 } /* end while !EXITING */
651 655
656 /* take it off the list, if it's not already */
657 write_lock(&cifs_tcp_ses_lock);
658 list_del_init(&server->tcp_ses_list);
659 write_unlock(&cifs_tcp_ses_lock);
660
652 spin_lock(&GlobalMid_Lock); 661 spin_lock(&GlobalMid_Lock);
653 server->tcpStatus = CifsExiting; 662 server->tcpStatus = CifsExiting;
654 spin_unlock(&GlobalMid_Lock); 663 spin_unlock(&GlobalMid_Lock);
@@ -681,29 +690,29 @@ multi_t2_fnd:
681 if (smallbuf) /* no sense logging a debug message if NULL */ 690 if (smallbuf) /* no sense logging a debug message if NULL */
682 cifs_small_buf_release(smallbuf); 691 cifs_small_buf_release(smallbuf);
683 692
684 read_lock(&GlobalSMBSeslock); 693 /*
694 * BB: we shouldn't have to do any of this. It shouldn't be
695 * possible to exit from the thread with active SMB sessions
696 */
697 read_lock(&cifs_tcp_ses_lock);
685 if (list_empty(&server->pending_mid_q)) { 698 if (list_empty(&server->pending_mid_q)) {
686 /* loop through server session structures attached to this and 699 /* loop through server session structures attached to this and
687 mark them dead */ 700 mark them dead */
688 list_for_each(tmp, &GlobalSMBSessionList) { 701 list_for_each(tmp, &server->smb_ses_list) {
689 ses = 702 ses = list_entry(tmp, struct cifsSesInfo,
690 list_entry(tmp, struct cifsSesInfo, 703 smb_ses_list);
691 cifsSessionList); 704 ses->status = CifsExiting;
692 if (ses->server == server) { 705 ses->server = NULL;
693 ses->status = CifsExiting;
694 ses->server = NULL;
695 }
696 } 706 }
697 read_unlock(&GlobalSMBSeslock); 707 read_unlock(&cifs_tcp_ses_lock);
698 } else { 708 } else {
699 /* although we can not zero the server struct pointer yet, 709 /* although we can not zero the server struct pointer yet,
 700 since there are active requests which may depend on them, 710 since there are active requests which may depend on them,
701 mark the corresponding SMB sessions as exiting too */ 711 mark the corresponding SMB sessions as exiting too */
702 list_for_each(tmp, &GlobalSMBSessionList) { 712 list_for_each(tmp, &server->smb_ses_list) {
703 ses = list_entry(tmp, struct cifsSesInfo, 713 ses = list_entry(tmp, struct cifsSesInfo,
704 cifsSessionList); 714 smb_ses_list);
705 if (ses->server == server) 715 ses->status = CifsExiting;
706 ses->status = CifsExiting;
707 } 716 }
708 717
709 spin_lock(&GlobalMid_Lock); 718 spin_lock(&GlobalMid_Lock);
@@ -718,7 +727,7 @@ multi_t2_fnd:
718 } 727 }
719 } 728 }
720 spin_unlock(&GlobalMid_Lock); 729 spin_unlock(&GlobalMid_Lock);
721 read_unlock(&GlobalSMBSeslock); 730 read_unlock(&cifs_tcp_ses_lock);
722 /* 1/8th of sec is more than enough time for them to exit */ 731 /* 1/8th of sec is more than enough time for them to exit */
723 msleep(125); 732 msleep(125);
724 } 733 }
@@ -740,14 +749,13 @@ multi_t2_fnd:
740 if there are any pointing to this (e.g 749 if there are any pointing to this (e.g
741 if a crazy root user tried to kill cifsd 750 if a crazy root user tried to kill cifsd
742 kernel thread explicitly this might happen) */ 751 kernel thread explicitly this might happen) */
743 write_lock(&GlobalSMBSeslock); 752 /* BB: This shouldn't be necessary, see above */
744 list_for_each(tmp, &GlobalSMBSessionList) { 753 read_lock(&cifs_tcp_ses_lock);
745 ses = list_entry(tmp, struct cifsSesInfo, 754 list_for_each(tmp, &server->smb_ses_list) {
746 cifsSessionList); 755 ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list);
747 if (ses->server == server) 756 ses->server = NULL;
748 ses->server = NULL;
749 } 757 }
750 write_unlock(&GlobalSMBSeslock); 758 read_unlock(&cifs_tcp_ses_lock);
751 759
752 kfree(server->hostname); 760 kfree(server->hostname);
753 task_to_wake = xchg(&server->tsk, NULL); 761 task_to_wake = xchg(&server->tsk, NULL);
@@ -1192,6 +1200,10 @@ cifs_parse_mount_options(char *options, const char *devname,
1192 /* ignore */ 1200 /* ignore */
1193 } else if (strnicmp(data, "rw", 2) == 0) { 1201 } else if (strnicmp(data, "rw", 2) == 0) {
1194 vol->rw = true; 1202 vol->rw = true;
1203 } else if (strnicmp(data, "noblocksend", 11) == 0) {
1204 vol->noblocksnd = 1;
1205 } else if (strnicmp(data, "noautotune", 10) == 0) {
1206 vol->noautotune = 1;
1195 } else if ((strnicmp(data, "suid", 4) == 0) || 1207 } else if ((strnicmp(data, "suid", 4) == 0) ||
1196 (strnicmp(data, "nosuid", 6) == 0) || 1208 (strnicmp(data, "nosuid", 6) == 0) ||
1197 (strnicmp(data, "exec", 4) == 0) || 1209 (strnicmp(data, "exec", 4) == 0) ||
@@ -1343,94 +1355,158 @@ cifs_parse_mount_options(char *options, const char *devname,
1343 return 0; 1355 return 0;
1344} 1356}
1345 1357
1346static struct cifsSesInfo * 1358static struct TCP_Server_Info *
1347cifs_find_tcp_session(struct in_addr *target_ip_addr, 1359cifs_find_tcp_session(struct sockaddr *addr)
1348 struct in6_addr *target_ip6_addr,
1349 char *userName, struct TCP_Server_Info **psrvTcp)
1350{ 1360{
1351 struct list_head *tmp; 1361 struct list_head *tmp;
1352 struct cifsSesInfo *ses; 1362 struct TCP_Server_Info *server;
1353 1363 struct sockaddr_in *addr4 = (struct sockaddr_in *) addr;
1354 *psrvTcp = NULL; 1364 struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *) addr;
1365
1366 write_lock(&cifs_tcp_ses_lock);
1367 list_for_each(tmp, &cifs_tcp_ses_list) {
1368 server = list_entry(tmp, struct TCP_Server_Info,
1369 tcp_ses_list);
1370 /*
1371 * the demux thread can exit on its own while still in CifsNew
1372 * so don't accept any sockets in that state. Since the
1373 * tcpStatus never changes back to CifsNew it's safe to check
1374 * for this without a lock.
1375 */
1376 if (server->tcpStatus == CifsNew)
1377 continue;
1355 1378
1356 read_lock(&GlobalSMBSeslock); 1379 if (addr->sa_family == AF_INET &&
1357 list_for_each(tmp, &GlobalSMBSessionList) { 1380 (addr4->sin_addr.s_addr !=
1358 ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); 1381 server->addr.sockAddr.sin_addr.s_addr))
1359 if (!ses->server) 1382 continue;
1383 else if (addr->sa_family == AF_INET6 &&
1384 memcmp(&server->addr.sockAddr6.sin6_addr,
1385 &addr6->sin6_addr, sizeof(addr6->sin6_addr)))
1360 continue; 1386 continue;
1361 1387
1362 if (target_ip_addr && 1388 ++server->srv_count;
1363 ses->server->addr.sockAddr.sin_addr.s_addr != target_ip_addr->s_addr) 1389 write_unlock(&cifs_tcp_ses_lock);
1364 continue; 1390 cFYI(1, ("Existing tcp session with server found"));
1365 else if (target_ip6_addr && 1391 return server;
1366 memcmp(&ses->server->addr.sockAddr6.sin6_addr, 1392 }
1367 target_ip6_addr, sizeof(*target_ip6_addr))) 1393 write_unlock(&cifs_tcp_ses_lock);
1368 continue; 1394 return NULL;
1369 /* BB lock server and tcp session; increment use count here?? */ 1395}
1370 1396
1371 /* found a match on the TCP session */ 1397static void
1372 *psrvTcp = ses->server; 1398cifs_put_tcp_session(struct TCP_Server_Info *server)
1399{
1400 struct task_struct *task;
1373 1401
1374 /* BB check if reconnection needed */ 1402 write_lock(&cifs_tcp_ses_lock);
1375 if (strncmp(ses->userName, userName, MAX_USERNAME_SIZE) == 0) { 1403 if (--server->srv_count > 0) {
1376 read_unlock(&GlobalSMBSeslock); 1404 write_unlock(&cifs_tcp_ses_lock);
1377 /* Found exact match on both TCP and 1405 return;
1378 SMB sessions */
1379 return ses;
1380 }
1381 /* else tcp and smb sessions need reconnection */
1382 } 1406 }
1383 read_unlock(&GlobalSMBSeslock);
1384 1407
1385 return NULL; 1408 list_del_init(&server->tcp_ses_list);
1409 write_unlock(&cifs_tcp_ses_lock);
1410
1411 spin_lock(&GlobalMid_Lock);
1412 server->tcpStatus = CifsExiting;
1413 spin_unlock(&GlobalMid_Lock);
1414
1415 task = xchg(&server->tsk, NULL);
1416 if (task)
1417 force_sig(SIGKILL, task);
1386} 1418}
1387 1419
1388static struct cifsTconInfo * 1420static struct cifsSesInfo *
1389find_unc(__be32 new_target_ip_addr, char *uncName, char *userName) 1421cifs_find_smb_ses(struct TCP_Server_Info *server, char *username)
1390{ 1422{
1391 struct list_head *tmp; 1423 struct list_head *tmp;
1392 struct cifsTconInfo *tcon; 1424 struct cifsSesInfo *ses;
1393 __be32 old_ip;
1394
1395 read_lock(&GlobalSMBSeslock);
1396 1425
1397 list_for_each(tmp, &GlobalTreeConnectionList) { 1426 write_lock(&cifs_tcp_ses_lock);
1398 cFYI(1, ("Next tcon")); 1427 list_for_each(tmp, &server->smb_ses_list) {
1399 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); 1428 ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list);
1400 if (!tcon->ses || !tcon->ses->server) 1429 if (strncmp(ses->userName, username, MAX_USERNAME_SIZE))
1401 continue; 1430 continue;
1402 1431
1403 old_ip = tcon->ses->server->addr.sockAddr.sin_addr.s_addr; 1432 ++ses->ses_count;
1404 cFYI(1, ("old ip addr: %x == new ip %x ?", 1433 write_unlock(&cifs_tcp_ses_lock);
1405 old_ip, new_target_ip_addr)); 1434 return ses;
1435 }
1436 write_unlock(&cifs_tcp_ses_lock);
1437 return NULL;
1438}
1439
1440static void
1441cifs_put_smb_ses(struct cifsSesInfo *ses)
1442{
1443 int xid;
1444 struct TCP_Server_Info *server = ses->server;
1406 1445
1407 if (old_ip != new_target_ip_addr) 1446 write_lock(&cifs_tcp_ses_lock);
1408 continue; 1447 if (--ses->ses_count > 0) {
1448 write_unlock(&cifs_tcp_ses_lock);
1449 return;
1450 }
1409 1451
1410 /* BB lock tcon, server, tcp session and increment use count? */ 1452 list_del_init(&ses->smb_ses_list);
1411 /* found a match on the TCP session */ 1453 write_unlock(&cifs_tcp_ses_lock);
1412 /* BB check if reconnection needed */
1413 cFYI(1, ("IP match, old UNC: %s new: %s",
1414 tcon->treeName, uncName));
1415 1454
1416 if (strncmp(tcon->treeName, uncName, MAX_TREE_SIZE)) 1455 if (ses->status == CifsGood) {
1417 continue; 1456 xid = GetXid();
1457 CIFSSMBLogoff(xid, ses);
1458 _FreeXid(xid);
1459 }
1460 sesInfoFree(ses);
1461 cifs_put_tcp_session(server);
1462}
1418 1463
1419 cFYI(1, ("and old usr: %s new: %s", 1464static struct cifsTconInfo *
1420 tcon->treeName, uncName)); 1465cifs_find_tcon(struct cifsSesInfo *ses, const char *unc)
1466{
1467 struct list_head *tmp;
1468 struct cifsTconInfo *tcon;
1421 1469
1422 if (strncmp(tcon->ses->userName, userName, MAX_USERNAME_SIZE)) 1470 write_lock(&cifs_tcp_ses_lock);
1471 list_for_each(tmp, &ses->tcon_list) {
1472 tcon = list_entry(tmp, struct cifsTconInfo, tcon_list);
1473 if (tcon->tidStatus == CifsExiting)
1474 continue;
1475 if (strncmp(tcon->treeName, unc, MAX_TREE_SIZE))
1423 continue; 1476 continue;
1424 1477
1425 /* matched smb session (user name) */ 1478 ++tcon->tc_count;
1426 read_unlock(&GlobalSMBSeslock); 1479 write_unlock(&cifs_tcp_ses_lock);
1427 return tcon; 1480 return tcon;
1428 } 1481 }
1429 1482 write_unlock(&cifs_tcp_ses_lock);
1430 read_unlock(&GlobalSMBSeslock);
1431 return NULL; 1483 return NULL;
1432} 1484}
1433 1485
1486static void
1487cifs_put_tcon(struct cifsTconInfo *tcon)
1488{
1489 int xid;
1490 struct cifsSesInfo *ses = tcon->ses;
1491
1492 write_lock(&cifs_tcp_ses_lock);
1493 if (--tcon->tc_count > 0) {
1494 write_unlock(&cifs_tcp_ses_lock);
1495 return;
1496 }
1497
1498 list_del_init(&tcon->tcon_list);
1499 write_unlock(&cifs_tcp_ses_lock);
1500
1501 xid = GetXid();
1502 CIFSSMBTDis(xid, tcon);
1503 _FreeXid(xid);
1504
1505 DeleteTconOplockQEntries(tcon);
1506 tconInfoFree(tcon);
1507 cifs_put_smb_ses(ses);
1508}
1509
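The new find/put pairs above replace the old global session and tcon lists with per-server lists guarded by cifs_tcp_ses_lock, using a simple reference-counting discipline: bump the count while still holding the lock during lookup, and tear the object down only when the matching put drops the last reference. A schematic of the pattern with placeholder types (illustrative only; refobj/obj_get/obj_put are not real symbols):

struct refobj {
	struct list_head list;
	int count;
};

static struct refobj *obj_get(struct list_head *head,
			      bool (*match)(struct refobj *))
{
	struct refobj *obj;

	write_lock(&cifs_tcp_ses_lock);
	list_for_each_entry(obj, head, list) {
		if (!match(obj))
			continue;
		++obj->count;		/* take a reference under the lock */
		write_unlock(&cifs_tcp_ses_lock);
		return obj;
	}
	write_unlock(&cifs_tcp_ses_lock);
	return NULL;
}

static void obj_put(struct refobj *obj)
{
	write_lock(&cifs_tcp_ses_lock);
	if (--obj->count > 0) {		/* still referenced elsewhere */
		write_unlock(&cifs_tcp_ses_lock);
		return;
	}
	list_del_init(&obj->list);	/* last reference: unlink ... */
	write_unlock(&cifs_tcp_ses_lock);
	kfree(obj);			/* ... then tear down outside the lock */
}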
1434int 1510int
1435get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path, 1511get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path,
1436 const struct nls_table *nls_codepage, unsigned int *pnum_referrals, 1512 const struct nls_table *nls_codepage, unsigned int *pnum_referrals,
@@ -1518,7 +1594,8 @@ static void rfc1002mangle(char *target, char *source, unsigned int length)
1518 1594
1519static int 1595static int
1520ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, 1596ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1521 char *netbios_name, char *target_name) 1597 char *netbios_name, char *target_name,
1598 bool noblocksnd, bool noautotune)
1522{ 1599{
1523 int rc = 0; 1600 int rc = 0;
1524 int connected = 0; 1601 int connected = 0;
@@ -1590,11 +1667,16 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1590 (*csocket)->sk->sk_sndbuf, 1667 (*csocket)->sk->sk_sndbuf,
1591 (*csocket)->sk->sk_rcvbuf, (*csocket)->sk->sk_rcvtimeo)); 1668 (*csocket)->sk->sk_rcvbuf, (*csocket)->sk->sk_rcvtimeo));
1592 (*csocket)->sk->sk_rcvtimeo = 7 * HZ; 1669 (*csocket)->sk->sk_rcvtimeo = 7 * HZ;
1670 if (!noblocksnd)
1671 (*csocket)->sk->sk_sndtimeo = 3 * HZ;
1672
1593 /* make the bufsizes depend on wsize/rsize and max requests */ 1673 /* make the bufsizes depend on wsize/rsize and max requests */
1594 if ((*csocket)->sk->sk_sndbuf < (200 * 1024)) 1674 if (noautotune) {
1595 (*csocket)->sk->sk_sndbuf = 200 * 1024; 1675 if ((*csocket)->sk->sk_sndbuf < (200 * 1024))
1596 if ((*csocket)->sk->sk_rcvbuf < (140 * 1024)) 1676 (*csocket)->sk->sk_sndbuf = 200 * 1024;
1597 (*csocket)->sk->sk_rcvbuf = 140 * 1024; 1677 if ((*csocket)->sk->sk_rcvbuf < (140 * 1024))
1678 (*csocket)->sk->sk_rcvbuf = 140 * 1024;
1679 }
1598 1680
1599 /* send RFC1001 sessinit */ 1681 /* send RFC1001 sessinit */
1600 if (psin_server->sin_port == htons(RFC1001_PORT)) { 1682 if (psin_server->sin_port == htons(RFC1001_PORT)) {
@@ -1631,7 +1713,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1631 /* sizeof RFC1002_SESSION_REQUEST with no scope */ 1713 /* sizeof RFC1002_SESSION_REQUEST with no scope */
1632 smb_buf->smb_buf_length = 0x81000044; 1714 smb_buf->smb_buf_length = 0x81000044;
1633 rc = smb_send(*csocket, smb_buf, 0x44, 1715 rc = smb_send(*csocket, smb_buf, 0x44,
1634 (struct sockaddr *)psin_server); 1716 (struct sockaddr *)psin_server, noblocksnd);
1635 kfree(ses_init_buf); 1717 kfree(ses_init_buf);
1636 msleep(1); /* RFC1001 layer in at least one server 1718 msleep(1); /* RFC1001 layer in at least one server
1637 requires very short break before negprot 1719 requires very short break before negprot
@@ -1651,7 +1733,8 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1651} 1733}
1652 1734
1653static int 1735static int
1654ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket) 1736ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket,
1737 bool noblocksnd)
1655{ 1738{
1656 int rc = 0; 1739 int rc = 0;
1657 int connected = 0; 1740 int connected = 0;
@@ -1720,6 +1803,9 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket)
1720 the default. sock_setsockopt not used because it expects 1803 the default. sock_setsockopt not used because it expects
1721 user space buffer */ 1804 user space buffer */
1722 (*csocket)->sk->sk_rcvtimeo = 7 * HZ; 1805 (*csocket)->sk->sk_rcvtimeo = 7 * HZ;
1806 if (!noblocksnd)
1807 (*csocket)->sk->sk_sndtimeo = 3 * HZ;
1808
1723 1809
1724 return rc; 1810 return rc;
1725} 1811}
@@ -1857,14 +1943,90 @@ convert_delimiter(char *path, char delim)
1857 } 1943 }
1858} 1944}
1859 1945
1860static void 1946static void setup_cifs_sb(struct smb_vol *pvolume_info,
1861kill_cifsd(struct TCP_Server_Info *server) 1947 struct cifs_sb_info *cifs_sb)
1862{ 1948{
1863 struct task_struct *task; 1949 if (pvolume_info->rsize > CIFSMaxBufSize) {
1864 1950 cERROR(1, ("rsize %d too large, using MaxBufSize",
1865 task = xchg(&server->tsk, NULL); 1951 pvolume_info->rsize));
1866 if (task) 1952 cifs_sb->rsize = CIFSMaxBufSize;
1867 force_sig(SIGKILL, task); 1953 } else if ((pvolume_info->rsize) &&
1954 (pvolume_info->rsize <= CIFSMaxBufSize))
1955 cifs_sb->rsize = pvolume_info->rsize;
1956 else /* default */
1957 cifs_sb->rsize = CIFSMaxBufSize;
1958
1959 if (pvolume_info->wsize > PAGEVEC_SIZE * PAGE_CACHE_SIZE) {
1960 cERROR(1, ("wsize %d too large, using 4096 instead",
1961 pvolume_info->wsize));
1962 cifs_sb->wsize = 4096;
1963 } else if (pvolume_info->wsize)
1964 cifs_sb->wsize = pvolume_info->wsize;
1965 else
1966 cifs_sb->wsize = min_t(const int,
1967 PAGEVEC_SIZE * PAGE_CACHE_SIZE,
1968 127*1024);
1969 /* old default of CIFSMaxBufSize was too small now
1970 that SMB Write2 can send multiple pages in kvec.
1971 RFC1001 does not describe what happens when frame
1972 bigger than 128K is sent so use that as max in
1973 conjunction with 52K kvec constraint on arch with 4K
1974 page size */
1975
1976 if (cifs_sb->rsize < 2048) {
1977 cifs_sb->rsize = 2048;
1978 /* Windows ME may prefer this */
1979 cFYI(1, ("readsize set to minimum: 2048"));
1980 }
1981 /* calculate prepath */
1982 cifs_sb->prepath = pvolume_info->prepath;
1983 if (cifs_sb->prepath) {
1984 cifs_sb->prepathlen = strlen(cifs_sb->prepath);
1985 /* we can not convert the / to \ in the path
1986 separators in the prefixpath yet because we do not
1987 know (until reset_cifs_unix_caps is called later)
1988 whether POSIX PATH CAP is available. We normalize
1989 the / to \ after reset_cifs_unix_caps is called */
1990 pvolume_info->prepath = NULL;
1991 } else
1992 cifs_sb->prepathlen = 0;
1993 cifs_sb->mnt_uid = pvolume_info->linux_uid;
1994 cifs_sb->mnt_gid = pvolume_info->linux_gid;
1995 cifs_sb->mnt_file_mode = pvolume_info->file_mode;
1996 cifs_sb->mnt_dir_mode = pvolume_info->dir_mode;
1997 cFYI(1, ("file mode: 0x%x dir mode: 0x%x",
1998 cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode));
1999
2000 if (pvolume_info->noperm)
2001 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
2002 if (pvolume_info->setuids)
2003 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SET_UID;
2004 if (pvolume_info->server_ino)
2005 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SERVER_INUM;
2006 if (pvolume_info->remap)
2007 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SPECIAL_CHR;
2008 if (pvolume_info->no_xattr)
2009 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_XATTR;
2010 if (pvolume_info->sfu_emul)
2011 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL;
2012 if (pvolume_info->nobrl)
2013 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL;
2014 if (pvolume_info->cifs_acl)
2015 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL;
2016 if (pvolume_info->override_uid)
2017 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_UID;
2018 if (pvolume_info->override_gid)
2019 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID;
2020 if (pvolume_info->dynperm)
2021 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM;
2022 if (pvolume_info->direct_io) {
2023 cFYI(1, ("mounting share using direct i/o"));
2024 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
2025 }
2026
2027 if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm))
2028 cERROR(1, ("mount option dynperm ignored if cifsacl "
2029 "mount option supported"));
1868} 2030}
1869 2031
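The new setup_cifs_sb() above centralizes the rsize/wsize selection that used to live inline in cifs_mount(). A compact restatement of the wsize choice it makes (illustrative sketch only; pick_wsize is a hypothetical name and the warning message is elided):

static unsigned int pick_wsize(unsigned int requested)
{
	if (requested > PAGEVEC_SIZE * PAGE_CACHE_SIZE)
		return 4096;		/* too large: warn and fall back */
	if (requested)
		return requested;	/* honour the wsize= mount option */
	/* default: bounded by the kvec limit and the 127K RFC1001 cap */
	return min_t(unsigned int, PAGEVEC_SIZE * PAGE_CACHE_SIZE, 127 * 1024);
}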
1870int 2032int
@@ -1873,13 +2035,12 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1873{ 2035{
1874 int rc = 0; 2036 int rc = 0;
1875 int xid; 2037 int xid;
1876 int address_type = AF_INET;
1877 struct socket *csocket = NULL; 2038 struct socket *csocket = NULL;
1878 struct sockaddr_in sin_server; 2039 struct sockaddr addr;
1879 struct sockaddr_in6 sin_server6; 2040 struct sockaddr_in *sin_server = (struct sockaddr_in *) &addr;
2041 struct sockaddr_in6 *sin_server6 = (struct sockaddr_in6 *) &addr;
1880 struct smb_vol volume_info; 2042 struct smb_vol volume_info;
1881 struct cifsSesInfo *pSesInfo = NULL; 2043 struct cifsSesInfo *pSesInfo = NULL;
1882 struct cifsSesInfo *existingCifsSes = NULL;
1883 struct cifsTconInfo *tcon = NULL; 2044 struct cifsTconInfo *tcon = NULL;
1884 struct TCP_Server_Info *srvTcp = NULL; 2045 struct TCP_Server_Info *srvTcp = NULL;
1885 2046
@@ -1887,6 +2048,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1887 2048
1888/* cFYI(1, ("Entering cifs_mount. Xid: %d with: %s", xid, mount_data)); */ 2049/* cFYI(1, ("Entering cifs_mount. Xid: %d with: %s", xid, mount_data)); */
1889 2050
2051 memset(&addr, 0, sizeof(struct sockaddr));
1890 memset(&volume_info, 0, sizeof(struct smb_vol)); 2052 memset(&volume_info, 0, sizeof(struct smb_vol));
1891 if (cifs_parse_mount_options(mount_data, devname, &volume_info)) { 2053 if (cifs_parse_mount_options(mount_data, devname, &volume_info)) {
1892 rc = -EINVAL; 2054 rc = -EINVAL;
@@ -1909,16 +2071,16 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1909 2071
1910 if (volume_info.UNCip && volume_info.UNC) { 2072 if (volume_info.UNCip && volume_info.UNC) {
1911 rc = cifs_inet_pton(AF_INET, volume_info.UNCip, 2073 rc = cifs_inet_pton(AF_INET, volume_info.UNCip,
1912 &sin_server.sin_addr.s_addr); 2074 &sin_server->sin_addr.s_addr);
1913 2075
1914 if (rc <= 0) { 2076 if (rc <= 0) {
1915 /* not ipv4 address, try ipv6 */ 2077 /* not ipv4 address, try ipv6 */
1916 rc = cifs_inet_pton(AF_INET6, volume_info.UNCip, 2078 rc = cifs_inet_pton(AF_INET6, volume_info.UNCip,
1917 &sin_server6.sin6_addr.in6_u); 2079 &sin_server6->sin6_addr.in6_u);
1918 if (rc > 0) 2080 if (rc > 0)
1919 address_type = AF_INET6; 2081 addr.sa_family = AF_INET6;
1920 } else { 2082 } else {
1921 address_type = AF_INET; 2083 addr.sa_family = AF_INET;
1922 } 2084 }
1923 2085
1924 if (rc <= 0) { 2086 if (rc <= 0) {
@@ -1958,38 +2120,25 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1958 } 2120 }
1959 } 2121 }
1960 2122
1961 if (address_type == AF_INET) 2123 srvTcp = cifs_find_tcp_session(&addr);
1962 existingCifsSes = cifs_find_tcp_session(&sin_server.sin_addr, 2124 if (!srvTcp) { /* create socket */
1963 NULL /* no ipv6 addr */, 2125 if (addr.sa_family == AF_INET6) {
1964 volume_info.username, &srvTcp);
1965 else if (address_type == AF_INET6) {
1966 cFYI(1, ("looking for ipv6 address"));
1967 existingCifsSes = cifs_find_tcp_session(NULL /* no ipv4 addr */,
1968 &sin_server6.sin6_addr,
1969 volume_info.username, &srvTcp);
1970 } else {
1971 rc = -EINVAL;
1972 goto out;
1973 }
1974
1975 if (srvTcp) {
1976 cFYI(1, ("Existing tcp session with server found"));
1977 } else { /* create socket */
1978 if (volume_info.port)
1979 sin_server.sin_port = htons(volume_info.port);
1980 else
1981 sin_server.sin_port = 0;
1982 if (address_type == AF_INET6) {
1983 cFYI(1, ("attempting ipv6 connect")); 2126 cFYI(1, ("attempting ipv6 connect"));
1984 /* BB should we allow ipv6 on port 139? */ 2127 /* BB should we allow ipv6 on port 139? */
1985 /* other OS never observed in Wild doing 139 with v6 */ 2128 /* other OS never observed in Wild doing 139 with v6 */
1986 rc = ipv6_connect(&sin_server6, &csocket); 2129 sin_server6->sin6_port = htons(volume_info.port);
1987 } else 2130 rc = ipv6_connect(sin_server6, &csocket,
1988 rc = ipv4_connect(&sin_server, &csocket, 2131 volume_info.noblocksnd);
2132 } else {
2133 sin_server->sin_port = htons(volume_info.port);
2134 rc = ipv4_connect(sin_server, &csocket,
1989 volume_info.source_rfc1001_name, 2135 volume_info.source_rfc1001_name,
1990 volume_info.target_rfc1001_name); 2136 volume_info.target_rfc1001_name,
2137 volume_info.noblocksnd,
2138 volume_info.noautotune);
2139 }
1991 if (rc < 0) { 2140 if (rc < 0) {
1992 cERROR(1, ("Error connecting to IPv4 socket. " 2141 cERROR(1, ("Error connecting to socket. "
1993 "Aborting operation")); 2142 "Aborting operation"));
1994 if (csocket != NULL) 2143 if (csocket != NULL)
1995 sock_release(csocket); 2144 sock_release(csocket);
@@ -2002,12 +2151,17 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2002 sock_release(csocket); 2151 sock_release(csocket);
2003 goto out; 2152 goto out;
2004 } else { 2153 } else {
2005 memcpy(&srvTcp->addr.sockAddr, &sin_server, 2154 srvTcp->noblocksnd = volume_info.noblocksnd;
2006 sizeof(struct sockaddr_in)); 2155 srvTcp->noautotune = volume_info.noautotune;
2156 if (addr.sa_family == AF_INET6)
2157 memcpy(&srvTcp->addr.sockAddr6, sin_server6,
2158 sizeof(struct sockaddr_in6));
2159 else
2160 memcpy(&srvTcp->addr.sockAddr, sin_server,
2161 sizeof(struct sockaddr_in));
2007 atomic_set(&srvTcp->inFlight, 0); 2162 atomic_set(&srvTcp->inFlight, 0);
2008 /* BB Add code for ipv6 case too */ 2163 /* BB Add code for ipv6 case too */
2009 srvTcp->ssocket = csocket; 2164 srvTcp->ssocket = csocket;
2010 srvTcp->protocolType = IPV4;
2011 srvTcp->hostname = extract_hostname(volume_info.UNC); 2165 srvTcp->hostname = extract_hostname(volume_info.UNC);
2012 if (IS_ERR(srvTcp->hostname)) { 2166 if (IS_ERR(srvTcp->hostname)) {
2013 rc = PTR_ERR(srvTcp->hostname); 2167 rc = PTR_ERR(srvTcp->hostname);
@@ -2037,15 +2191,28 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2037 memcpy(srvTcp->server_RFC1001_name, 2191 memcpy(srvTcp->server_RFC1001_name,
2038 volume_info.target_rfc1001_name, 16); 2192 volume_info.target_rfc1001_name, 16);
2039 srvTcp->sequence_number = 0; 2193 srvTcp->sequence_number = 0;
2194 INIT_LIST_HEAD(&srvTcp->tcp_ses_list);
2195 INIT_LIST_HEAD(&srvTcp->smb_ses_list);
2196 ++srvTcp->srv_count;
2197 write_lock(&cifs_tcp_ses_lock);
2198 list_add(&srvTcp->tcp_ses_list,
2199 &cifs_tcp_ses_list);
2200 write_unlock(&cifs_tcp_ses_lock);
2040 } 2201 }
2041 } 2202 }
2042 2203
2043 if (existingCifsSes) { 2204 pSesInfo = cifs_find_smb_ses(srvTcp, volume_info.username);
2044 pSesInfo = existingCifsSes; 2205 if (pSesInfo) {
2045 cFYI(1, ("Existing smb sess found (status=%d)", 2206 cFYI(1, ("Existing smb sess found (status=%d)",
2046 pSesInfo->status)); 2207 pSesInfo->status));
2208 /*
2209 * The existing SMB session already has a reference to srvTcp,
2210 * so we can put back the extra one we got before
2211 */
2212 cifs_put_tcp_session(srvTcp);
2213
2047 down(&pSesInfo->sesSem); 2214 down(&pSesInfo->sesSem);
2048 if (pSesInfo->status == CifsNeedReconnect) { 2215 if (pSesInfo->need_reconnect) {
2049 cFYI(1, ("Session needs reconnect")); 2216 cFYI(1, ("Session needs reconnect"));
2050 rc = cifs_setup_session(xid, pSesInfo, 2217 rc = cifs_setup_session(xid, pSesInfo,
2051 cifs_sb->local_nls); 2218 cifs_sb->local_nls);
@@ -2054,187 +2221,101 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2054 } else if (!rc) { 2221 } else if (!rc) {
2055 cFYI(1, ("Existing smb sess not found")); 2222 cFYI(1, ("Existing smb sess not found"));
2056 pSesInfo = sesInfoAlloc(); 2223 pSesInfo = sesInfoAlloc();
2057 if (pSesInfo == NULL) 2224 if (pSesInfo == NULL) {
2058 rc = -ENOMEM; 2225 rc = -ENOMEM;
2059 else { 2226 goto mount_fail_check;
2060 pSesInfo->server = srvTcp; 2227 }
2061 sprintf(pSesInfo->serverName, "%u.%u.%u.%u", 2228
2062 NIPQUAD(sin_server.sin_addr.s_addr)); 2229 /* new SMB session uses our srvTcp ref */
2063 } 2230 pSesInfo->server = srvTcp;
2231 sprintf(pSesInfo->serverName, "%u.%u.%u.%u",
2232 NIPQUAD(sin_server->sin_addr.s_addr));
2233
2234 write_lock(&cifs_tcp_ses_lock);
2235 list_add(&pSesInfo->smb_ses_list, &srvTcp->smb_ses_list);
2236 write_unlock(&cifs_tcp_ses_lock);
2237
2238 /* volume_info.password freed at unmount */
2239 if (volume_info.password) {
2240 pSesInfo->password = volume_info.password;
2241 /* set to NULL to prevent freeing on exit */
2242 volume_info.password = NULL;
2243 }
2244 if (volume_info.username)
2245 strncpy(pSesInfo->userName, volume_info.username,
2246 MAX_USERNAME_SIZE);
2247 if (volume_info.domainname) {
2248 int len = strlen(volume_info.domainname);
2249 pSesInfo->domainName = kmalloc(len + 1, GFP_KERNEL);
2250 if (pSesInfo->domainName)
2251 strcpy(pSesInfo->domainName,
2252 volume_info.domainname);
2253 }
2254 pSesInfo->linux_uid = volume_info.linux_uid;
2255 pSesInfo->overrideSecFlg = volume_info.secFlg;
2256 down(&pSesInfo->sesSem);
2064 2257
2065 if (!rc) { 2258 /* BB FIXME need to pass vol->secFlgs BB */
2066 /* volume_info.password freed at unmount */ 2259 rc = cifs_setup_session(xid, pSesInfo,
2067 if (volume_info.password) { 2260 cifs_sb->local_nls);
2068 pSesInfo->password = volume_info.password; 2261 up(&pSesInfo->sesSem);
2069 /* set to NULL to prevent freeing on exit */
2070 volume_info.password = NULL;
2071 }
2072 if (volume_info.username)
2073 strncpy(pSesInfo->userName,
2074 volume_info.username,
2075 MAX_USERNAME_SIZE);
2076 if (volume_info.domainname) {
2077 int len = strlen(volume_info.domainname);
2078 pSesInfo->domainName =
2079 kmalloc(len + 1, GFP_KERNEL);
2080 if (pSesInfo->domainName)
2081 strcpy(pSesInfo->domainName,
2082 volume_info.domainname);
2083 }
2084 pSesInfo->linux_uid = volume_info.linux_uid;
2085 pSesInfo->overrideSecFlg = volume_info.secFlg;
2086 down(&pSesInfo->sesSem);
2087 /* BB FIXME need to pass vol->secFlgs BB */
2088 rc = cifs_setup_session(xid, pSesInfo,
2089 cifs_sb->local_nls);
2090 up(&pSesInfo->sesSem);
2091 if (!rc)
2092 atomic_inc(&srvTcp->socketUseCount);
2093 }
2094 } 2262 }
2095 2263
2096 /* search for existing tcon to this server share */ 2264 /* search for existing tcon to this server share */
2097 if (!rc) { 2265 if (!rc) {
2098 if (volume_info.rsize > CIFSMaxBufSize) { 2266 setup_cifs_sb(&volume_info, cifs_sb);
2099 cERROR(1, ("rsize %d too large, using MaxBufSize",
2100 volume_info.rsize));
2101 cifs_sb->rsize = CIFSMaxBufSize;
2102 } else if ((volume_info.rsize) &&
2103 (volume_info.rsize <= CIFSMaxBufSize))
2104 cifs_sb->rsize = volume_info.rsize;
2105 else /* default */
2106 cifs_sb->rsize = CIFSMaxBufSize;
2107
2108 if (volume_info.wsize > PAGEVEC_SIZE * PAGE_CACHE_SIZE) {
2109 cERROR(1, ("wsize %d too large, using 4096 instead",
2110 volume_info.wsize));
2111 cifs_sb->wsize = 4096;
2112 } else if (volume_info.wsize)
2113 cifs_sb->wsize = volume_info.wsize;
2114 else
2115 cifs_sb->wsize =
2116 min_t(const int, PAGEVEC_SIZE * PAGE_CACHE_SIZE,
2117 127*1024);
2118 /* old default of CIFSMaxBufSize was too small now
2119 that SMB Write2 can send multiple pages in kvec.
2120 RFC1001 does not describe what happens when frame
2121 bigger than 128K is sent so use that as max in
2122 conjunction with 52K kvec constraint on arch with 4K
2123 page size */
2124
2125 if (cifs_sb->rsize < 2048) {
2126 cifs_sb->rsize = 2048;
2127 /* Windows ME may prefer this */
2128 cFYI(1, ("readsize set to minimum: 2048"));
2129 }
2130 /* calculate prepath */
2131 cifs_sb->prepath = volume_info.prepath;
2132 if (cifs_sb->prepath) {
2133 cifs_sb->prepathlen = strlen(cifs_sb->prepath);
2134 /* we can not convert the / to \ in the path
2135 separators in the prefixpath yet because we do not
2136 know (until reset_cifs_unix_caps is called later)
2137 whether POSIX PATH CAP is available. We normalize
2138 the / to \ after reset_cifs_unix_caps is called */
2139 volume_info.prepath = NULL;
2140 } else
2141 cifs_sb->prepathlen = 0;
2142 cifs_sb->mnt_uid = volume_info.linux_uid;
2143 cifs_sb->mnt_gid = volume_info.linux_gid;
2144 cifs_sb->mnt_file_mode = volume_info.file_mode;
2145 cifs_sb->mnt_dir_mode = volume_info.dir_mode;
2146 cFYI(1, ("file mode: 0x%x dir mode: 0x%x",
2147 cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode));
2148
2149 if (volume_info.noperm)
2150 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
2151 if (volume_info.setuids)
2152 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SET_UID;
2153 if (volume_info.server_ino)
2154 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SERVER_INUM;
2155 if (volume_info.remap)
2156 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SPECIAL_CHR;
2157 if (volume_info.no_xattr)
2158 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_XATTR;
2159 if (volume_info.sfu_emul)
2160 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL;
2161 if (volume_info.nobrl)
2162 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL;
2163 if (volume_info.cifs_acl)
2164 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL;
2165 if (volume_info.override_uid)
2166 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_UID;
2167 if (volume_info.override_gid)
2168 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID;
2169 if (volume_info.dynperm)
2170 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM;
2171 if (volume_info.direct_io) {
2172 cFYI(1, ("mounting share using direct i/o"));
2173 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
2174 }
2175
2176 if ((volume_info.cifs_acl) && (volume_info.dynperm))
2177 cERROR(1, ("mount option dynperm ignored if cifsacl "
2178 "mount option supported"));
2179 2267
2180 tcon = 2268 tcon = cifs_find_tcon(pSesInfo, volume_info.UNC);
2181 find_unc(sin_server.sin_addr.s_addr, volume_info.UNC,
2182 volume_info.username);
2183 if (tcon) { 2269 if (tcon) {
2184 cFYI(1, ("Found match on UNC path")); 2270 cFYI(1, ("Found match on UNC path"));
2185 /* we can have only one retry value for a connection 2271 /* existing tcon already has a reference */
2186 to a share so for resources mounted more than once 2272 cifs_put_smb_ses(pSesInfo);
2187 to the same server share the last value passed in
2188 for the retry flag is used */
2189 tcon->retry = volume_info.retry;
2190 tcon->nocase = volume_info.nocase;
2191 tcon->local_lease = volume_info.local_lease;
2192 if (tcon->seal != volume_info.seal) 2273 if (tcon->seal != volume_info.seal)
2193 cERROR(1, ("transport encryption setting " 2274 cERROR(1, ("transport encryption setting "
2194 "conflicts with existing tid")); 2275 "conflicts with existing tid"));
2195 } else { 2276 } else {
2196 tcon = tconInfoAlloc(); 2277 tcon = tconInfoAlloc();
2197 if (tcon == NULL) 2278 if (tcon == NULL) {
2198 rc = -ENOMEM; 2279 rc = -ENOMEM;
2199 else { 2280 goto mount_fail_check;
2200 /* check for null share name ie connecting to 2281 }
2201 * dfs root */ 2282 tcon->ses = pSesInfo;
2202 2283
2203 /* BB check if this works for exactly length 2284 /* check for null share name ie connect to dfs root */
2204 * three strings */ 2285 if ((strchr(volume_info.UNC + 3, '\\') == NULL)
2205 if ((strchr(volume_info.UNC + 3, '\\') == NULL) 2286 && (strchr(volume_info.UNC + 3, '/') == NULL)) {
2206 && (strchr(volume_info.UNC + 3, '/') == 2287 /* rc = connect_to_dfs_path(...) */
2207 NULL)) { 2288 cFYI(1, ("DFS root not supported"));
2208/* rc = connect_to_dfs_path(xid, pSesInfo, 2289 rc = -ENODEV;
2209 "", cifs_sb->local_nls, 2290 goto mount_fail_check;
2210 cifs_sb->mnt_cifs_flags & 2291 } else {
2211 CIFS_MOUNT_MAP_SPECIAL_CHR);*/ 2292 /* BB Do we need to wrap sesSem around
2212 cFYI(1, ("DFS root not supported")); 2293 * this TCon call and Unix SetFS as
2213 rc = -ENODEV; 2294 * we do on SessSetup and reconnect? */
2214 goto out; 2295 rc = CIFSTCon(xid, pSesInfo, volume_info.UNC,
2215 } else { 2296 tcon, cifs_sb->local_nls);
2216 /* BB Do we need to wrap sesSem around 2297 cFYI(1, ("CIFS Tcon rc = %d", rc));
2217 * this TCon call and Unix SetFS as 2298 if (volume_info.nodfs) {
2218 * we do on SessSetup and reconnect? */ 2299 tcon->Flags &= ~SMB_SHARE_IS_IN_DFS;
2219 rc = CIFSTCon(xid, pSesInfo, 2300 cFYI(1, ("DFS disabled (%d)",
2220 volume_info.UNC, 2301 tcon->Flags));
2221 tcon, cifs_sb->local_nls);
2222 cFYI(1, ("CIFS Tcon rc = %d", rc));
2223 if (volume_info.nodfs) {
2224 tcon->Flags &=
2225 ~SMB_SHARE_IS_IN_DFS;
2226 cFYI(1, ("DFS disabled (%d)",
2227 tcon->Flags));
2228 }
2229 }
2230 if (!rc) {
2231 atomic_inc(&pSesInfo->inUse);
2232 tcon->retry = volume_info.retry;
2233 tcon->nocase = volume_info.nocase;
2234 tcon->seal = volume_info.seal;
2235 } 2302 }
2236 } 2303 }
2237 } 2304 if (rc)
2305 goto mount_fail_check;
2306 tcon->seal = volume_info.seal;
2307 write_lock(&cifs_tcp_ses_lock);
2308 list_add(&tcon->tcon_list, &pSesInfo->tcon_list);
2309 write_unlock(&cifs_tcp_ses_lock);
2310 }
2311
2312 /* we can have only one retry value for a connection
2313 to a share so for resources mounted more than once
2314 to the same server share the last value passed in
2315 for the retry flag is used */
2316 tcon->retry = volume_info.retry;
2317 tcon->nocase = volume_info.nocase;
2318 tcon->local_lease = volume_info.local_lease;
2238 } 2319 }
2239 if (pSesInfo) { 2320 if (pSesInfo) {
2240 if (pSesInfo->capabilities & CAP_LARGE_FILES) { 2321 if (pSesInfo->capabilities & CAP_LARGE_FILES) {
@@ -2246,80 +2327,49 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2246 /* BB FIXME fix time_gran to be larger for LANMAN sessions */ 2327 /* BB FIXME fix time_gran to be larger for LANMAN sessions */
2247 sb->s_time_gran = 100; 2328 sb->s_time_gran = 100;
2248 2329
2249/* on error free sesinfo and tcon struct if needed */ 2330mount_fail_check:
2331 /* on error free sesinfo and tcon struct if needed */
2250 if (rc) { 2332 if (rc) {
2251 /* if session setup failed, use count is zero but 2333 /* If find_unc succeeded then rc == 0 so we cannot end */
2252 we still need to free cifsd thread */ 2334 up accidentally freeing someone else's tcon struct */
2253 if (atomic_read(&srvTcp->socketUseCount) == 0) { 2335 if (tcon)
2254 spin_lock(&GlobalMid_Lock); 2336 cifs_put_tcon(tcon);
2255 srvTcp->tcpStatus = CifsExiting; 2337 else if (pSesInfo)
2256 spin_unlock(&GlobalMid_Lock); 2338 cifs_put_smb_ses(pSesInfo);
2257 kill_cifsd(srvTcp);
2258 }
2259 /* If find_unc succeeded then rc == 0 so we cannot end */
2260 if (tcon) /* up accidentally freeing someone else's tcon struct */
2261 tconInfoFree(tcon);
2262 if (existingCifsSes == NULL) {
2263 if (pSesInfo) {
2264 if ((pSesInfo->server) &&
2265 (pSesInfo->status == CifsGood)) {
2266 int temp_rc;
2267 temp_rc = CIFSSMBLogoff(xid, pSesInfo);
2268 /* if the socketUseCount is now zero */
2269 if ((temp_rc == -ESHUTDOWN) &&
2270 (pSesInfo->server))
2271 kill_cifsd(pSesInfo->server);
2272 } else {
2273 cFYI(1, ("No session or bad tcon"));
2274 if (pSesInfo->server) {
2275 spin_lock(&GlobalMid_Lock);
2276 srvTcp->tcpStatus = CifsExiting;
2277 spin_unlock(&GlobalMid_Lock);
2278 kill_cifsd(pSesInfo->server);
2279 }
2280 }
2281 sesInfoFree(pSesInfo);
2282 /* pSesInfo = NULL; */
2283 }
2284 }
2285 } else {
2286 atomic_inc(&tcon->useCount);
2287 cifs_sb->tcon = tcon;
2288 tcon->ses = pSesInfo;
2289
2290 /* do not care if following two calls succeed - informational */
2291 if (!tcon->ipc) {
2292 CIFSSMBQFSDeviceInfo(xid, tcon);
2293 CIFSSMBQFSAttributeInfo(xid, tcon);
2294 }
2295
2296 /* tell server which Unix caps we support */
2297 if (tcon->ses->capabilities & CAP_UNIX)
2298 /* reset of caps checks mount to see if unix extensions
2299 disabled for just this mount */
2300 reset_cifs_unix_caps(xid, tcon, sb, &volume_info);
2301 else 2339 else
2302 tcon->unix_ext = 0; /* server does not support them */ 2340 cifs_put_tcp_session(srvTcp);
2341 goto out;
2342 }
2343 cifs_sb->tcon = tcon;
2303 2344
2304 /* convert forward to back slashes in prepath here if needed */ 2345 /* do not care if following two calls succeed - informational */
2305 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0) 2346 if (!tcon->ipc) {
2306 convert_delimiter(cifs_sb->prepath, 2347 CIFSSMBQFSDeviceInfo(xid, tcon);
2307 CIFS_DIR_SEP(cifs_sb)); 2348 CIFSSMBQFSAttributeInfo(xid, tcon);
2349 }
2308 2350
2309 if ((tcon->unix_ext == 0) && (cifs_sb->rsize > (1024 * 127))) { 2351 /* tell server which Unix caps we support */
2310 cifs_sb->rsize = 1024 * 127; 2352 if (tcon->ses->capabilities & CAP_UNIX)
2311 cFYI(DBG2, 2353 /* reset of caps checks mount to see if unix extensions
2312 ("no very large read support, rsize now 127K")); 2354 disabled for just this mount */
2313 } 2355 reset_cifs_unix_caps(xid, tcon, sb, &volume_info);
2314 if (!(tcon->ses->capabilities & CAP_LARGE_WRITE_X)) 2356 else
2315 cifs_sb->wsize = min(cifs_sb->wsize, 2357 tcon->unix_ext = 0; /* server does not support them */
2316 (tcon->ses->server->maxBuf - 2358
2317 MAX_CIFS_HDR_SIZE)); 2359 /* convert forward to back slashes in prepath here if needed */
2318 if (!(tcon->ses->capabilities & CAP_LARGE_READ_X)) 2360 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0)
2319 cifs_sb->rsize = min(cifs_sb->rsize, 2361 convert_delimiter(cifs_sb->prepath, CIFS_DIR_SEP(cifs_sb));
2320 (tcon->ses->server->maxBuf - 2362
2321 MAX_CIFS_HDR_SIZE)); 2363 if ((tcon->unix_ext == 0) && (cifs_sb->rsize > (1024 * 127))) {
2364 cifs_sb->rsize = 1024 * 127;
2365 cFYI(DBG2, ("no very large read support, rsize now 127K"));
2322 } 2366 }
2367 if (!(tcon->ses->capabilities & CAP_LARGE_WRITE_X))
2368 cifs_sb->wsize = min(cifs_sb->wsize,
2369 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE));
2370 if (!(tcon->ses->capabilities & CAP_LARGE_READ_X))
2371 cifs_sb->rsize = min(cifs_sb->rsize,
2372 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE));
2323 2373
2324 /* volume_info.password is freed above when existing session found 2374 /* volume_info.password is freed above when existing session found
2325 (in which case it is not needed anymore) but when new session is created 2375
@@ -3489,6 +3539,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3489 /* above now done in SendReceive */ 3539 /* above now done in SendReceive */
3490 if ((rc == 0) && (tcon != NULL)) { 3540 if ((rc == 0) && (tcon != NULL)) {
3491 tcon->tidStatus = CifsGood; 3541 tcon->tidStatus = CifsGood;
3542 tcon->need_reconnect = false;
3492 tcon->tid = smb_buffer_response->Tid; 3543 tcon->tid = smb_buffer_response->Tid;
3493 bcc_ptr = pByteArea(smb_buffer_response); 3544 bcc_ptr = pByteArea(smb_buffer_response);
3494 length = strnlen(bcc_ptr, BCC(smb_buffer_response) - 2); 3545 length = strnlen(bcc_ptr, BCC(smb_buffer_response) - 2);
@@ -3560,48 +3611,17 @@ int
3560cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) 3611cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3561{ 3612{
3562 int rc = 0; 3613 int rc = 0;
3563 int xid;
3564 struct cifsSesInfo *ses = NULL;
3565 char *tmp; 3614 char *tmp;
3566 3615
3567 xid = GetXid(); 3616 if (cifs_sb->tcon)
3568 3617 cifs_put_tcon(cifs_sb->tcon);
3569 if (cifs_sb->tcon) {
3570 ses = cifs_sb->tcon->ses; /* save ptr to ses before delete tcon!*/
3571 rc = CIFSSMBTDis(xid, cifs_sb->tcon);
3572 if (rc == -EBUSY) {
3573 FreeXid(xid);
3574 return 0;
3575 }
3576 DeleteTconOplockQEntries(cifs_sb->tcon);
3577 tconInfoFree(cifs_sb->tcon);
3578 if ((ses) && (ses->server)) {
3579 /* save off task so we do not refer to ses later */
3580 cFYI(1, ("About to do SMBLogoff "));
3581 rc = CIFSSMBLogoff(xid, ses);
3582 if (rc == -EBUSY) {
3583 FreeXid(xid);
3584 return 0;
3585 } else if (rc == -ESHUTDOWN) {
3586 cFYI(1, ("Waking up socket by sending signal"));
3587 if (ses->server)
3588 kill_cifsd(ses->server);
3589 rc = 0;
3590 } /* else - we have an smb session
3591 left on this socket do not kill cifsd */
3592 } else
3593 cFYI(1, ("No session or bad tcon"));
3594 }
3595 3618
3596 cifs_sb->tcon = NULL; 3619 cifs_sb->tcon = NULL;
3597 tmp = cifs_sb->prepath; 3620 tmp = cifs_sb->prepath;
3598 cifs_sb->prepathlen = 0; 3621 cifs_sb->prepathlen = 0;
3599 cifs_sb->prepath = NULL; 3622 cifs_sb->prepath = NULL;
3600 kfree(tmp); 3623 kfree(tmp);
3601 if (ses)
3602 sesInfoFree(ses);
3603 3624
3604 FreeXid(xid);
3605 return rc; 3625 return rc;
3606} 3626}
3607 3627
@@ -3717,6 +3737,7 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3717 cFYI(1, ("CIFS Session Established successfully")); 3737 cFYI(1, ("CIFS Session Established successfully"));
3718 spin_lock(&GlobalMid_Lock); 3738 spin_lock(&GlobalMid_Lock);
3719 pSesInfo->status = CifsGood; 3739 pSesInfo->status = CifsGood;
3740 pSesInfo->need_reconnect = false;
3720 spin_unlock(&GlobalMid_Lock); 3741 spin_unlock(&GlobalMid_Lock);
3721 } 3742 }
3722 3743
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 62d8bd8f14c0..f0a81e631ae6 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -488,12 +488,13 @@ int cifs_close(struct inode *inode, struct file *file)
488 pTcon = cifs_sb->tcon; 488 pTcon = cifs_sb->tcon;
489 if (pSMBFile) { 489 if (pSMBFile) {
490 struct cifsLockInfo *li, *tmp; 490 struct cifsLockInfo *li, *tmp;
491 491 write_lock(&GlobalSMBSeslock);
492 pSMBFile->closePend = true; 492 pSMBFile->closePend = true;
493 if (pTcon) { 493 if (pTcon) {
494 /* no sense reconnecting to close a file that is 494 /* no sense reconnecting to close a file that is
495 already closed */ 495 already closed */
496 if (pTcon->tidStatus != CifsNeedReconnect) { 496 if (!pTcon->need_reconnect) {
497 write_unlock(&GlobalSMBSeslock);
497 timeout = 2; 498 timeout = 2;
498 while ((atomic_read(&pSMBFile->wrtPending) != 0) 499 while ((atomic_read(&pSMBFile->wrtPending) != 0)
499 && (timeout <= 2048)) { 500 && (timeout <= 2048)) {
@@ -510,12 +511,15 @@ int cifs_close(struct inode *inode, struct file *file)
510 timeout *= 4; 511 timeout *= 4;
511 } 512 }
512 if (atomic_read(&pSMBFile->wrtPending)) 513 if (atomic_read(&pSMBFile->wrtPending))
513 cERROR(1, 514 cERROR(1, ("close with pending write"));
514 ("close with pending writes")); 515 if (!pTcon->need_reconnect &&
515 rc = CIFSSMBClose(xid, pTcon, 516 !pSMBFile->invalidHandle)
517 rc = CIFSSMBClose(xid, pTcon,
516 pSMBFile->netfid); 518 pSMBFile->netfid);
517 } 519 } else
518 } 520 write_unlock(&GlobalSMBSeslock);
521 } else
522 write_unlock(&GlobalSMBSeslock);
519 523
520 /* Delete any outstanding lock records. 524 /* Delete any outstanding lock records.
521 We'll lose them when the file is closed anyway. */ 525 We'll lose them when the file is closed anyway. */
@@ -587,15 +591,18 @@ int cifs_closedir(struct inode *inode, struct file *file)
587 pTcon = cifs_sb->tcon; 591 pTcon = cifs_sb->tcon;
588 592
589 cFYI(1, ("Freeing private data in close dir")); 593 cFYI(1, ("Freeing private data in close dir"));
594 write_lock(&GlobalSMBSeslock);
590 if (!pCFileStruct->srch_inf.endOfSearch && 595 if (!pCFileStruct->srch_inf.endOfSearch &&
591 !pCFileStruct->invalidHandle) { 596 !pCFileStruct->invalidHandle) {
592 pCFileStruct->invalidHandle = true; 597 pCFileStruct->invalidHandle = true;
598 write_unlock(&GlobalSMBSeslock);
593 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid); 599 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
594 cFYI(1, ("Closing uncompleted readdir with rc %d", 600 cFYI(1, ("Closing uncompleted readdir with rc %d",
595 rc)); 601 rc));
596 /* not much we can do if it fails anyway, ignore rc */ 602 /* not much we can do if it fails anyway, ignore rc */
597 rc = 0; 603 rc = 0;
598 } 604 } else
605 write_unlock(&GlobalSMBSeslock);
599 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start; 606 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
600 if (ptmp) { 607 if (ptmp) {
601 cFYI(1, ("closedir free smb buf in srch struct")); 608 cFYI(1, ("closedir free smb buf in srch struct"));
@@ -1404,7 +1411,10 @@ retry:
1404 if ((wbc->nr_to_write -= n_iov) <= 0) 1411 if ((wbc->nr_to_write -= n_iov) <= 0)
1405 done = 1; 1412 done = 1;
1406 index = next; 1413 index = next;
1407 } 1414 } else
1415 /* Need to re-find the pages we skipped */
1416 index = pvec.pages[0]->index + 1;
1417
1408 pagevec_release(&pvec); 1418 pagevec_release(&pvec);
1409 } 1419 }
1410 if (!scanned && !done) { 1420 if (!scanned && !done) {
@@ -1465,7 +1475,11 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
1465 cFYI(1, ("write_end for page %p from pos %lld with %d bytes", 1475 cFYI(1, ("write_end for page %p from pos %lld with %d bytes",
1466 page, pos, copied)); 1476 page, pos, copied));
1467 1477
1468 if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE) 1478 if (PageChecked(page)) {
1479 if (copied == len)
1480 SetPageUptodate(page);
1481 ClearPageChecked(page);
1482 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
1469 SetPageUptodate(page); 1483 SetPageUptodate(page);
1470 1484
1471 if (!PageUptodate(page)) { 1485 if (!PageUptodate(page)) {
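The PG_checked test added to cifs_write_end() pairs with the new write_begin path: a page whose unwritten regions were zeroed in write_begin is marked Checked, so a copy that covers the requested length is enough to declare the whole page up to date. A sketch of just that decision (hypothetical helper, not part of the patch):

static void cifs_mark_uptodate_after_copy(struct page *page,
					  unsigned int len,
					  unsigned int copied)
{
	if (PageChecked(page)) {
		/* unwritten parts were zeroed in write_begin */
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
	} else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE) {
		/* old behaviour: a full-page copy makes the page uptodate */
		SetPageUptodate(page);
	}
}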
@@ -1824,7 +1838,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
1824 pTcon = cifs_sb->tcon; 1838 pTcon = cifs_sb->tcon;
1825 1839
1826 pagevec_init(&lru_pvec, 0); 1840 pagevec_init(&lru_pvec, 0);
1827 cFYI(DBG2, ("rpages: num pages %d", num_pages)); 1841 cFYI(DBG2, ("rpages: num pages %d", num_pages));
1828 for (i = 0; i < num_pages; ) { 1842 for (i = 0; i < num_pages; ) {
1829 unsigned contig_pages; 1843 unsigned contig_pages;
1830 struct page *tmp_page; 1844 struct page *tmp_page;
@@ -2052,39 +2066,70 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping,
2052{ 2066{
2053 pgoff_t index = pos >> PAGE_CACHE_SHIFT; 2067 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2054 loff_t offset = pos & (PAGE_CACHE_SIZE - 1); 2068 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
2069 loff_t page_start = pos & PAGE_MASK;
2070 loff_t i_size;
2071 struct page *page;
2072 int rc = 0;
2055 2073
2056 cFYI(1, ("write_begin from %lld len %d", (long long)pos, len)); 2074 cFYI(1, ("write_begin from %lld len %d", (long long)pos, len));
2057 2075
2058 *pagep = __grab_cache_page(mapping, index); 2076 page = __grab_cache_page(mapping, index);
2059 if (!*pagep) 2077 if (!page) {
2060 return -ENOMEM; 2078 rc = -ENOMEM;
2061 2079 goto out;
2062 if (PageUptodate(*pagep)) 2080 }
2063 return 0;
2064 2081
2065 /* If we are writing a full page it will be up to date, 2082 if (PageUptodate(page))
2066 no need to read from the server */ 2083 goto out;
2067 if (len == PAGE_CACHE_SIZE && flags & AOP_FLAG_UNINTERRUPTIBLE)
2068 return 0;
2069 2084
2070 if ((file->f_flags & O_ACCMODE) != O_WRONLY) { 2085 /*
2071 int rc; 2086 * If we write a full page it will be up to date, no need to read from
2087 * the server. If the write is short, we'll end up doing a sync write
2088 * instead.
2089 */
2090 if (len == PAGE_CACHE_SIZE)
2091 goto out;
2072 2092
2073 /* might as well read a page, it is fast enough */ 2093 /*
2074 rc = cifs_readpage_worker(file, *pagep, &offset); 2094 * optimize away the read when we have an oplock, and we're not
2095 * expecting to use any of the data we'd be reading in. That
2096 * is, when the page lies beyond the EOF, or straddles the EOF
2097 * and the write will cover all of the existing data.
2098 */
2099 if (CIFS_I(mapping->host)->clientCanCacheRead) {
2100 i_size = i_size_read(mapping->host);
2101 if (page_start >= i_size ||
2102 (offset == 0 && (pos + len) >= i_size)) {
2103 zero_user_segments(page, 0, offset,
2104 offset + len,
2105 PAGE_CACHE_SIZE);
2106 /*
2107 * PageChecked means that the parts of the page
2108 * to which we're not writing are considered up
2109 * to date. Once the data is copied to the
2110 * page, it can be set uptodate.
2111 */
2112 SetPageChecked(page);
2113 goto out;
2114 }
2115 }
2075 2116
2076 /* we do not need to pass errors back 2117 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
2077 e.g. if we do not have read access to the file 2118 /*
2078 because cifs_write_end will attempt synchronous writes 2119 * might as well read a page, it is fast enough. If we get
2079 -- shaggy */ 2120 * an error, we don't need to return it. cifs_write_end will
2121 * do a sync write instead since PG_uptodate isn't set.
2122 */
2123 cifs_readpage_worker(file, page, &page_start);
2080 } else { 2124 } else {
2081 /* we could try using another file handle if there is one - 2125 /* we could try using another file handle if there is one -
2082 but how would we lock it to prevent close of that handle 2126 but how would we lock it to prevent close of that handle
2083 racing with this read? In any case 2127 racing with this read? In any case
2084 this will be written out by write_end so is fine */ 2128 this will be written out by write_end so is fine */
2085 } 2129 }
2086 2130out:
2087 return 0; 2131 *pagep = page;
2132 return rc;
2088} 2133}
2089 2134
2090const struct address_space_operations cifs_addr_ops = { 2135const struct address_space_operations cifs_addr_ops = {
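[Editor's sketch] The rewritten cifs_write_begin() above skips the read from the server when the client holds a read oplock and the write either lands entirely beyond EOF or starts at the page head and covers all existing data; the untouched parts of the page are simply zeroed and the page marked PageChecked. Below is a minimal userspace sketch of that decision plus the zeroing; PAGE_SIZE_SK, can_skip_read and prep_page are invented names, not the cifs code.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE_SK 4096

/* pos/len describe the write; i_size is the current file size. */
static int can_skip_read(int64_t pos, size_t len, int64_t i_size)
{
    int64_t page_start = pos & ~((int64_t)PAGE_SIZE_SK - 1);
    int64_t offset = pos & (PAGE_SIZE_SK - 1);

    if (len == PAGE_SIZE_SK)            /* full-page write */
        return 1;
    if (page_start >= i_size)           /* page lies entirely beyond EOF */
        return 1;
    if (offset == 0 && pos + (int64_t)len >= i_size)
        return 1;                       /* write covers all existing data */
    return 0;
}

/* Zero the head and tail of the page the write does not touch.
 * Assumes offset + len <= PAGE_SIZE_SK, as write_begin guarantees. */
static void prep_page(unsigned char *page, int64_t pos, size_t len)
{
    size_t offset = (size_t)(pos & (PAGE_SIZE_SK - 1));

    memset(page, 0, offset);
    memset(page + offset + len, 0, PAGE_SIZE_SK - offset - len);
}

int main(void)
{
    unsigned char page[PAGE_SIZE_SK];
    int64_t i_size = 10000;             /* pretend file size */
    int64_t pos = 12288;                /* page 3, beyond EOF */
    size_t len = 100;

    if (can_skip_read(pos, len, i_size)) {
        prep_page(page, pos, len);
        printf("read skipped, page pre-zeroed\n");
    } else {
        printf("short write into existing data: read the page first\n");
    }
    return 0;
}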
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index d54fa8aeaea9..ff8c68de4a92 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1361,9 +1361,11 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
1361 CIFS_MOUNT_MAP_SPECIAL_CHR); 1361 CIFS_MOUNT_MAP_SPECIAL_CHR);
1362 1362
1363 if (tmprc == 0 && (info_buf_source->UniqueId == 1363 if (tmprc == 0 && (info_buf_source->UniqueId ==
1364 info_buf_target->UniqueId)) 1364 info_buf_target->UniqueId)) {
1365 /* same file, POSIX says that this is a noop */ 1365 /* same file, POSIX says that this is a noop */
1366 rc = 0;
1366 goto cifs_rename_exit; 1367 goto cifs_rename_exit;
1368 }
1367 } /* else ... BB we could add the same check for Windows by 1369 } /* else ... BB we could add the same check for Windows by
1368 checking the UniqueId via FILE_INTERNAL_INFO */ 1370 checking the UniqueId via FILE_INTERNAL_INFO */
1369 1371
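[Editor's sketch] The rc = 0 added above makes the same-inode case an explicit success: POSIX treats rename() over another hard link to the same file as a no-op that succeeds. A userspace analogue of the check, comparing device and inode numbers with stat(2); this is illustrative only, not the cifs path.

#include <stdio.h>
#include <sys/stat.h>

static int same_file(const char *a, const char *b)
{
    struct stat sa, sb;

    if (stat(a, &sa) || stat(b, &sb))
        return 0;
    return sa.st_dev == sb.st_dev && sa.st_ino == sb.st_ino;
}

int main(int argc, char **argv)
{
    if (argc == 3 && same_file(argv[1], argv[2]))
        printf("same inode: rename is a no-op, return 0\n");
    return 0;
}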
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 88786ba02d27..9ee3f689c2b0 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -75,12 +75,12 @@ sesInfoAlloc(void)
75 75
76 ret_buf = kzalloc(sizeof(struct cifsSesInfo), GFP_KERNEL); 76 ret_buf = kzalloc(sizeof(struct cifsSesInfo), GFP_KERNEL);
77 if (ret_buf) { 77 if (ret_buf) {
78 write_lock(&GlobalSMBSeslock);
79 atomic_inc(&sesInfoAllocCount); 78 atomic_inc(&sesInfoAllocCount);
80 ret_buf->status = CifsNew; 79 ret_buf->status = CifsNew;
81 list_add(&ret_buf->cifsSessionList, &GlobalSMBSessionList); 80 ++ret_buf->ses_count;
81 INIT_LIST_HEAD(&ret_buf->smb_ses_list);
82 INIT_LIST_HEAD(&ret_buf->tcon_list);
82 init_MUTEX(&ret_buf->sesSem); 83 init_MUTEX(&ret_buf->sesSem);
83 write_unlock(&GlobalSMBSeslock);
84 } 84 }
85 return ret_buf; 85 return ret_buf;
86} 86}
@@ -93,10 +93,7 @@ sesInfoFree(struct cifsSesInfo *buf_to_free)
93 return; 93 return;
94 } 94 }
95 95
96 write_lock(&GlobalSMBSeslock);
97 atomic_dec(&sesInfoAllocCount); 96 atomic_dec(&sesInfoAllocCount);
98 list_del(&buf_to_free->cifsSessionList);
99 write_unlock(&GlobalSMBSeslock);
100 kfree(buf_to_free->serverOS); 97 kfree(buf_to_free->serverOS);
101 kfree(buf_to_free->serverDomain); 98 kfree(buf_to_free->serverDomain);
102 kfree(buf_to_free->serverNOS); 99 kfree(buf_to_free->serverNOS);
@@ -111,17 +108,14 @@ tconInfoAlloc(void)
111 struct cifsTconInfo *ret_buf; 108 struct cifsTconInfo *ret_buf;
112 ret_buf = kzalloc(sizeof(struct cifsTconInfo), GFP_KERNEL); 109 ret_buf = kzalloc(sizeof(struct cifsTconInfo), GFP_KERNEL);
113 if (ret_buf) { 110 if (ret_buf) {
114 write_lock(&GlobalSMBSeslock);
115 atomic_inc(&tconInfoAllocCount); 111 atomic_inc(&tconInfoAllocCount);
116 list_add(&ret_buf->cifsConnectionList,
117 &GlobalTreeConnectionList);
118 ret_buf->tidStatus = CifsNew; 112 ret_buf->tidStatus = CifsNew;
113 ++ret_buf->tc_count;
119 INIT_LIST_HEAD(&ret_buf->openFileList); 114 INIT_LIST_HEAD(&ret_buf->openFileList);
120 init_MUTEX(&ret_buf->tconSem); 115 INIT_LIST_HEAD(&ret_buf->tcon_list);
121#ifdef CONFIG_CIFS_STATS 116#ifdef CONFIG_CIFS_STATS
122 spin_lock_init(&ret_buf->stat_lock); 117 spin_lock_init(&ret_buf->stat_lock);
123#endif 118#endif
124 write_unlock(&GlobalSMBSeslock);
125 } 119 }
126 return ret_buf; 120 return ret_buf;
127} 121}
@@ -133,10 +127,7 @@ tconInfoFree(struct cifsTconInfo *buf_to_free)
133 cFYI(1, ("Null buffer passed to tconInfoFree")); 127 cFYI(1, ("Null buffer passed to tconInfoFree"));
134 return; 128 return;
135 } 129 }
136 write_lock(&GlobalSMBSeslock);
137 atomic_dec(&tconInfoAllocCount); 130 atomic_dec(&tconInfoAllocCount);
138 list_del(&buf_to_free->cifsConnectionList);
139 write_unlock(&GlobalSMBSeslock);
140 kfree(buf_to_free->nativeFileSystem); 131 kfree(buf_to_free->nativeFileSystem);
141 kfree(buf_to_free); 132 kfree(buf_to_free);
142} 133}
@@ -350,9 +341,9 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
350 if (current->fsuid != treeCon->ses->linux_uid) { 341 if (current->fsuid != treeCon->ses->linux_uid) {
351 cFYI(1, ("Multiuser mode and UID " 342 cFYI(1, ("Multiuser mode and UID "
352 "did not match tcon uid")); 343 "did not match tcon uid"));
353 read_lock(&GlobalSMBSeslock); 344 read_lock(&cifs_tcp_ses_lock);
354 list_for_each(temp_item, &GlobalSMBSessionList) { 345 list_for_each(temp_item, &treeCon->ses->server->smb_ses_list) {
355 ses = list_entry(temp_item, struct cifsSesInfo, cifsSessionList); 346 ses = list_entry(temp_item, struct cifsSesInfo, smb_ses_list);
356 if (ses->linux_uid == current->fsuid) { 347 if (ses->linux_uid == current->fsuid) {
357 if (ses->server == treeCon->ses->server) { 348 if (ses->server == treeCon->ses->server) {
358 cFYI(1, ("found matching uid substitute right smb_uid")); 349 cFYI(1, ("found matching uid substitute right smb_uid"));
@@ -364,7 +355,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ ,
364 } 355 }
365 } 356 }
366 } 357 }
367 read_unlock(&GlobalSMBSeslock); 358 read_unlock(&cifs_tcp_ses_lock);
368 } 359 }
369 } 360 }
370 } 361 }
@@ -497,9 +488,10 @@ bool
497is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) 488is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
498{ 489{
499 struct smb_com_lock_req *pSMB = (struct smb_com_lock_req *)buf; 490 struct smb_com_lock_req *pSMB = (struct smb_com_lock_req *)buf;
500 struct list_head *tmp; 491 struct list_head *tmp, *tmp1, *tmp2;
501 struct list_head *tmp1; 492 struct cifsSesInfo *ses;
502 struct cifsTconInfo *tcon; 493 struct cifsTconInfo *tcon;
494 struct cifsInodeInfo *pCifsInode;
503 struct cifsFileInfo *netfile; 495 struct cifsFileInfo *netfile;
504 496
505 cFYI(1, ("Checking for oplock break or dnotify response")); 497 cFYI(1, ("Checking for oplock break or dnotify response"));
@@ -554,42 +546,45 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
554 return false; 546 return false;
555 547
556 /* look up tcon based on tid & uid */ 548 /* look up tcon based on tid & uid */
557 read_lock(&GlobalSMBSeslock); 549 read_lock(&cifs_tcp_ses_lock);
558 list_for_each(tmp, &GlobalTreeConnectionList) { 550 list_for_each(tmp, &srv->smb_ses_list) {
559 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); 551 ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list);
560 if ((tcon->tid == buf->Tid) && (srv == tcon->ses->server)) { 552 list_for_each(tmp1, &ses->tcon_list) {
553 tcon = list_entry(tmp1, struct cifsTconInfo, tcon_list);
554 if (tcon->tid != buf->Tid)
555 continue;
556
561 cifs_stats_inc(&tcon->num_oplock_brks); 557 cifs_stats_inc(&tcon->num_oplock_brks);
562 list_for_each(tmp1, &tcon->openFileList) { 558 write_lock(&GlobalSMBSeslock);
563 netfile = list_entry(tmp1, struct cifsFileInfo, 559 list_for_each(tmp2, &tcon->openFileList) {
560 netfile = list_entry(tmp2, struct cifsFileInfo,
564 tlist); 561 tlist);
565 if (pSMB->Fid == netfile->netfid) { 562 if (pSMB->Fid != netfile->netfid)
566 struct cifsInodeInfo *pCifsInode; 563 continue;
567 read_unlock(&GlobalSMBSeslock); 564
568 cFYI(1, 565 write_unlock(&GlobalSMBSeslock);
569 ("file id match, oplock break")); 566 read_unlock(&cifs_tcp_ses_lock);
570 pCifsInode = 567 cFYI(1, ("file id match, oplock break"));
571 CIFS_I(netfile->pInode); 568 pCifsInode = CIFS_I(netfile->pInode);
572 pCifsInode->clientCanCacheAll = false; 569 pCifsInode->clientCanCacheAll = false;
573 if (pSMB->OplockLevel == 0) 570 if (pSMB->OplockLevel == 0)
574 pCifsInode->clientCanCacheRead 571 pCifsInode->clientCanCacheRead = false;
575 = false; 572 pCifsInode->oplockPending = true;
576 pCifsInode->oplockPending = true; 573 AllocOplockQEntry(netfile->pInode,
577 AllocOplockQEntry(netfile->pInode, 574 netfile->netfid, tcon);
578 netfile->netfid, 575 cFYI(1, ("about to wake up oplock thread"));
579 tcon); 576 if (oplockThread)
580 cFYI(1, 577 wake_up_process(oplockThread);
581 ("about to wake up oplock thread")); 578
582 if (oplockThread) 579 return true;
583 wake_up_process(oplockThread);
584 return true;
585 }
586 } 580 }
587 read_unlock(&GlobalSMBSeslock); 581 write_unlock(&GlobalSMBSeslock);
582 read_unlock(&cifs_tcp_ses_lock);
588 cFYI(1, ("No matching file for oplock break")); 583 cFYI(1, ("No matching file for oplock break"));
589 return true; 584 return true;
590 } 585 }
591 } 586 }
592 read_unlock(&GlobalSMBSeslock); 587 read_unlock(&cifs_tcp_ses_lock);
593 cFYI(1, ("Can not process oplock break for non-existent connection")); 588 cFYI(1, ("Can not process oplock break for non-existent connection"));
594 return true; 589 return true;
595} 590}
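[Editor's sketch] The misc.c changes replace the old GlobalSMBSessionList/GlobalTreeConnectionList with per-server lists: a TCP_Server_Info owns its sessions and each session owns its tree connections, so is_valid_oplock_break() now does a two-level walk under read_lock(&cifs_tcp_ses_lock) and only takes GlobalSMBSeslock around the per-tcon open-file list. A standalone model of the two-level lookup using plain singly linked lists; the struct and function names are invented and no locking is shown.

#include <stdio.h>

struct tcon   { unsigned tid; struct tcon *next; };
struct ses    { struct tcon *tcons; struct ses *next; };
struct server { struct ses *sessions; };

static struct tcon *find_tcon(struct server *srv, unsigned tid)
{
    /* In the kernel this walk runs under read_lock(&cifs_tcp_ses_lock). */
    for (struct ses *s = srv->sessions; s; s = s->next)
        for (struct tcon *t = s->tcons; t; t = t->next)
            if (t->tid == tid)
                return t;
    return NULL;
}

int main(void)
{
    struct tcon t1 = { .tid = 7 }, t2 = { .tid = 9 };
    struct ses  s1 = { .tcons = &t1 }, s2 = { .tcons = &t2 };
    struct server srv = { .sessions = &s1 };
    s1.next = &s2;

    struct tcon *hit = find_tcon(&srv, 9);
    printf("tid 9 %s\n", hit ? "found" : "not found");
    return 0;
}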
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 58d57299f2a0..9f51f9bf0292 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -741,11 +741,14 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
741 (index_to_find < first_entry_in_buffer)) { 741 (index_to_find < first_entry_in_buffer)) {
742 /* close and restart search */ 742 /* close and restart search */
743 cFYI(1, ("search backing up - close and restart search")); 743 cFYI(1, ("search backing up - close and restart search"));
744 write_lock(&GlobalSMBSeslock);
744 if (!cifsFile->srch_inf.endOfSearch && 745 if (!cifsFile->srch_inf.endOfSearch &&
745 !cifsFile->invalidHandle) { 746 !cifsFile->invalidHandle) {
746 cifsFile->invalidHandle = true; 747 cifsFile->invalidHandle = true;
748 write_unlock(&GlobalSMBSeslock);
747 CIFSFindClose(xid, pTcon, cifsFile->netfid); 749 CIFSFindClose(xid, pTcon, cifsFile->netfid);
748 } 750 } else
751 write_unlock(&GlobalSMBSeslock);
749 if (cifsFile->srch_inf.ntwrk_buf_start) { 752 if (cifsFile->srch_inf.ntwrk_buf_start) {
750 cFYI(1, ("freeing SMB ff cache buf on search rewind")); 753 cFYI(1, ("freeing SMB ff cache buf on search rewind"));
751 if (cifsFile->srch_inf.smallBuf) 754 if (cifsFile->srch_inf.smallBuf)
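[Editor's sketch] The readdir hunk takes GlobalSMBSeslock to flip invalidHandle, then drops it before CIFSFindClose(), which goes out on the wire and can block; the else branch just releases the lock. A small pthread sketch of the same "mutate under the lock, block without it" shape; every name here is invented.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t handle_lock = PTHREAD_MUTEX_INITIALIZER;
static bool handle_valid = true;

static void slow_remote_close(void)
{
    usleep(1000);            /* stand-in for a blocking network round trip */
}

static void restart_search(void)
{
    pthread_mutex_lock(&handle_lock);
    if (handle_valid) {
        handle_valid = false;            /* state change under the lock    */
        pthread_mutex_unlock(&handle_lock);
        slow_remote_close();             /* blocking work without the lock */
    } else {
        pthread_mutex_unlock(&handle_lock);
    }
}

int main(void)
{
    restart_search();
    printf("handle_valid = %d\n", handle_valid);
    return 0;
}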
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index bf0e6d8e382a..ff8243a8fe3e 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -161,7 +161,7 @@ void DeleteTconOplockQEntries(struct cifsTconInfo *tcon)
161 161
162int 162int
163smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, 163smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
164 unsigned int smb_buf_length, struct sockaddr *sin) 164 unsigned int smb_buf_length, struct sockaddr *sin, bool noblocksnd)
165{ 165{
166 int rc = 0; 166 int rc = 0;
167 int i = 0; 167 int i = 0;
@@ -178,7 +178,10 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
178 smb_msg.msg_namelen = sizeof(struct sockaddr); 178 smb_msg.msg_namelen = sizeof(struct sockaddr);
179 smb_msg.msg_control = NULL; 179 smb_msg.msg_control = NULL;
180 smb_msg.msg_controllen = 0; 180 smb_msg.msg_controllen = 0;
181 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ 181 if (noblocksnd)
182 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL;
183 else
184 smb_msg.msg_flags = MSG_NOSIGNAL;
182 185
183 /* smb header is converted in header_assemble. bcc and rest of SMB word 186 /* smb header is converted in header_assemble. bcc and rest of SMB word
184 area, and byte area if necessary, is converted to littleendian in 187 area, and byte area if necessary, is converted to littleendian in
@@ -229,8 +232,8 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer,
229} 232}
230 233
231static int 234static int
232smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, 235smb_send2(struct TCP_Server_Info *server, struct kvec *iov, int n_vec,
233 struct sockaddr *sin) 236 struct sockaddr *sin, bool noblocksnd)
234{ 237{
235 int rc = 0; 238 int rc = 0;
236 int i = 0; 239 int i = 0;
@@ -240,6 +243,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
240 unsigned int total_len; 243 unsigned int total_len;
241 int first_vec = 0; 244 int first_vec = 0;
242 unsigned int smb_buf_length = smb_buffer->smb_buf_length; 245 unsigned int smb_buf_length = smb_buffer->smb_buf_length;
246 struct socket *ssocket = server->ssocket;
243 247
244 if (ssocket == NULL) 248 if (ssocket == NULL)
245 return -ENOTSOCK; /* BB eventually add reconnect code here */ 249 return -ENOTSOCK; /* BB eventually add reconnect code here */
@@ -248,7 +252,10 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
248 smb_msg.msg_namelen = sizeof(struct sockaddr); 252 smb_msg.msg_namelen = sizeof(struct sockaddr);
249 smb_msg.msg_control = NULL; 253 smb_msg.msg_control = NULL;
250 smb_msg.msg_controllen = 0; 254 smb_msg.msg_controllen = 0;
251 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ 255 if (noblocksnd)
256 smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL;
257 else
258 smb_msg.msg_flags = MSG_NOSIGNAL;
252 259
253 /* smb header is converted in header_assemble. bcc and rest of SMB word 260 /* smb header is converted in header_assemble. bcc and rest of SMB word
254 area, and byte area if necessary, is converted to littleendian in 261 area, and byte area if necessary, is converted to littleendian in
@@ -283,8 +290,11 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
283 if (rc < 0) 290 if (rc < 0)
284 break; 291 break;
285 292
286 if (rc >= total_len) { 293 if (rc == total_len) {
287 WARN_ON(rc > total_len); 294 total_len = 0;
295 break;
296 } else if (rc > total_len) {
297 cERROR(1, ("sent %d requested %d", rc, total_len));
288 break; 298 break;
289 } 299 }
290 if (rc == 0) { 300 if (rc == 0) {
@@ -312,6 +322,16 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
312 i = 0; /* in case we get ENOSPC on the next send */ 322 i = 0; /* in case we get ENOSPC on the next send */
313 } 323 }
314 324
325 if ((total_len > 0) && (total_len != smb_buf_length + 4)) {
326 cFYI(1, ("partial send (%d remaining), terminating session",
327 total_len));
328 /* If we have only sent part of an SMB then the next SMB
329 could be taken as the remainder of this one. We need
330 to kill the socket so the server throws away the partial
331 SMB */
332 server->tcpStatus = CifsNeedReconnect;
333 }
334
315 if (rc < 0) { 335 if (rc < 0) {
316 cERROR(1, ("Error %d sending data on socket to server", rc)); 336 cERROR(1, ("Error %d sending data on socket to server", rc));
317 } else 337 } else
@@ -518,8 +538,9 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
518#ifdef CONFIG_CIFS_STATS2 538#ifdef CONFIG_CIFS_STATS2
519 atomic_inc(&ses->server->inSend); 539 atomic_inc(&ses->server->inSend);
520#endif 540#endif
521 rc = smb_send2(ses->server->ssocket, iov, n_vec, 541 rc = smb_send2(ses->server, iov, n_vec,
522 (struct sockaddr *) &(ses->server->addr.sockAddr)); 542 (struct sockaddr *) &(ses->server->addr.sockAddr),
543 ses->server->noblocksnd);
523#ifdef CONFIG_CIFS_STATS2 544#ifdef CONFIG_CIFS_STATS2
524 atomic_dec(&ses->server->inSend); 545 atomic_dec(&ses->server->inSend);
525 midQ->when_sent = jiffies; 546 midQ->when_sent = jiffies;
@@ -711,7 +732,8 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
711 atomic_inc(&ses->server->inSend); 732 atomic_inc(&ses->server->inSend);
712#endif 733#endif
713 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, 734 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length,
714 (struct sockaddr *) &(ses->server->addr.sockAddr)); 735 (struct sockaddr *) &(ses->server->addr.sockAddr),
736 ses->server->noblocksnd);
715#ifdef CONFIG_CIFS_STATS2 737#ifdef CONFIG_CIFS_STATS2
716 atomic_dec(&ses->server->inSend); 738 atomic_dec(&ses->server->inSend);
717 midQ->when_sent = jiffies; 739 midQ->when_sent = jiffies;
@@ -851,7 +873,8 @@ send_nt_cancel(struct cifsTconInfo *tcon, struct smb_hdr *in_buf,
851 return rc; 873 return rc;
852 } 874 }
853 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, 875 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length,
854 (struct sockaddr *) &(ses->server->addr.sockAddr)); 876 (struct sockaddr *) &(ses->server->addr.sockAddr),
877 ses->server->noblocksnd);
855 up(&ses->server->tcpSem); 878 up(&ses->server->tcpSem);
856 return rc; 879 return rc;
857} 880}
@@ -941,7 +964,8 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
941 atomic_inc(&ses->server->inSend); 964 atomic_inc(&ses->server->inSend);
942#endif 965#endif
943 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, 966 rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length,
944 (struct sockaddr *) &(ses->server->addr.sockAddr)); 967 (struct sockaddr *) &(ses->server->addr.sockAddr),
968 ses->server->noblocksnd);
945#ifdef CONFIG_CIFS_STATS2 969#ifdef CONFIG_CIFS_STATS2
946 atomic_dec(&ses->server->inSend); 970 atomic_dec(&ses->server->inSend);
947 midQ->when_sent = jiffies; 971 midQ->when_sent = jiffies;
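[Editor's sketch] The smb_send2() change tracks how much of the SMB actually made it onto the socket and, if a partial frame is left behind, flags the session for reconnect so the server never tries to parse half a PDU. Below is a plain userspace writev() loop with the same bookkeeping; the reconnect flag is just a boolean and every name is illustrative.

#include <stdbool.h>
#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>

static bool need_reconnect;

static int send_frame(int fd, struct iovec *iov, int n_vec, size_t total_len)
{
    size_t remaining = total_len;
    int first = 0;

    while (remaining > 0) {
        ssize_t rc = writev(fd, &iov[first], n_vec - first);
        if (rc < 0)
            break;
        remaining -= (size_t)rc;
        /* Skip iovecs that were fully written, trim the current one. */
        while (first < n_vec && (size_t)rc >= iov[first].iov_len) {
            rc -= (ssize_t)iov[first].iov_len;
            first++;
        }
        if (first < n_vec) {
            iov[first].iov_base = (char *)iov[first].iov_base + rc;
            iov[first].iov_len -= (size_t)rc;
        }
    }

    /* Part of a frame on the wire means the peer can no longer resync. */
    if (remaining > 0 && remaining != total_len)
        need_reconnect = true;

    return remaining ? -1 : 0;
}

int main(void)
{
    int fds[2];
    char hdr[] = "HDR", body[] = "BODY";
    struct iovec iov[2] = {
        { .iov_base = hdr,  .iov_len = sizeof(hdr) - 1 },
        { .iov_base = body, .iov_len = sizeof(body) - 1 },
    };

    if (pipe(fds))
        return 1;
    int rc = send_frame(fds[1], iov, 2, 7);
    printf("rc=%d need_reconnect=%d\n", rc, need_reconnect);
    close(fds[0]);
    close(fds[1]);
    return 0;
}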
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index d910501de6d2..8d86b7960f0d 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -812,7 +812,7 @@ int dlm_release_lockspace(void *lockspace, int force)
812 error = release_lockspace(ls, force); 812 error = release_lockspace(ls, force);
813 if (!error) 813 if (!error)
814 ls_count--; 814 ls_count--;
815 else if (!ls_count) 815 if (!ls_count)
816 threads_stop(); 816 threads_stop();
817 mutex_unlock(&ls_lock); 817 mutex_unlock(&ls_lock);
818 818
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index e22bc3961345..0d713b691941 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1037,17 +1037,14 @@ static int
1037decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, 1037decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
1038 struct ecryptfs_crypt_stat *crypt_stat) 1038 struct ecryptfs_crypt_stat *crypt_stat)
1039{ 1039{
1040 struct scatterlist dst_sg; 1040 struct scatterlist dst_sg[2];
1041 struct scatterlist src_sg; 1041 struct scatterlist src_sg[2];
1042 struct mutex *tfm_mutex; 1042 struct mutex *tfm_mutex;
1043 struct blkcipher_desc desc = { 1043 struct blkcipher_desc desc = {
1044 .flags = CRYPTO_TFM_REQ_MAY_SLEEP 1044 .flags = CRYPTO_TFM_REQ_MAY_SLEEP
1045 }; 1045 };
1046 int rc = 0; 1046 int rc = 0;
1047 1047
1048 sg_init_table(&dst_sg, 1);
1049 sg_init_table(&src_sg, 1);
1050
1051 if (unlikely(ecryptfs_verbosity > 0)) { 1048 if (unlikely(ecryptfs_verbosity > 0)) {
1052 ecryptfs_printk( 1049 ecryptfs_printk(
1053 KERN_DEBUG, "Session key encryption key (size [%d]):\n", 1050 KERN_DEBUG, "Session key encryption key (size [%d]):\n",
@@ -1066,8 +1063,8 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
1066 } 1063 }
1067 rc = virt_to_scatterlist(auth_tok->session_key.encrypted_key, 1064 rc = virt_to_scatterlist(auth_tok->session_key.encrypted_key,
1068 auth_tok->session_key.encrypted_key_size, 1065 auth_tok->session_key.encrypted_key_size,
1069 &src_sg, 1); 1066 src_sg, 2);
1070 if (rc != 1) { 1067 if (rc < 1 || rc > 2) {
1071 printk(KERN_ERR "Internal error whilst attempting to convert " 1068 printk(KERN_ERR "Internal error whilst attempting to convert "
1072 "auth_tok->session_key.encrypted_key to scatterlist; " 1069 "auth_tok->session_key.encrypted_key to scatterlist; "
1073 "expected rc = 1; got rc = [%d]. " 1070 "expected rc = 1; got rc = [%d]. "
@@ -1079,8 +1076,8 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
1079 auth_tok->session_key.encrypted_key_size; 1076 auth_tok->session_key.encrypted_key_size;
1080 rc = virt_to_scatterlist(auth_tok->session_key.decrypted_key, 1077 rc = virt_to_scatterlist(auth_tok->session_key.decrypted_key,
1081 auth_tok->session_key.decrypted_key_size, 1078 auth_tok->session_key.decrypted_key_size,
1082 &dst_sg, 1); 1079 dst_sg, 2);
1083 if (rc != 1) { 1080 if (rc < 1 || rc > 2) {
1084 printk(KERN_ERR "Internal error whilst attempting to convert " 1081 printk(KERN_ERR "Internal error whilst attempting to convert "
1085 "auth_tok->session_key.decrypted_key to scatterlist; " 1082 "auth_tok->session_key.decrypted_key to scatterlist; "
1086 "expected rc = 1; got rc = [%d]\n", rc); 1083 "expected rc = 1; got rc = [%d]\n", rc);
@@ -1096,7 +1093,7 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
1096 rc = -EINVAL; 1093 rc = -EINVAL;
1097 goto out; 1094 goto out;
1098 } 1095 }
1099 rc = crypto_blkcipher_decrypt(&desc, &dst_sg, &src_sg, 1096 rc = crypto_blkcipher_decrypt(&desc, dst_sg, src_sg,
1100 auth_tok->session_key.encrypted_key_size); 1097 auth_tok->session_key.encrypted_key_size);
1101 mutex_unlock(tfm_mutex); 1098 mutex_unlock(tfm_mutex);
1102 if (unlikely(rc)) { 1099 if (unlikely(rc)) {
@@ -1539,8 +1536,8 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
1539 size_t i; 1536 size_t i;
1540 size_t encrypted_session_key_valid = 0; 1537 size_t encrypted_session_key_valid = 0;
1541 char session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES]; 1538 char session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
1542 struct scatterlist dst_sg; 1539 struct scatterlist dst_sg[2];
1543 struct scatterlist src_sg; 1540 struct scatterlist src_sg[2];
1544 struct mutex *tfm_mutex = NULL; 1541 struct mutex *tfm_mutex = NULL;
1545 u8 cipher_code; 1542 u8 cipher_code;
1546 size_t packet_size_length; 1543 size_t packet_size_length;
@@ -1619,8 +1616,8 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
1619 ecryptfs_dump_hex(session_key_encryption_key, 16); 1616 ecryptfs_dump_hex(session_key_encryption_key, 16);
1620 } 1617 }
1621 rc = virt_to_scatterlist(crypt_stat->key, key_rec->enc_key_size, 1618 rc = virt_to_scatterlist(crypt_stat->key, key_rec->enc_key_size,
1622 &src_sg, 1); 1619 src_sg, 2);
1623 if (rc != 1) { 1620 if (rc < 1 || rc > 2) {
1624 ecryptfs_printk(KERN_ERR, "Error generating scatterlist " 1621 ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
1625 "for crypt_stat session key; expected rc = 1; " 1622 "for crypt_stat session key; expected rc = 1; "
1626 "got rc = [%d]. key_rec->enc_key_size = [%d]\n", 1623 "got rc = [%d]. key_rec->enc_key_size = [%d]\n",
@@ -1629,8 +1626,8 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
1629 goto out; 1626 goto out;
1630 } 1627 }
1631 rc = virt_to_scatterlist(key_rec->enc_key, key_rec->enc_key_size, 1628 rc = virt_to_scatterlist(key_rec->enc_key, key_rec->enc_key_size,
1632 &dst_sg, 1); 1629 dst_sg, 2);
1633 if (rc != 1) { 1630 if (rc < 1 || rc > 2) {
1634 ecryptfs_printk(KERN_ERR, "Error generating scatterlist " 1631 ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
1635 "for crypt_stat encrypted session key; " 1632 "for crypt_stat encrypted session key; "
1636 "expected rc = 1; got rc = [%d]. " 1633 "expected rc = 1; got rc = [%d]. "
@@ -1651,7 +1648,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
1651 rc = 0; 1648 rc = 0;
1652 ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n", 1649 ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes of the key\n",
1653 crypt_stat->key_size); 1650 crypt_stat->key_size);
1654 rc = crypto_blkcipher_encrypt(&desc, &dst_sg, &src_sg, 1651 rc = crypto_blkcipher_encrypt(&desc, dst_sg, src_sg,
1655 (*key_rec).enc_key_size); 1652 (*key_rec).enc_key_size);
1656 mutex_unlock(tfm_mutex); 1653 mutex_unlock(tfm_mutex);
1657 if (rc) { 1654 if (rc) {
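[Editor's sketch] Why the scatterlist arrays above grew to two entries: a virtually contiguous key buffer can straddle a page boundary, so mapping it to physical segments may yield one entry or two, and the callers now accept either count. Quick arithmetic sketch of the segment count; PAGE_SZ and nr_segments are local names, not kernel API.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SZ 4096UL

static unsigned nr_segments(uintptr_t addr, size_t len)
{
    if (len == 0)
        return 0;
    uintptr_t first = addr / PAGE_SZ;
    uintptr_t last = (addr + len - 1) / PAGE_SZ;
    return (unsigned)(last - first + 1);
}

int main(void)
{
    /* 64-byte key that happens to start 32 bytes before a page boundary. */
    printf("%u segment(s)\n", nr_segments(PAGE_SZ - 32, 64));   /* -> 2 */
    printf("%u segment(s)\n", nr_segments(PAGE_SZ + 16, 64));   /* -> 1 */
    return 0;
}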
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index aec5c13f6341..96355d505347 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -102,6 +102,8 @@
102 102
103#define EP_UNACTIVE_PTR ((void *) -1L) 103#define EP_UNACTIVE_PTR ((void *) -1L)
104 104
105#define EP_ITEM_COST (sizeof(struct epitem) + sizeof(struct eppoll_entry))
106
105struct epoll_filefd { 107struct epoll_filefd {
106 struct file *file; 108 struct file *file;
107 int fd; 109 int fd;
@@ -200,6 +202,9 @@ struct eventpoll {
200 * holding ->lock. 202 * holding ->lock.
201 */ 203 */
202 struct epitem *ovflist; 204 struct epitem *ovflist;
205
206 /* The user that created the eventpoll descriptor */
207 struct user_struct *user;
203}; 208};
204 209
205/* Wait structure used by the poll hooks */ 210/* Wait structure used by the poll hooks */
@@ -227,9 +232,17 @@ struct ep_pqueue {
227}; 232};
228 233
229/* 234/*
235 * Configuration options available inside /proc/sys/fs/epoll/
236 */
237/* Maximum number of epoll devices, per user */
238static int max_user_instances __read_mostly;
239/* Maximum number of epoll watched descriptors, per user */
240static int max_user_watches __read_mostly;
241
242/*
230 * This mutex is used to serialize ep_free() and eventpoll_release_file(). 243 * This mutex is used to serialize ep_free() and eventpoll_release_file().
231 */ 244 */
232static struct mutex epmutex; 245static DEFINE_MUTEX(epmutex);
233 246
234/* Safe wake up implementation */ 247/* Safe wake up implementation */
235static struct poll_safewake psw; 248static struct poll_safewake psw;
@@ -240,6 +253,33 @@ static struct kmem_cache *epi_cache __read_mostly;
240/* Slab cache used to allocate "struct eppoll_entry" */ 253/* Slab cache used to allocate "struct eppoll_entry" */
241static struct kmem_cache *pwq_cache __read_mostly; 254static struct kmem_cache *pwq_cache __read_mostly;
242 255
256#ifdef CONFIG_SYSCTL
257
258#include <linux/sysctl.h>
259
260static int zero;
261
262ctl_table epoll_table[] = {
263 {
264 .procname = "max_user_instances",
265 .data = &max_user_instances,
266 .maxlen = sizeof(int),
267 .mode = 0644,
268 .proc_handler = &proc_dointvec_minmax,
269 .extra1 = &zero,
270 },
271 {
272 .procname = "max_user_watches",
273 .data = &max_user_watches,
274 .maxlen = sizeof(int),
275 .mode = 0644,
276 .proc_handler = &proc_dointvec_minmax,
277 .extra1 = &zero,
278 },
279 { .ctl_name = 0 }
280};
281#endif /* CONFIG_SYSCTL */
282
243 283
244/* Setup the structure that is used as key for the RB tree */ 284/* Setup the structure that is used as key for the RB tree */
245static inline void ep_set_ffd(struct epoll_filefd *ffd, 285static inline void ep_set_ffd(struct epoll_filefd *ffd,
@@ -402,6 +442,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
402 /* At this point it is safe to free the eventpoll item */ 442 /* At this point it is safe to free the eventpoll item */
403 kmem_cache_free(epi_cache, epi); 443 kmem_cache_free(epi_cache, epi);
404 444
445 atomic_dec(&ep->user->epoll_watches);
446
405 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n", 447 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n",
406 current, ep, file)); 448 current, ep, file));
407 449
@@ -449,6 +491,8 @@ static void ep_free(struct eventpoll *ep)
449 491
450 mutex_unlock(&epmutex); 492 mutex_unlock(&epmutex);
451 mutex_destroy(&ep->mtx); 493 mutex_destroy(&ep->mtx);
494 atomic_dec(&ep->user->epoll_devs);
495 free_uid(ep->user);
452 kfree(ep); 496 kfree(ep);
453} 497}
454 498
@@ -532,10 +576,19 @@ void eventpoll_release_file(struct file *file)
532 576
533static int ep_alloc(struct eventpoll **pep) 577static int ep_alloc(struct eventpoll **pep)
534{ 578{
535 struct eventpoll *ep = kzalloc(sizeof(*ep), GFP_KERNEL); 579 int error;
580 struct user_struct *user;
581 struct eventpoll *ep;
536 582
537 if (!ep) 583 user = get_current_user();
538 return -ENOMEM; 584 error = -EMFILE;
585 if (unlikely(atomic_read(&user->epoll_devs) >=
586 max_user_instances))
587 goto free_uid;
588 error = -ENOMEM;
589 ep = kzalloc(sizeof(*ep), GFP_KERNEL);
590 if (unlikely(!ep))
591 goto free_uid;
539 592
540 spin_lock_init(&ep->lock); 593 spin_lock_init(&ep->lock);
541 mutex_init(&ep->mtx); 594 mutex_init(&ep->mtx);
@@ -544,12 +597,17 @@ static int ep_alloc(struct eventpoll **pep)
544 INIT_LIST_HEAD(&ep->rdllist); 597 INIT_LIST_HEAD(&ep->rdllist);
545 ep->rbr = RB_ROOT; 598 ep->rbr = RB_ROOT;
546 ep->ovflist = EP_UNACTIVE_PTR; 599 ep->ovflist = EP_UNACTIVE_PTR;
600 ep->user = user;
547 601
548 *pep = ep; 602 *pep = ep;
549 603
550 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n", 604 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n",
551 current, ep)); 605 current, ep));
552 return 0; 606 return 0;
607
608free_uid:
609 free_uid(user);
610 return error;
553} 611}
554 612
555/* 613/*
@@ -703,9 +761,11 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
703 struct epitem *epi; 761 struct epitem *epi;
704 struct ep_pqueue epq; 762 struct ep_pqueue epq;
705 763
706 error = -ENOMEM; 764 if (unlikely(atomic_read(&ep->user->epoll_watches) >=
765 max_user_watches))
766 return -ENOSPC;
707 if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL))) 767 if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL)))
708 goto error_return; 768 return -ENOMEM;
709 769
710 /* Item initialization follow here ... */ 770 /* Item initialization follow here ... */
711 INIT_LIST_HEAD(&epi->rdllink); 771 INIT_LIST_HEAD(&epi->rdllink);
@@ -735,6 +795,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
735 * install process. Namely an allocation for a wait queue failed due 795 * install process. Namely an allocation for a wait queue failed due
736 * high memory pressure. 796 * high memory pressure.
737 */ 797 */
798 error = -ENOMEM;
738 if (epi->nwait < 0) 799 if (epi->nwait < 0)
739 goto error_unregister; 800 goto error_unregister;
740 801
@@ -765,6 +826,8 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
765 826
766 spin_unlock_irqrestore(&ep->lock, flags); 827 spin_unlock_irqrestore(&ep->lock, flags);
767 828
829 atomic_inc(&ep->user->epoll_watches);
830
768 /* We have to call this outside the lock */ 831 /* We have to call this outside the lock */
769 if (pwake) 832 if (pwake)
770 ep_poll_safewake(&psw, &ep->poll_wait); 833 ep_poll_safewake(&psw, &ep->poll_wait);
@@ -789,7 +852,7 @@ error_unregister:
789 spin_unlock_irqrestore(&ep->lock, flags); 852 spin_unlock_irqrestore(&ep->lock, flags);
790 853
791 kmem_cache_free(epi_cache, epi); 854 kmem_cache_free(epi_cache, epi);
792error_return: 855
793 return error; 856 return error;
794} 857}
795 858
@@ -1078,6 +1141,7 @@ asmlinkage long sys_epoll_create1(int flags)
1078 flags & O_CLOEXEC); 1141 flags & O_CLOEXEC);
1079 if (fd < 0) 1142 if (fd < 0)
1080 ep_free(ep); 1143 ep_free(ep);
1144 atomic_inc(&ep->user->epoll_devs);
1081 1145
1082error_return: 1146error_return:
1083 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", 1147 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
@@ -1299,7 +1363,12 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
1299 1363
1300static int __init eventpoll_init(void) 1364static int __init eventpoll_init(void)
1301{ 1365{
1302 mutex_init(&epmutex); 1366 struct sysinfo si;
1367
1368 si_meminfo(&si);
1369 max_user_instances = 128;
1370 max_user_watches = (((si.totalram - si.totalhigh) / 32) << PAGE_SHIFT) /
1371 EP_ITEM_COST;
1303 1372
1304 /* Initialize the structure used to perform safe poll wait head wake ups */ 1373 /* Initialize the structure used to perform safe poll wait head wake ups */
1305 ep_poll_safewake_init(&psw); 1374 ep_poll_safewake_init(&psw);
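[Editor's sketch] The eventpoll changes add per-user accounting: sys_epoll_create charges epoll_devs, ep_insert charges epoll_watches, both bounded by the new /proc/sys/fs/epoll sysctls, with the default watch budget derived from low memory divided by 32 and then by EP_ITEM_COST. A rough userspace model of the budget arithmetic and the check-then-charge step; ITEM_COST and the other names here are assumptions, not the real structure sizes.

#include <stdatomic.h>
#include <stdio.h>

#define ITEM_COST 192UL              /* stand-in for epitem + eppoll_entry */

struct user {
    atomic_int epoll_watches;
};

static long default_max_watches(unsigned long lowmem_bytes)
{
    /* mirrors: (low memory / 32) / EP_ITEM_COST */
    return (long)(lowmem_bytes / 32 / ITEM_COST);
}

static int charge_watch(struct user *u, long max_watches)
{
    if (atomic_load(&u->epoll_watches) >= max_watches)
        return -1;                   /* ep_insert returns -ENOSPC here */
    atomic_fetch_add(&u->epoll_watches, 1);
    return 0;
}

int main(void)
{
    unsigned long lowmem = 1UL << 30;   /* assume 1 GiB of low memory */
    long max_watches = default_max_watches(lowmem);
    struct user u;

    atomic_init(&u.epoll_watches, 0);
    printf("default max_user_watches = %ld\n", max_watches);

    int rc = charge_watch(&u, max_watches);
    printf("charge rc = %d, watches now %d\n",
           rc, atomic_load(&u.epoll_watches));
    return 0;
}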
diff --git a/fs/exec.c b/fs/exec.c
index 4e834f16d9da..ec5df9a38313 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1159,6 +1159,7 @@ EXPORT_SYMBOL(remove_arg_zero);
1159 */ 1159 */
1160int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) 1160int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1161{ 1161{
1162 unsigned int depth = bprm->recursion_depth;
1162 int try,retval; 1163 int try,retval;
1163 struct linux_binfmt *fmt; 1164 struct linux_binfmt *fmt;
1164#ifdef __alpha__ 1165#ifdef __alpha__
@@ -1219,8 +1220,15 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1219 continue; 1220 continue;
1220 read_unlock(&binfmt_lock); 1221 read_unlock(&binfmt_lock);
1221 retval = fn(bprm, regs); 1222 retval = fn(bprm, regs);
1223 /*
1224 * Restore the depth counter to its starting value
1225 * in this call, so we don't have to rely on every
1226 * load_binary function to restore it on return.
1227 */
1228 bprm->recursion_depth = depth;
1222 if (retval >= 0) { 1229 if (retval >= 0) {
1223 tracehook_report_exec(fmt, bprm, regs); 1230 if (depth == 0)
1231 tracehook_report_exec(fmt, bprm, regs);
1224 put_binfmt(fmt); 1232 put_binfmt(fmt);
1225 allow_write_access(bprm->file); 1233 allow_write_access(bprm->file);
1226 if (bprm->file) 1234 if (bprm->file)
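[Editor's sketch] The exec.c hunk snapshots bprm->recursion_depth on entry to search_binary_handler(), restores it after each handler runs, and only reports the exec to tracehook at depth 0, so chained loaders such as binfmt_script cannot leave the counter inflated. A tiny self-contained model of that save/restore pattern; every name below is invented.

#include <stdio.h>

#define MAX_DEPTH 4

struct binprm { int recursion_depth; };

static int binfmt_script_load(struct binprm *bprm);

/* Analogue of search_binary_handler(): snapshot and restore the depth. */
static int search_handler(struct binprm *bprm)
{
    int depth = bprm->recursion_depth;
    int ret = binfmt_script_load(bprm);

    bprm->recursion_depth = depth;     /* restore after the handler ran */
    if (ret >= 0 && depth == 0)
        printf("report exec only at the outermost level\n");
    return ret;
}

/* Analogue of a chaining binfmt: bump the depth, re-enter the search. */
static int binfmt_script_load(struct binprm *bprm)
{
    if (bprm->recursion_depth > MAX_DEPTH)
        return -1;                     /* bail out instead of looping */
    bprm->recursion_depth++;
    /* stop chaining after two levels so the example terminates */
    return bprm->recursion_depth < 2 ? search_handler(bprm) : 0;
}

int main(void)
{
    struct binprm bprm = { .recursion_depth = 0 };
    printf("load returned %d, depth back to %d\n",
           search_handler(&bprm), bprm.recursion_depth);
    return 0;
}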
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 80246bad1b7f..890e01828817 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -367,6 +367,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
367 * Try to get any dentry for the given file handle from the filesystem. 367 * Try to get any dentry for the given file handle from the filesystem.
368 */ 368 */
369 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); 369 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
370 if (!result)
371 result = ERR_PTR(-ESTALE);
370 if (IS_ERR(result)) 372 if (IS_ERR(result))
371 return result; 373 return result;
372 374
@@ -420,6 +422,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
420 422
421 target_dir = nop->fh_to_parent(mnt->mnt_sb, fid, 423 target_dir = nop->fh_to_parent(mnt->mnt_sb, fid,
422 fh_len, fileid_type); 424 fh_len, fileid_type);
425 if (!target_dir)
426 goto err_result;
423 err = PTR_ERR(target_dir); 427 err = PTR_ERR(target_dir);
424 if (IS_ERR(target_dir)) 428 if (IS_ERR(target_dir))
425 goto err_result; 429 goto err_result;
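[Editor's sketch] The exportfs hunks normalise a NULL return from the filesystem's fh_to_dentry/fh_to_parent callbacks into an explicit stale-handle error before the IS_ERR() checks run. A userspace flavour of the same defensive wrapper; ESTALE comes from <errno.h>, the rest of the names are made up.

#include <errno.h>
#include <stdio.h>

struct dentry { const char *name; };

/* Backend callback contract: may return an object or NULL. */
static struct dentry *backend_lookup(int id)
{
    (void)id;
    return NULL;              /* e.g. the object vanished on the server */
}

static struct dentry *decode_handle(int id, int *err)
{
    struct dentry *d = backend_lookup(id);

    if (!d) {                 /* map "not found" onto a real error code */
        *err = -ESTALE;
        return NULL;
    }
    *err = 0;
    return d;
}

int main(void)
{
    int err;
    struct dentry *d = decode_handle(42, &err);

    printf("dentry=%p err=%d\n", (void *)d, err);
    return 0;
}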
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index e5717a4fae67..f6c94f232ec1 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2375,12 +2375,9 @@ int ext3_force_commit(struct super_block *sb)
2375/* 2375/*
2376 * Ext3 always journals updates to the superblock itself, so we don't 2376 * Ext3 always journals updates to the superblock itself, so we don't
2377 * have to propagate any other updates to the superblock on disk at this 2377 * have to propagate any other updates to the superblock on disk at this
2378 * point. Just start an async writeback to get the buffers on their way 2378 * point. (We can probably nuke this function altogether, and remove
2379 * to the disk. 2379 * any mention to sb->s_dirt in all of fs/ext3; eventual cleanup...)
2380 *
2381 * This implicitly triggers the writebehind on sync().
2382 */ 2380 */
2383
2384static void ext3_write_super (struct super_block * sb) 2381static void ext3_write_super (struct super_block * sb)
2385{ 2382{
2386 if (mutex_trylock(&sb->s_lock) != 0) 2383 if (mutex_trylock(&sb->s_lock) != 0)
@@ -2390,13 +2387,12 @@ static void ext3_write_super (struct super_block * sb)
2390 2387
2391static int ext3_sync_fs(struct super_block *sb, int wait) 2388static int ext3_sync_fs(struct super_block *sb, int wait)
2392{ 2389{
2393 tid_t target;
2394
2395 sb->s_dirt = 0; 2390 sb->s_dirt = 0;
2396 if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { 2391 if (wait)
2397 if (wait) 2392 ext3_force_commit(sb);
2398 log_wait_commit(EXT3_SB(sb)->s_journal, target); 2393 else
2399 } 2394 journal_start_commit(EXT3_SB(sb)->s_journal, NULL);
2395
2400 return 0; 2396 return 0;
2401} 2397}
2402 2398
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index d2003cdc36aa..db35cfdb3c8b 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -609,8 +609,8 @@ int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
609 609
610 if (free_blocks - (nblocks + root_blocks + dirty_blocks) < 610 if (free_blocks - (nblocks + root_blocks + dirty_blocks) <
611 EXT4_FREEBLOCKS_WATERMARK) { 611 EXT4_FREEBLOCKS_WATERMARK) {
612 free_blocks = percpu_counter_sum(fbc); 612 free_blocks = percpu_counter_sum_positive(fbc);
613 dirty_blocks = percpu_counter_sum(dbc); 613 dirty_blocks = percpu_counter_sum_positive(dbc);
614 if (dirty_blocks < 0) { 614 if (dirty_blocks < 0) {
615 printk(KERN_CRIT "Dirty block accounting " 615 printk(KERN_CRIT "Dirty block accounting "
616 "went wrong %lld\n", 616 "went wrong %lld\n",
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index fe34d74cfb19..2a117e286e54 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -718,6 +718,8 @@ got:
718 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); 718 gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
719 free = ext4_free_blocks_after_init(sb, group, gdp); 719 free = ext4_free_blocks_after_init(sb, group, gdp);
720 gdp->bg_free_blocks_count = cpu_to_le16(free); 720 gdp->bg_free_blocks_count = cpu_to_le16(free);
721 gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
722 gdp);
721 } 723 }
722 spin_unlock(sb_bgl_lock(sbi, group)); 724 spin_unlock(sb_bgl_lock(sbi, group));
723 725
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8dbf6953845b..be21a5ae33cb 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2329,6 +2329,8 @@ static int ext4_da_writepage(struct page *page,
2329 unlock_page(page); 2329 unlock_page(page);
2330 return 0; 2330 return 0;
2331 } 2331 }
2332 /* now mark the buffer_heads as dirty and uptodate */
2333 block_commit_write(page, 0, PAGE_CACHE_SIZE);
2332 } 2334 }
2333 2335
2334 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) 2336 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
@@ -4580,9 +4582,10 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
4580static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) 4582static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
4581{ 4583{
4582 if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) 4584 if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
4583 return ext4_indirect_trans_blocks(inode, nrblocks, 0); 4585 return ext4_indirect_trans_blocks(inode, nrblocks, chunk);
4584 return ext4_ext_index_trans_blocks(inode, nrblocks, 0); 4586 return ext4_ext_index_trans_blocks(inode, nrblocks, chunk);
4585} 4587}
4588
4586/* 4589/*
4587 * Account for index blocks, block groups bitmaps and block group 4590 * Account for index blocks, block groups bitmaps and block group
4588 * descriptor blocks if modify datablocks and index blocks 4591 * descriptor blocks if modify datablocks and index blocks
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index dfe17a134052..444ad998f72e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4441,6 +4441,7 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4441 else if (block >= (entry->start_blk + entry->count)) 4441 else if (block >= (entry->start_blk + entry->count))
4442 n = &(*n)->rb_right; 4442 n = &(*n)->rb_right;
4443 else { 4443 else {
4444 ext4_unlock_group(sb, group);
4444 ext4_error(sb, __func__, 4445 ext4_error(sb, __func__,
4445 "Double free of blocks %d (%d %d)\n", 4446 "Double free of blocks %d (%d %d)\n",
4446 block, entry->start_blk, entry->count); 4447 block, entry->start_blk, entry->count);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 994859df010e..e4a241c65dbe 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1458,9 +1458,8 @@ static int ext4_fill_flex_info(struct super_block *sb)
1458 1458
1459 /* We allocate both existing and potentially added groups */ 1459 /* We allocate both existing and potentially added groups */
1460 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1460 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1461 ((sbi->s_es->s_reserved_gdt_blocks +1 ) << 1461 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
1462 EXT4_DESC_PER_BLOCK_BITS(sb))) / 1462 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
1463 groups_per_flex;
1464 sbi->s_flex_groups = kzalloc(flex_group_count * 1463 sbi->s_flex_groups = kzalloc(flex_group_count *
1465 sizeof(struct flex_groups), GFP_KERNEL); 1464 sizeof(struct flex_groups), GFP_KERNEL);
1466 if (sbi->s_flex_groups == NULL) { 1465 if (sbi->s_flex_groups == NULL) {
@@ -2885,12 +2884,9 @@ int ext4_force_commit(struct super_block *sb)
2885/* 2884/*
2886 * Ext4 always journals updates to the superblock itself, so we don't 2885 * Ext4 always journals updates to the superblock itself, so we don't
2887 * have to propagate any other updates to the superblock on disk at this 2886 * have to propagate any other updates to the superblock on disk at this
2888 * point. Just start an async writeback to get the buffers on their way 2887 * point. (We can probably nuke this function altogether, and remove
2889 * to the disk. 2888 * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...)
2890 *
2891 * This implicitly triggers the writebehind on sync().
2892 */ 2889 */
2893
2894static void ext4_write_super(struct super_block *sb) 2890static void ext4_write_super(struct super_block *sb)
2895{ 2891{
2896 if (mutex_trylock(&sb->s_lock) != 0) 2892 if (mutex_trylock(&sb->s_lock) != 0)
@@ -2900,15 +2896,15 @@ static void ext4_write_super(struct super_block *sb)
2900 2896
2901static int ext4_sync_fs(struct super_block *sb, int wait) 2897static int ext4_sync_fs(struct super_block *sb, int wait)
2902{ 2898{
2903 tid_t target; 2899 int ret = 0;
2904 2900
2905 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); 2901 trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
2906 sb->s_dirt = 0; 2902 sb->s_dirt = 0;
2907 if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { 2903 if (wait)
2908 if (wait) 2904 ret = ext4_force_commit(sb);
2909 jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); 2905 else
2910 } 2906 jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, NULL);
2911 return 0; 2907 return ret;
2912} 2908}
2913 2909
2914/* 2910/*
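[Editor's sketch] The flex_group_count fix above adds the le16_to_cpu() that was missing around s_reserved_gdt_blocks: on-disk superblock fields are little-endian and must be converted to host order before arithmetic, or big-endian hosts size the array wrongly. A portable userspace version of that conversion; le16_to_cpu_sk and the geometry values are assumptions for illustration.

#include <stdint.h>
#include <stdio.h>

static uint16_t le16_to_cpu_sk(const uint8_t raw[2])
{
    /* Assemble from bytes so the result is host order on any endianness. */
    return (uint16_t)(raw[0] | (raw[1] << 8));
}

int main(void)
{
    /* s_reserved_gdt_blocks = 32 as it would appear on disk. */
    uint8_t on_disk[2] = { 0x20, 0x00 };
    unsigned reserved_gdt_blocks = le16_to_cpu_sk(on_disk);
    unsigned desc_per_block_bits = 7;   /* assumes 4K blocks, 32-byte descs */

    printf("groups covered by reserved GDT blocks: %u\n",
           (reserved_gdt_blocks + 1) << desc_per_block_bits);
    return 0;
}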
diff --git a/fs/fat/Makefile b/fs/fat/Makefile
index bfb5f06cf2c8..e06190322c1c 100644
--- a/fs/fat/Makefile
+++ b/fs/fat/Makefile
@@ -3,5 +3,9 @@
3# 3#
4 4
5obj-$(CONFIG_FAT_FS) += fat.o 5obj-$(CONFIG_FAT_FS) += fat.o
6obj-$(CONFIG_VFAT_FS) += vfat.o
7obj-$(CONFIG_MSDOS_FS) += msdos.o
6 8
7fat-objs := cache.o dir.o fatent.o file.o inode.o misc.o 9fat-y := cache.o dir.o fatent.o file.o inode.o misc.o
10vfat-y := namei_vfat.o
11msdos-y := namei_msdos.o
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 3222f51c41cf..b42602298087 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -9,8 +9,8 @@
9 */ 9 */
10 10
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/msdos_fs.h>
13#include <linux/buffer_head.h> 12#include <linux/buffer_head.h>
13#include "fat.h"
14 14
15/* this must be > 0. */ 15/* this must be > 0. */
16#define FAT_MAX_CACHE 8 16#define FAT_MAX_CACHE 8
@@ -293,10 +293,12 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)
293} 293}
294 294
295int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, 295int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
296 unsigned long *mapped_blocks) 296 unsigned long *mapped_blocks, int create)
297{ 297{
298 struct super_block *sb = inode->i_sb; 298 struct super_block *sb = inode->i_sb;
299 struct msdos_sb_info *sbi = MSDOS_SB(sb); 299 struct msdos_sb_info *sbi = MSDOS_SB(sb);
300 const unsigned long blocksize = sb->s_blocksize;
301 const unsigned char blocksize_bits = sb->s_blocksize_bits;
300 sector_t last_block; 302 sector_t last_block;
301 int cluster, offset; 303 int cluster, offset;
302 304
@@ -309,10 +311,21 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
309 } 311 }
310 return 0; 312 return 0;
311 } 313 }
312 last_block = (MSDOS_I(inode)->mmu_private + (sb->s_blocksize - 1)) 314
313 >> sb->s_blocksize_bits; 315 last_block = (i_size_read(inode) + (blocksize - 1)) >> blocksize_bits;
314 if (sector >= last_block) 316 if (sector >= last_block) {
315 return 0; 317 if (!create)
318 return 0;
319
320 /*
321 * ->mmu_private can access on only allocation path.
322 * (caller must hold ->i_mutex)
323 */
324 last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1))
325 >> blocksize_bits;
326 if (sector >= last_block)
327 return 0;
328 }
316 329
317 cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits); 330 cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits);
318 offset = sector & (sbi->sec_per_clus - 1); 331 offset = sector & (sbi->sec_per_clus - 1);
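[Editor's sketch] The fat_bmap() math above splits a file-relative sector into a cluster index plus an offset inside that cluster, and fat_clus_to_blknr() turns the on-disk cluster number into an absolute sector. A worked userspace version of that arithmetic with assumed geometry; the FAT-chain lookup is faked with a constant.

#include <stdio.h>

#define FAT_START_ENT 2           /* first usable cluster number in FAT */

int main(void)
{
    unsigned sec_per_clus = 8;        /* assumed: 4K clusters, 512B sectors */
    unsigned cluster_bits = 12;       /* log2(8 * 512) */
    unsigned blocksize_bits = 9;      /* log2(512) */
    unsigned long data_start = 544;   /* assumed first data sector */

    unsigned long file_sector = 21;   /* sector 21 within the file */
    unsigned long fclus = file_sector >> (cluster_bits - blocksize_bits);
    unsigned long offset = file_sector & (sec_per_clus - 1);

    /* Pretend the FAT chain says file cluster 2 lives in disk cluster 37. */
    unsigned long dclus = 37;
    unsigned long phys = (dclus - FAT_START_ENT) * sec_per_clus
                         + data_start + offset;

    printf("file sector %lu -> cluster %lu, offset %lu, disk sector %lu\n",
           file_sector, fclus, offset, phys);
    return 0;
}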
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index bae1c3292522..67e058357098 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -16,11 +16,11 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/time.h> 18#include <linux/time.h>
19#include <linux/msdos_fs.h>
20#include <linux/smp_lock.h> 19#include <linux/smp_lock.h>
21#include <linux/buffer_head.h> 20#include <linux/buffer_head.h>
22#include <linux/compat.h> 21#include <linux/compat.h>
23#include <asm/uaccess.h> 22#include <asm/uaccess.h>
23#include "fat.h"
24 24
25static inline loff_t fat_make_i_pos(struct super_block *sb, 25static inline loff_t fat_make_i_pos(struct super_block *sb,
26 struct buffer_head *bh, 26 struct buffer_head *bh,
@@ -77,7 +77,7 @@ next:
77 77
78 *bh = NULL; 78 *bh = NULL;
79 iblock = *pos >> sb->s_blocksize_bits; 79 iblock = *pos >> sb->s_blocksize_bits;
80 err = fat_bmap(dir, iblock, &phys, &mapped_blocks); 80 err = fat_bmap(dir, iblock, &phys, &mapped_blocks, 0);
81 if (err || !phys) 81 if (err || !phys)
82 return -1; /* beyond EOF or error */ 82 return -1; /* beyond EOF or error */
83 83
@@ -86,7 +86,7 @@ next:
86 *bh = sb_bread(sb, phys); 86 *bh = sb_bread(sb, phys);
87 if (*bh == NULL) { 87 if (*bh == NULL) {
88 printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n", 88 printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n",
89 (unsigned long long)phys); 89 (llu)phys);
90 /* skip this block */ 90 /* skip this block */
91 *pos = (iblock + 1) << sb->s_blocksize_bits; 91 *pos = (iblock + 1) << sb->s_blocksize_bits;
92 goto next; 92 goto next;
@@ -373,9 +373,10 @@ parse_record:
373 if (de->attr == ATTR_EXT) { 373 if (de->attr == ATTR_EXT) {
374 int status = fat_parse_long(inode, &cpos, &bh, &de, 374 int status = fat_parse_long(inode, &cpos, &bh, &de,
375 &unicode, &nr_slots); 375 &unicode, &nr_slots);
376 if (status < 0) 376 if (status < 0) {
377 return status; 377 err = status;
378 else if (status == PARSE_INVALID) 378 goto end_of_dir;
379 } else if (status == PARSE_INVALID)
379 continue; 380 continue;
380 else if (status == PARSE_NOT_LONGNAME) 381 else if (status == PARSE_NOT_LONGNAME)
381 goto parse_record; 382 goto parse_record;
@@ -832,6 +833,7 @@ static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd,
832#endif /* CONFIG_COMPAT */ 833#endif /* CONFIG_COMPAT */
833 834
834const struct file_operations fat_dir_operations = { 835const struct file_operations fat_dir_operations = {
836 .llseek = generic_file_llseek,
835 .read = generic_read_dir, 837 .read = generic_read_dir,
836 .readdir = fat_readdir, 838 .readdir = fat_readdir,
837 .ioctl = fat_dir_ioctl, 839 .ioctl = fat_dir_ioctl,
@@ -1089,6 +1091,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
1089 struct msdos_dir_entry *de; 1091 struct msdos_dir_entry *de;
1090 sector_t blknr; 1092 sector_t blknr;
1091 __le16 date, time; 1093 __le16 date, time;
1094 u8 time_cs;
1092 int err, cluster; 1095 int err, cluster;
1093 1096
1094 err = fat_alloc_clusters(dir, &cluster, 1); 1097 err = fat_alloc_clusters(dir, &cluster, 1);
@@ -1102,7 +1105,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
1102 goto error_free; 1105 goto error_free;
1103 } 1106 }
1104 1107
1105 fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); 1108 fat_time_unix2fat(sbi, ts, &time, &date, &time_cs);
1106 1109
1107 de = (struct msdos_dir_entry *)bhs[0]->b_data; 1110 de = (struct msdos_dir_entry *)bhs[0]->b_data;
1108 /* filling the new directory slots ("." and ".." entries) */ 1111 /* filling the new directory slots ("." and ".." entries) */
@@ -1112,13 +1115,14 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
1112 de[0].lcase = de[1].lcase = 0; 1115 de[0].lcase = de[1].lcase = 0;
1113 de[0].time = de[1].time = time; 1116 de[0].time = de[1].time = time;
1114 de[0].date = de[1].date = date; 1117 de[0].date = de[1].date = date;
1115 de[0].ctime_cs = de[1].ctime_cs = 0;
1116 if (sbi->options.isvfat) { 1118 if (sbi->options.isvfat) {
1117 /* extra timestamps */ 1119 /* extra timestamps */
1118 de[0].ctime = de[1].ctime = time; 1120 de[0].ctime = de[1].ctime = time;
1121 de[0].ctime_cs = de[1].ctime_cs = time_cs;
1119 de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = date; 1122 de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = date;
1120 } else { 1123 } else {
1121 de[0].ctime = de[1].ctime = 0; 1124 de[0].ctime = de[1].ctime = 0;
1125 de[0].ctime_cs = de[1].ctime_cs = 0;
1122 de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = 0; 1126 de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = 0;
1123 } 1127 }
1124 de[0].start = cpu_to_le16(cluster); 1128 de[0].start = cpu_to_le16(cluster);
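[Editor's sketch] The dir.c changes switch to fat_time_unix2fat(), which also fills the extra centiseconds byte VFAT keeps next to ctime. For reference, the classic FAT packing is date = (year-1980)<<9 | month<<5 | day and time = hour<<11 | minute<<5 | seconds/2, with the leftover odd second (and 10ms units) stored in the cs byte (0..199). Standalone sketch of that packing; it ignores the year clamping and sub-second precision the real helper handles.

#include <stdint.h>
#include <stdio.h>
#include <time.h>

static void unix2fat(time_t t, uint16_t *fdate, uint16_t *ftime, uint8_t *cs)
{
    struct tm *tm = gmtime(&t);              /* plays the role of tz_utc=1 */

    *fdate = (uint16_t)(((tm->tm_year + 1900 - 1980) << 9) |
                        ((tm->tm_mon + 1) << 5) | tm->tm_mday);
    *ftime = (uint16_t)((tm->tm_hour << 11) | (tm->tm_min << 5) |
                        (tm->tm_sec >> 1));
    *cs = (uint8_t)((tm->tm_sec & 1) * 100); /* odd second = 100 x 10ms */
}

int main(void)
{
    uint16_t d, t;
    uint8_t cs;
    time_t now = time(NULL);

    unix2fat(now, &d, &t, &cs);
    printf("fat date=0x%04x time=0x%04x cs=%u\n", d, t, cs);
    return 0;
}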
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
new file mode 100644
index 000000000000..ea440d65819c
--- /dev/null
+++ b/fs/fat/fat.h
@@ -0,0 +1,329 @@
1#ifndef _FAT_H
2#define _FAT_H
3
4#include <linux/buffer_head.h>
5#include <linux/string.h>
6#include <linux/nls.h>
7#include <linux/fs.h>
8#include <linux/mutex.h>
9#include <linux/msdos_fs.h>
10
11/*
12 * vfat shortname flags
13 */
14#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */
15#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */
16#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */
17#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */
18#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */
19
20struct fat_mount_options {
21 uid_t fs_uid;
22 gid_t fs_gid;
23 unsigned short fs_fmask;
24 unsigned short fs_dmask;
25 unsigned short codepage; /* Codepage for shortname conversions */
26 char *iocharset; /* Charset used for filename input/display */
27 unsigned short shortname; /* flags for shortname display/create rule */
28 unsigned char name_check; /* r = relaxed, n = normal, s = strict */
29 unsigned short allow_utime;/* permission for setting the [am]time */
30 unsigned quiet:1, /* set = fake successful chmods and chowns */
31 showexec:1, /* set = only set x bit for com/exe/bat */
32 sys_immutable:1, /* set = system files are immutable */
33 dotsOK:1, /* set = hidden and system files are named '.filename' */
34 isvfat:1, /* 0=no vfat long filename support, 1=vfat support */
35 utf8:1, /* Use of UTF-8 character set (Default) */
36 unicode_xlate:1, /* create escape sequences for unhandled Unicode */
37 numtail:1, /* Does first alias have a numeric '~1' type tail? */
38 flush:1, /* write things quickly */
39 nocase:1, /* Does this need case conversion? 0=need case conversion*/
40 usefree:1, /* Use free_clusters for FAT32 */
41 tz_utc:1, /* Filesystem timestamps are in UTC */
42 rodir:1; /* allow ATTR_RO for directory */
43};
44
45#define FAT_HASH_BITS 8
46#define FAT_HASH_SIZE (1UL << FAT_HASH_BITS)
47
48/*
49 * MS-DOS file system in-core superblock data
50 */
51struct msdos_sb_info {
52 unsigned short sec_per_clus; /* sectors/cluster */
53 unsigned short cluster_bits; /* log2(cluster_size) */
54 unsigned int cluster_size; /* cluster size */
55 unsigned char fats,fat_bits; /* number of FATs, FAT bits (12 or 16) */
56 unsigned short fat_start;
57 unsigned long fat_length; /* FAT start & length (sec.) */
58 unsigned long dir_start;
59 unsigned short dir_entries; /* root dir start & entries */
60 unsigned long data_start; /* first data sector */
61 unsigned long max_cluster; /* maximum cluster number */
62 unsigned long root_cluster; /* first cluster of the root directory */
63 unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */
64 struct mutex fat_lock;
65 unsigned int prev_free; /* previously allocated cluster number */
66 unsigned int free_clusters; /* -1 if undefined */
67 unsigned int free_clus_valid; /* is free_clusters valid? */
68 struct fat_mount_options options;
69 struct nls_table *nls_disk; /* Codepage used on disk */
70 struct nls_table *nls_io; /* Charset used for input and display */
71 const void *dir_ops; /* Opaque; default directory operations */
72 int dir_per_block; /* dir entries per block */
73 int dir_per_block_bits; /* log2(dir_per_block) */
74
75 int fatent_shift;
76 struct fatent_operations *fatent_ops;
77
78 spinlock_t inode_hash_lock;
79 struct hlist_head inode_hashtable[FAT_HASH_SIZE];
80};
81
82#define FAT_CACHE_VALID 0 /* special case for valid cache */
83
84/*
85 * MS-DOS file system inode data in memory
86 */
87struct msdos_inode_info {
88 spinlock_t cache_lru_lock;
89 struct list_head cache_lru;
90 int nr_caches;
91 /* for avoiding the race between fat_free() and fat_get_cluster() */
92 unsigned int cache_valid_id;
93
94 /* NOTE: mmu_private is 64bits, so must hold ->i_mutex to access */
95 loff_t mmu_private; /* physically allocated size */
96
97 int i_start; /* first cluster or 0 */
98 int i_logstart; /* logical first cluster */
99 int i_attrs; /* unused attribute bits */
100 loff_t i_pos; /* on-disk position of directory entry or 0 */
101 struct hlist_node i_fat_hash; /* hash by i_location */
102 struct inode vfs_inode;
103};
104
105struct fat_slot_info {
106 loff_t i_pos; /* on-disk position of directory entry */
107 loff_t slot_off; /* offset for slot or de start */
108 int nr_slots; /* number of slots + 1(de) in filename */
109 struct msdos_dir_entry *de;
110 struct buffer_head *bh;
111};
112
113static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb)
114{
115 return sb->s_fs_info;
116}
117
118static inline struct msdos_inode_info *MSDOS_I(struct inode *inode)
119{
120 return container_of(inode, struct msdos_inode_info, vfs_inode);
121}
122
123/*
124 * If ->i_mode can't hold S_IWUGO (i.e. ATTR_RO), we use ->i_attrs to
125 * save ATTR_RO instead of ->i_mode.
126 *
127 * If it's directory and !sbi->options.rodir, ATTR_RO isn't read-only
128 * bit, it's just used as flag for app.
129 */
130static inline int fat_mode_can_hold_ro(struct inode *inode)
131{
132 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
133 mode_t mask;
134
135 if (S_ISDIR(inode->i_mode)) {
136 if (!sbi->options.rodir)
137 return 0;
138 mask = ~sbi->options.fs_dmask;
139 } else
140 mask = ~sbi->options.fs_fmask;
141
142 if (!(mask & S_IWUGO))
143 return 0;
144 return 1;
145}
146
147/* Convert attribute bits and a mask to the UNIX mode. */
148static inline mode_t fat_make_mode(struct msdos_sb_info *sbi,
149 u8 attrs, mode_t mode)
150{
151 if (attrs & ATTR_RO && !((attrs & ATTR_DIR) && !sbi->options.rodir))
152 mode &= ~S_IWUGO;
153
154 if (attrs & ATTR_DIR)
155 return (mode & ~sbi->options.fs_dmask) | S_IFDIR;
156 else
157 return (mode & ~sbi->options.fs_fmask) | S_IFREG;
158}
159
160/* Return the FAT attribute byte for this inode */
161static inline u8 fat_make_attrs(struct inode *inode)
162{
163 u8 attrs = MSDOS_I(inode)->i_attrs;
164 if (S_ISDIR(inode->i_mode))
165 attrs |= ATTR_DIR;
166 if (fat_mode_can_hold_ro(inode) && !(inode->i_mode & S_IWUGO))
167 attrs |= ATTR_RO;
168 return attrs;
169}
170
171static inline void fat_save_attrs(struct inode *inode, u8 attrs)
172{
173 if (fat_mode_can_hold_ro(inode))
174 MSDOS_I(inode)->i_attrs = attrs & ATTR_UNUSED;
175 else
176 MSDOS_I(inode)->i_attrs = attrs & (ATTR_UNUSED | ATTR_RO);
177}
178
179static inline unsigned char fat_checksum(const __u8 *name)
180{
181 unsigned char s = name[0];
182 s = (s<<7) + (s>>1) + name[1]; s = (s<<7) + (s>>1) + name[2];
183 s = (s<<7) + (s>>1) + name[3]; s = (s<<7) + (s>>1) + name[4];
184 s = (s<<7) + (s>>1) + name[5]; s = (s<<7) + (s>>1) + name[6];
185 s = (s<<7) + (s>>1) + name[7]; s = (s<<7) + (s>>1) + name[8];
186 s = (s<<7) + (s>>1) + name[9]; s = (s<<7) + (s>>1) + name[10];
187 return s;
188}
189
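[Editor's sketch] Worked example of the fat_checksum() helper just above: the 11-byte short name is folded into one byte by rotating the accumulator right one bit and adding the next character, and that byte is stored in every long-name slot to tie it to its 8.3 entry. Standalone userspace copy for experimentation; shortname_checksum is a local name.

#include <stdio.h>

static unsigned char shortname_checksum(const unsigned char *name)
{
    unsigned char s = 0;

    for (int i = 0; i < 11; i++)
        s = (unsigned char)((s << 7) + (s >> 1) + name[i]);
    return s;
}

int main(void)
{
    /* "FOO.TXT" as an 8+3 shortname entry: space padded, no dot. */
    const unsigned char name[12] = "FOO     TXT";

    printf("checksum = 0x%02x\n", shortname_checksum(name));
    return 0;
}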
190static inline sector_t fat_clus_to_blknr(struct msdos_sb_info *sbi, int clus)
191{
192 return ((sector_t)clus - FAT_START_ENT) * sbi->sec_per_clus
193 + sbi->data_start;
194}
195
196static inline void fat16_towchar(wchar_t *dst, const __u8 *src, size_t len)
197{
198#ifdef __BIG_ENDIAN
199 while (len--) {
200 *dst++ = src[0] | (src[1] << 8);
201 src += 2;
202 }
203#else
204 memcpy(dst, src, len * 2);
205#endif
206}
207
208static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len)
209{
210#ifdef __BIG_ENDIAN
211 while (len--) {
212 dst[0] = *src & 0x00FF;
213 dst[1] = (*src & 0xFF00) >> 8;
214 dst += 2;
215 src++;
216 }
217#else
218 memcpy(dst, src, len * 2);
219#endif
220}
221
222/* fat/cache.c */
223extern void fat_cache_inval_inode(struct inode *inode);
224extern int fat_get_cluster(struct inode *inode, int cluster,
225 int *fclus, int *dclus);
226extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
227 unsigned long *mapped_blocks, int create);
228
229/* fat/dir.c */
230extern const struct file_operations fat_dir_operations;
231extern int fat_search_long(struct inode *inode, const unsigned char *name,
232 int name_len, struct fat_slot_info *sinfo);
233extern int fat_dir_empty(struct inode *dir);
234extern int fat_subdirs(struct inode *dir);
235extern int fat_scan(struct inode *dir, const unsigned char *name,
236 struct fat_slot_info *sinfo);
237extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh,
238 struct msdos_dir_entry **de, loff_t *i_pos);
239extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts);
240extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
241 struct fat_slot_info *sinfo);
242extern int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo);
243
244/* fat/fatent.c */
245struct fat_entry {
246 int entry;
247 union {
248 u8 *ent12_p[2];
249 __le16 *ent16_p;
250 __le32 *ent32_p;
251 } u;
252 int nr_bhs;
253 struct buffer_head *bhs[2];
254};
255
256static inline void fatent_init(struct fat_entry *fatent)
257{
258 fatent->nr_bhs = 0;
259 fatent->entry = 0;
260 fatent->u.ent32_p = NULL;
261 fatent->bhs[0] = fatent->bhs[1] = NULL;
262}
263
264static inline void fatent_set_entry(struct fat_entry *fatent, int entry)
265{
266 fatent->entry = entry;
267 fatent->u.ent32_p = NULL;
268}
269
270static inline void fatent_brelse(struct fat_entry *fatent)
271{
272 int i;
273 fatent->u.ent32_p = NULL;
274 for (i = 0; i < fatent->nr_bhs; i++)
275 brelse(fatent->bhs[i]);
276 fatent->nr_bhs = 0;
277 fatent->bhs[0] = fatent->bhs[1] = NULL;
278}
279
280extern void fat_ent_access_init(struct super_block *sb);
281extern int fat_ent_read(struct inode *inode, struct fat_entry *fatent,
282 int entry);
283extern int fat_ent_write(struct inode *inode, struct fat_entry *fatent,
284 int new, int wait);
285extern int fat_alloc_clusters(struct inode *inode, int *cluster,
286 int nr_cluster);
287extern int fat_free_clusters(struct inode *inode, int cluster);
288extern int fat_count_free_clusters(struct super_block *sb);
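The usual calling pattern for the fatent helpers is fatent_init(), then fat_ent_read()/fat_ent_write() in a loop, then fatent_brelse(). A rough kernel-context sketch of a chain walk built on the declarations above (FAT locking, FAT_ENT_FREE/FAT_ENT_BAD handling and other error cases are omitted; real callers such as the cluster cache code are more careful):

/* Sketch only: count the clusters in a chain starting at 'cluster'. */
static int count_chain(struct inode *inode, int cluster)
{
	struct fat_entry fatent;
	int nr = 0;

	fatent_init(&fatent);
	while (1) {
		int next = fat_ent_read(inode, &fatent, cluster);
		if (next < 0) {			/* I/O error */
			nr = next;
			break;
		}
		nr++;
		if (next == FAT_ENT_EOF)	/* end of chain */
			break;
		cluster = next;
	}
	fatent_brelse(&fatent);		/* drop any pinned FAT buffers */
	return nr;
}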
289
290/* fat/file.c */
291extern int fat_generic_ioctl(struct inode *inode, struct file *filp,
292 unsigned int cmd, unsigned long arg);
293extern const struct file_operations fat_file_operations;
294extern const struct inode_operations fat_file_inode_operations;
295extern int fat_setattr(struct dentry * dentry, struct iattr * attr);
296extern void fat_truncate(struct inode *inode);
297extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry,
298 struct kstat *stat);
299
300/* fat/inode.c */
301extern void fat_attach(struct inode *inode, loff_t i_pos);
302extern void fat_detach(struct inode *inode);
303extern struct inode *fat_iget(struct super_block *sb, loff_t i_pos);
304extern struct inode *fat_build_inode(struct super_block *sb,
305 struct msdos_dir_entry *de, loff_t i_pos);
306extern int fat_sync_inode(struct inode *inode);
307extern int fat_fill_super(struct super_block *sb, void *data, int silent,
308 const struct inode_operations *fs_dir_inode_ops, int isvfat);
309
310extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
311 struct inode *i2);
312/* fat/misc.c */
313extern void fat_fs_panic(struct super_block *s, const char *fmt, ...)
314 __attribute__ ((format (printf, 2, 3))) __cold;
315extern void fat_clusters_flush(struct super_block *sb);
316extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster);
317extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
318 __le16 __time, __le16 __date, u8 time_cs);
319extern void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts,
320 __le16 *time, __le16 *date, u8 *time_cs);
321extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs);
322
323int fat_cache_init(void);
324void fat_cache_destroy(void);
325
326/* helper for printk */
327typedef unsigned long long llu;
328
329#endif /* !_FAT_H */
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index fb98b3d847ed..da6eea47872f 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -7,6 +7,7 @@
7#include <linux/fs.h> 7#include <linux/fs.h>
8#include <linux/msdos_fs.h> 8#include <linux/msdos_fs.h>
9#include <linux/blkdev.h> 9#include <linux/blkdev.h>
10#include "fat.h"
10 11
11struct fatent_operations { 12struct fatent_operations {
12 void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); 13 void (*ent_blocknr)(struct super_block *, int, int *, sector_t *);
@@ -92,8 +93,7 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent,
92err_brelse: 93err_brelse:
93 brelse(bhs[0]); 94 brelse(bhs[0]);
94err: 95err:
95 printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", 96 printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", (llu)blocknr);
96 (unsigned long long)blocknr);
97 return -EIO; 97 return -EIO;
98} 98}
99 99
@@ -106,7 +106,7 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent,
106 fatent->bhs[0] = sb_bread(sb, blocknr); 106 fatent->bhs[0] = sb_bread(sb, blocknr);
107 if (!fatent->bhs[0]) { 107 if (!fatent->bhs[0]) {
108 printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", 108 printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n",
109 (unsigned long long)blocknr); 109 (llu)blocknr);
110 return -EIO; 110 return -EIO;
111 } 111 }
112 fatent->nr_bhs = 1; 112 fatent->nr_bhs = 1;
@@ -316,10 +316,20 @@ static inline int fat_ent_update_ptr(struct super_block *sb,
316 /* Is this fatent's blocks including this entry? */ 316 /* Is this fatent's blocks including this entry? */
317 if (!fatent->nr_bhs || bhs[0]->b_blocknr != blocknr) 317 if (!fatent->nr_bhs || bhs[0]->b_blocknr != blocknr)
318 return 0; 318 return 0;
319 /* Does this entry need the next block? */ 319 if (sbi->fat_bits == 12) {
320 if (sbi->fat_bits == 12 && (offset + 1) >= sb->s_blocksize) { 320 if ((offset + 1) < sb->s_blocksize) {
321 if (fatent->nr_bhs != 2 || bhs[1]->b_blocknr != (blocknr + 1)) 321 /* This entry is on bhs[0]. */
322 return 0; 322 if (fatent->nr_bhs == 2) {
323 brelse(bhs[1]);
324 fatent->nr_bhs = 1;
325 }
326 } else {
327 /* This entry needs the next block. */
328 if (fatent->nr_bhs != 2)
329 return 0;
330 if (bhs[1]->b_blocknr != (blocknr + 1))
331 return 0;
332 }
323 } 333 }
324 ops->ent_set_ptr(fatent, offset); 334 ops->ent_set_ptr(fatent, offset);
325 return 1; 335 return 1;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index ddde37025ca6..f06a4e525ece 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -10,13 +10,13 @@
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/mount.h> 11#include <linux/mount.h>
12#include <linux/time.h> 12#include <linux/time.h>
13#include <linux/msdos_fs.h>
14#include <linux/buffer_head.h> 13#include <linux/buffer_head.h>
15#include <linux/writeback.h> 14#include <linux/writeback.h>
16#include <linux/backing-dev.h> 15#include <linux/backing-dev.h>
17#include <linux/blkdev.h> 16#include <linux/blkdev.h>
18#include <linux/fsnotify.h> 17#include <linux/fsnotify.h>
19#include <linux/security.h> 18#include <linux/security.h>
19#include "fat.h"
20 20
21int fat_generic_ioctl(struct inode *inode, struct file *filp, 21int fat_generic_ioctl(struct inode *inode, struct file *filp,
22 unsigned int cmd, unsigned long arg) 22 unsigned int cmd, unsigned long arg)
@@ -29,10 +29,9 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
29 { 29 {
30 u32 attr; 30 u32 attr;
31 31
32 if (inode->i_ino == MSDOS_ROOT_INO) 32 mutex_lock(&inode->i_mutex);
33 attr = ATTR_DIR; 33 attr = fat_make_attrs(inode);
34 else 34 mutex_unlock(&inode->i_mutex);
35 attr = fat_attr(inode);
36 35
37 return put_user(attr, user_attr); 36 return put_user(attr, user_attr);
38 } 37 }
@@ -62,20 +61,16 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
62 /* Merge in ATTR_VOLUME and ATTR_DIR */ 61 /* Merge in ATTR_VOLUME and ATTR_DIR */
63 attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) | 62 attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) |
64 (is_dir ? ATTR_DIR : 0); 63 (is_dir ? ATTR_DIR : 0);
65 oldattr = fat_attr(inode); 64 oldattr = fat_make_attrs(inode);
66 65
67 /* Equivalent to a chmod() */ 66 /* Equivalent to a chmod() */
68 ia.ia_valid = ATTR_MODE | ATTR_CTIME; 67 ia.ia_valid = ATTR_MODE | ATTR_CTIME;
69 ia.ia_ctime = current_fs_time(inode->i_sb); 68 ia.ia_ctime = current_fs_time(inode->i_sb);
70 if (is_dir) { 69 if (is_dir)
71 ia.ia_mode = MSDOS_MKMODE(attr, 70 ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO);
72 S_IRWXUGO & ~sbi->options.fs_dmask) 71 else {
73 | S_IFDIR; 72 ia.ia_mode = fat_make_mode(sbi, attr,
74 } else { 73 S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO));
75 ia.ia_mode = MSDOS_MKMODE(attr,
76 (S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO))
77 & ~sbi->options.fs_fmask)
78 | S_IFREG;
79 } 74 }
80 75
81 /* The root directory has no attributes */ 76 /* The root directory has no attributes */
@@ -115,7 +110,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
115 inode->i_flags &= S_IMMUTABLE; 110 inode->i_flags &= S_IMMUTABLE;
116 } 111 }
117 112
118 MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED; 113 fat_save_attrs(inode, attr);
119 mark_inode_dirty(inode); 114 mark_inode_dirty(inode);
120up: 115up:
121 mnt_drop_write(filp->f_path.mnt); 116 mnt_drop_write(filp->f_path.mnt);
@@ -274,7 +269,7 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi,
274 269
275 /* 270 /*
276 * Note, the basic check is already done by a caller of 271 * Note, the basic check is already done by a caller of
277 * (attr->ia_mode & ~MSDOS_VALID_MODE) 272 * (attr->ia_mode & ~FAT_VALID_MODE)
278 */ 273 */
279 274
280 if (S_ISREG(inode->i_mode)) 275 if (S_ISREG(inode->i_mode))
@@ -287,11 +282,18 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi,
287 /* 282 /*
288 * Of the r and x bits, all (subject to umask) must be present. Of the 283 * Of the r and x bits, all (subject to umask) must be present. Of the
289 * w bits, either all (subject to umask) or none must be present. 284 * w bits, either all (subject to umask) or none must be present.
285 *
 286 * If fat_mode_can_hold_ro(inode) is false, the w bits cannot be changed.
290 */ 287 */
291 if ((perm & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO))) 288 if ((perm & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO)))
292 return -EPERM; 289 return -EPERM;
293 if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask))) 290 if (fat_mode_can_hold_ro(inode)) {
294 return -EPERM; 291 if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask)))
292 return -EPERM;
293 } else {
294 if ((perm & S_IWUGO) != (S_IWUGO & ~mask))
295 return -EPERM;
296 }
295 297
296 *mode_ptr &= S_IFMT | perm; 298 *mode_ptr &= S_IFMT | perm;
297 299
@@ -314,13 +316,15 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
314} 316}
315 317
316#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) 318#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
319/* valid file mode bits */
320#define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO)
317 321
318int fat_setattr(struct dentry *dentry, struct iattr *attr) 322int fat_setattr(struct dentry *dentry, struct iattr *attr)
319{ 323{
320 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); 324 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
321 struct inode *inode = dentry->d_inode; 325 struct inode *inode = dentry->d_inode;
322 int error = 0;
323 unsigned int ia_valid; 326 unsigned int ia_valid;
327 int error;
324 328
325 /* 329 /*
326 * Expand the file. Since inode_setattr() updates ->i_size 330 * Expand the file. Since inode_setattr() updates ->i_size
@@ -356,7 +360,7 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
356 ((attr->ia_valid & ATTR_GID) && 360 ((attr->ia_valid & ATTR_GID) &&
357 (attr->ia_gid != sbi->options.fs_gid)) || 361 (attr->ia_gid != sbi->options.fs_gid)) ||
358 ((attr->ia_valid & ATTR_MODE) && 362 ((attr->ia_valid & ATTR_MODE) &&
359 (attr->ia_mode & ~MSDOS_VALID_MODE))) 363 (attr->ia_mode & ~FAT_VALID_MODE)))
360 error = -EPERM; 364 error = -EPERM;
361 365
362 if (error) { 366 if (error) {
@@ -374,7 +378,8 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
374 attr->ia_valid &= ~ATTR_MODE; 378 attr->ia_valid &= ~ATTR_MODE;
375 } 379 }
376 380
377 error = inode_setattr(inode, attr); 381 if (attr->ia_valid)
382 error = inode_setattr(inode, attr);
378out: 383out:
379 return error; 384 return error;
380} 385}
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 2b2eec1283bf..bdd8fb7be2ca 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -16,7 +16,6 @@
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/smp_lock.h> 17#include <linux/smp_lock.h>
18#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19#include <linux/msdos_fs.h>
20#include <linux/pagemap.h> 19#include <linux/pagemap.h>
21#include <linux/mpage.h> 20#include <linux/mpage.h>
22#include <linux/buffer_head.h> 21#include <linux/buffer_head.h>
@@ -27,7 +26,9 @@
27#include <linux/uio.h> 26#include <linux/uio.h>
28#include <linux/writeback.h> 27#include <linux/writeback.h>
29#include <linux/log2.h> 28#include <linux/log2.h>
29#include <linux/hash.h>
30#include <asm/unaligned.h> 30#include <asm/unaligned.h>
31#include "fat.h"
31 32
32#ifndef CONFIG_FAT_DEFAULT_IOCHARSET 33#ifndef CONFIG_FAT_DEFAULT_IOCHARSET
33/* if user don't select VFAT, this is undefined. */ 34/* if user don't select VFAT, this is undefined. */
@@ -63,7 +64,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
63 sector_t phys; 64 sector_t phys;
64 int err, offset; 65 int err, offset;
65 66
66 err = fat_bmap(inode, iblock, &phys, &mapped_blocks); 67 err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
67 if (err) 68 if (err)
68 return err; 69 return err;
69 if (phys) { 70 if (phys) {
@@ -93,7 +94,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
93 *max_blocks = min(mapped_blocks, *max_blocks); 94 *max_blocks = min(mapped_blocks, *max_blocks);
94 MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits; 95 MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits;
95 96
96 err = fat_bmap(inode, iblock, &phys, &mapped_blocks); 97 err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
97 if (err) 98 if (err)
98 return err; 99 return err;
99 100
@@ -198,7 +199,14 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
198 199
199static sector_t _fat_bmap(struct address_space *mapping, sector_t block) 200static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
200{ 201{
201 return generic_block_bmap(mapping, block, fat_get_block); 202 sector_t blocknr;
203
204 /* fat_get_cluster() assumes the requested blocknr isn't truncated. */
205 mutex_lock(&mapping->host->i_mutex);
206 blocknr = generic_block_bmap(mapping, block, fat_get_block);
207 mutex_unlock(&mapping->host->i_mutex);
208
209 return blocknr;
202} 210}
203 211
204static const struct address_space_operations fat_aops = { 212static const struct address_space_operations fat_aops = {
@@ -247,25 +255,21 @@ static void fat_hash_init(struct super_block *sb)
247 INIT_HLIST_HEAD(&sbi->inode_hashtable[i]); 255 INIT_HLIST_HEAD(&sbi->inode_hashtable[i]);
248} 256}
249 257
250static inline unsigned long fat_hash(struct super_block *sb, loff_t i_pos) 258static inline unsigned long fat_hash(loff_t i_pos)
251{ 259{
252 unsigned long tmp = (unsigned long)i_pos | (unsigned long) sb; 260 return hash_32(i_pos, FAT_HASH_BITS);
253 tmp = tmp + (tmp >> FAT_HASH_BITS) + (tmp >> FAT_HASH_BITS * 2);
254 return tmp & FAT_HASH_MASK;
255} 261}
256 262
257void fat_attach(struct inode *inode, loff_t i_pos) 263void fat_attach(struct inode *inode, loff_t i_pos)
258{ 264{
259 struct super_block *sb = inode->i_sb; 265 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
260 struct msdos_sb_info *sbi = MSDOS_SB(sb); 266 struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos);
261 267
262 spin_lock(&sbi->inode_hash_lock); 268 spin_lock(&sbi->inode_hash_lock);
263 MSDOS_I(inode)->i_pos = i_pos; 269 MSDOS_I(inode)->i_pos = i_pos;
264 hlist_add_head(&MSDOS_I(inode)->i_fat_hash, 270 hlist_add_head(&MSDOS_I(inode)->i_fat_hash, head);
265 sbi->inode_hashtable + fat_hash(sb, i_pos));
266 spin_unlock(&sbi->inode_hash_lock); 271 spin_unlock(&sbi->inode_hash_lock);
267} 272}
268
269EXPORT_SYMBOL_GPL(fat_attach); 273EXPORT_SYMBOL_GPL(fat_attach);
270 274
271void fat_detach(struct inode *inode) 275void fat_detach(struct inode *inode)
@@ -276,13 +280,12 @@ void fat_detach(struct inode *inode)
276 hlist_del_init(&MSDOS_I(inode)->i_fat_hash); 280 hlist_del_init(&MSDOS_I(inode)->i_fat_hash);
277 spin_unlock(&sbi->inode_hash_lock); 281 spin_unlock(&sbi->inode_hash_lock);
278} 282}
279
280EXPORT_SYMBOL_GPL(fat_detach); 283EXPORT_SYMBOL_GPL(fat_detach);
281 284
282struct inode *fat_iget(struct super_block *sb, loff_t i_pos) 285struct inode *fat_iget(struct super_block *sb, loff_t i_pos)
283{ 286{
284 struct msdos_sb_info *sbi = MSDOS_SB(sb); 287 struct msdos_sb_info *sbi = MSDOS_SB(sb);
285 struct hlist_head *head = sbi->inode_hashtable + fat_hash(sb, i_pos); 288 struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos);
286 struct hlist_node *_p; 289 struct hlist_node *_p;
287 struct msdos_inode_info *i; 290 struct msdos_inode_info *i;
288 struct inode *inode = NULL; 291 struct inode *inode = NULL;
@@ -341,8 +344,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
341 344
342 if ((de->attr & ATTR_DIR) && !IS_FREE(de->name)) { 345 if ((de->attr & ATTR_DIR) && !IS_FREE(de->name)) {
343 inode->i_generation &= ~1; 346 inode->i_generation &= ~1;
344 inode->i_mode = MSDOS_MKMODE(de->attr, 347 inode->i_mode = fat_make_mode(sbi, de->attr, S_IRWXUGO);
345 S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR;
346 inode->i_op = sbi->dir_ops; 348 inode->i_op = sbi->dir_ops;
347 inode->i_fop = &fat_dir_operations; 349 inode->i_fop = &fat_dir_operations;
348 350
@@ -359,10 +361,9 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
359 inode->i_nlink = fat_subdirs(inode); 361 inode->i_nlink = fat_subdirs(inode);
360 } else { /* not a directory */ 362 } else { /* not a directory */
361 inode->i_generation |= 1; 363 inode->i_generation |= 1;
362 inode->i_mode = MSDOS_MKMODE(de->attr, 364 inode->i_mode = fat_make_mode(sbi, de->attr,
363 ((sbi->options.showexec && !is_exec(de->name + 8)) 365 ((sbi->options.showexec && !is_exec(de->name + 8))
364 ? S_IRUGO|S_IWUGO : S_IRWXUGO) 366 ? S_IRUGO|S_IWUGO : S_IRWXUGO));
365 & ~sbi->options.fs_fmask) | S_IFREG;
366 MSDOS_I(inode)->i_start = le16_to_cpu(de->start); 367 MSDOS_I(inode)->i_start = le16_to_cpu(de->start);
367 if (sbi->fat_bits == 32) 368 if (sbi->fat_bits == 32)
368 MSDOS_I(inode)->i_start |= (le16_to_cpu(de->starthi) << 16); 369 MSDOS_I(inode)->i_start |= (le16_to_cpu(de->starthi) << 16);
@@ -378,25 +379,16 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
378 if (sbi->options.sys_immutable) 379 if (sbi->options.sys_immutable)
379 inode->i_flags |= S_IMMUTABLE; 380 inode->i_flags |= S_IMMUTABLE;
380 } 381 }
381 MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED; 382 fat_save_attrs(inode, de->attr);
383
382 inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) 384 inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
383 & ~((loff_t)sbi->cluster_size - 1)) >> 9; 385 & ~((loff_t)sbi->cluster_size - 1)) >> 9;
384 inode->i_mtime.tv_sec = 386
385 date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date), 387 fat_time_fat2unix(sbi, &inode->i_mtime, de->time, de->date, 0);
386 sbi->options.tz_utc);
387 inode->i_mtime.tv_nsec = 0;
388 if (sbi->options.isvfat) { 388 if (sbi->options.isvfat) {
389 int secs = de->ctime_cs / 100; 389 fat_time_fat2unix(sbi, &inode->i_ctime, de->ctime,
390 int csecs = de->ctime_cs % 100; 390 de->cdate, de->ctime_cs);
391 inode->i_ctime.tv_sec = 391 fat_time_fat2unix(sbi, &inode->i_atime, 0, de->adate, 0);
392 date_dos2unix(le16_to_cpu(de->ctime),
393 le16_to_cpu(de->cdate),
394 sbi->options.tz_utc) + secs;
395 inode->i_ctime.tv_nsec = csecs * 10000000;
396 inode->i_atime.tv_sec =
397 date_dos2unix(0, le16_to_cpu(de->adate),
398 sbi->options.tz_utc);
399 inode->i_atime.tv_nsec = 0;
400 } else 392 } else
401 inode->i_ctime = inode->i_atime = inode->i_mtime; 393 inode->i_ctime = inode->i_atime = inode->i_mtime;
402 394
@@ -443,13 +435,8 @@ static void fat_delete_inode(struct inode *inode)
443 435
444static void fat_clear_inode(struct inode *inode) 436static void fat_clear_inode(struct inode *inode)
445{ 437{
446 struct super_block *sb = inode->i_sb;
447 struct msdos_sb_info *sbi = MSDOS_SB(sb);
448
449 spin_lock(&sbi->inode_hash_lock);
450 fat_cache_inval_inode(inode); 438 fat_cache_inval_inode(inode);
451 hlist_del_init(&MSDOS_I(inode)->i_fat_hash); 439 fat_detach(inode);
452 spin_unlock(&sbi->inode_hash_lock);
453} 440}
454 441
455static void fat_write_super(struct super_block *sb) 442static void fat_write_super(struct super_block *sb)
@@ -555,6 +542,20 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
555 return 0; 542 return 0;
556} 543}
557 544
545static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi,
546 struct inode *inode)
547{
548 loff_t i_pos;
549#if BITS_PER_LONG == 32
550 spin_lock(&sbi->inode_hash_lock);
551#endif
552 i_pos = MSDOS_I(inode)->i_pos;
553#if BITS_PER_LONG == 32
554 spin_unlock(&sbi->inode_hash_lock);
555#endif
556 return i_pos;
557}
558
558static int fat_write_inode(struct inode *inode, int wait) 559static int fat_write_inode(struct inode *inode, int wait)
559{ 560{
560 struct super_block *sb = inode->i_sb; 561 struct super_block *sb = inode->i_sb;
@@ -564,9 +565,12 @@ static int fat_write_inode(struct inode *inode, int wait)
564 loff_t i_pos; 565 loff_t i_pos;
565 int err; 566 int err;
566 567
568 if (inode->i_ino == MSDOS_ROOT_INO)
569 return 0;
570
567retry: 571retry:
568 i_pos = MSDOS_I(inode)->i_pos; 572 i_pos = fat_i_pos_read(sbi, inode);
569 if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) 573 if (!i_pos)
570 return 0; 574 return 0;
571 575
572 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); 576 bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
@@ -588,19 +592,17 @@ retry:
588 raw_entry->size = 0; 592 raw_entry->size = 0;
589 else 593 else
590 raw_entry->size = cpu_to_le32(inode->i_size); 594 raw_entry->size = cpu_to_le32(inode->i_size);
591 raw_entry->attr = fat_attr(inode); 595 raw_entry->attr = fat_make_attrs(inode);
592 raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart); 596 raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart);
593 raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); 597 raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
594 fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, 598 fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time,
595 &raw_entry->date, sbi->options.tz_utc); 599 &raw_entry->date, NULL);
596 if (sbi->options.isvfat) { 600 if (sbi->options.isvfat) {
597 __le16 atime; 601 __le16 atime;
598 fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime, 602 fat_time_unix2fat(sbi, &inode->i_ctime, &raw_entry->ctime,
599 &raw_entry->cdate, sbi->options.tz_utc); 603 &raw_entry->cdate, &raw_entry->ctime_cs);
600 fat_date_unix2dos(inode->i_atime.tv_sec, &atime, 604 fat_time_unix2fat(sbi, &inode->i_atime, &atime,
601 &raw_entry->adate, sbi->options.tz_utc); 605 &raw_entry->adate, NULL);
602 raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 +
603 inode->i_ctime.tv_nsec / 10000000;
604 } 606 }
605 spin_unlock(&sbi->inode_hash_lock); 607 spin_unlock(&sbi->inode_hash_lock);
606 mark_buffer_dirty(bh); 608 mark_buffer_dirty(bh);
@@ -819,8 +821,10 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
819 seq_puts(m, ",uni_xlate"); 821 seq_puts(m, ",uni_xlate");
820 if (!opts->numtail) 822 if (!opts->numtail)
821 seq_puts(m, ",nonumtail"); 823 seq_puts(m, ",nonumtail");
824 if (opts->rodir)
825 seq_puts(m, ",rodir");
822 } 826 }
823 if (sbi->options.flush) 827 if (opts->flush)
824 seq_puts(m, ",flush"); 828 seq_puts(m, ",flush");
825 if (opts->tz_utc) 829 if (opts->tz_utc)
826 seq_puts(m, ",tz=UTC"); 830 seq_puts(m, ",tz=UTC");
@@ -836,7 +840,7 @@ enum {
836 Opt_charset, Opt_shortname_lower, Opt_shortname_win95, 840 Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
837 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, 841 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
838 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, 842 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
839 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err, 843 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err,
840}; 844};
841 845
842static const match_table_t fat_tokens = { 846static const match_table_t fat_tokens = {
@@ -908,6 +912,7 @@ static const match_table_t vfat_tokens = {
908 {Opt_nonumtail_yes, "nonumtail=yes"}, 912 {Opt_nonumtail_yes, "nonumtail=yes"},
909 {Opt_nonumtail_yes, "nonumtail=true"}, 913 {Opt_nonumtail_yes, "nonumtail=true"},
910 {Opt_nonumtail_yes, "nonumtail"}, 914 {Opt_nonumtail_yes, "nonumtail"},
915 {Opt_rodir, "rodir"},
911 {Opt_err, NULL} 916 {Opt_err, NULL}
912}; 917};
913 918
@@ -927,10 +932,13 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
927 opts->allow_utime = -1; 932 opts->allow_utime = -1;
928 opts->codepage = fat_default_codepage; 933 opts->codepage = fat_default_codepage;
929 opts->iocharset = fat_default_iocharset; 934 opts->iocharset = fat_default_iocharset;
930 if (is_vfat) 935 if (is_vfat) {
931 opts->shortname = VFAT_SFN_DISPLAY_LOWER|VFAT_SFN_CREATE_WIN95; 936 opts->shortname = VFAT_SFN_DISPLAY_LOWER|VFAT_SFN_CREATE_WIN95;
932 else 937 opts->rodir = 0;
938 } else {
933 opts->shortname = 0; 939 opts->shortname = 0;
940 opts->rodir = 1;
941 }
934 opts->name_check = 'n'; 942 opts->name_check = 'n';
935 opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0; 943 opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0;
936 opts->utf8 = opts->unicode_xlate = 0; 944 opts->utf8 = opts->unicode_xlate = 0;
@@ -1081,6 +1089,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1081 case Opt_nonumtail_yes: /* empty or 1 or yes or true */ 1089 case Opt_nonumtail_yes: /* empty or 1 or yes or true */
1082 opts->numtail = 0; /* negated option */ 1090 opts->numtail = 0; /* negated option */
1083 break; 1091 break;
1092 case Opt_rodir:
1093 opts->rodir = 1;
1094 break;
1084 1095
1085 /* obsolete mount options */ 1096 /* obsolete mount options */
1086 case Opt_obsolate: 1097 case Opt_obsolate:
@@ -1126,7 +1137,7 @@ static int fat_read_root(struct inode *inode)
1126 inode->i_gid = sbi->options.fs_gid; 1137 inode->i_gid = sbi->options.fs_gid;
1127 inode->i_version++; 1138 inode->i_version++;
1128 inode->i_generation = 0; 1139 inode->i_generation = 0;
1129 inode->i_mode = (S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR; 1140 inode->i_mode = fat_make_mode(sbi, ATTR_DIR, S_IRWXUGO);
1130 inode->i_op = sbi->dir_ops; 1141 inode->i_op = sbi->dir_ops;
1131 inode->i_fop = &fat_dir_operations; 1142 inode->i_fop = &fat_dir_operations;
1132 if (sbi->fat_bits == 32) { 1143 if (sbi->fat_bits == 32) {
@@ -1143,7 +1154,7 @@ static int fat_read_root(struct inode *inode)
1143 MSDOS_I(inode)->i_logstart = 0; 1154 MSDOS_I(inode)->i_logstart = 0;
1144 MSDOS_I(inode)->mmu_private = inode->i_size; 1155 MSDOS_I(inode)->mmu_private = inode->i_size;
1145 1156
1146 MSDOS_I(inode)->i_attrs = ATTR_NONE; 1157 fat_save_attrs(inode, ATTR_DIR);
1147 inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0; 1158 inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0;
1148 inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0; 1159 inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0;
1149 inode->i_nlink = fat_subdirs(inode)+2; 1160 inode->i_nlink = fat_subdirs(inode)+2;
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 79fb98ad36d4..ac39ebcc1496 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -8,8 +8,8 @@
8 8
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/msdos_fs.h>
12#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
12#include "fat.h"
13 13
14/* 14/*
15 * fat_fs_panic reports a severe file system problem and sets the file system 15 * fat_fs_panic reports a severe file system problem and sets the file system
@@ -124,8 +124,9 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)
124 mark_inode_dirty(inode); 124 mark_inode_dirty(inode);
125 } 125 }
126 if (new_fclus != (inode->i_blocks >> (sbi->cluster_bits - 9))) { 126 if (new_fclus != (inode->i_blocks >> (sbi->cluster_bits - 9))) {
127 fat_fs_panic(sb, "clusters badly computed (%d != %lu)", 127 fat_fs_panic(sb, "clusters badly computed (%d != %llu)",
128 new_fclus, inode->i_blocks >> (sbi->cluster_bits - 9)); 128 new_fclus,
129 (llu)(inode->i_blocks >> (sbi->cluster_bits - 9)));
129 fat_cache_inval_inode(inode); 130 fat_cache_inval_inode(inode);
130 } 131 }
131 inode->i_blocks += nr_cluster << (sbi->cluster_bits - 9); 132 inode->i_blocks += nr_cluster << (sbi->cluster_bits - 9);
@@ -135,65 +136,131 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)
135 136
136extern struct timezone sys_tz; 137extern struct timezone sys_tz;
137 138
139/*
140 * The epoch of FAT timestamp is 1980.
141 * : bits : value
142 * date: 0 - 4: day (1 - 31)
143 * date: 5 - 8: month (1 - 12)
144 * date: 9 - 15: year (0 - 127) from 1980
145 * time: 0 - 4: sec (0 - 29) 2sec counts
146 * time: 5 - 10: min (0 - 59)
147 * time: 11 - 15: hour (0 - 23)
148 */
149#define SECS_PER_MIN 60
150#define SECS_PER_HOUR (60 * 60)
151#define SECS_PER_DAY (SECS_PER_HOUR * 24)
152#define UNIX_SECS_1980 315532800L
153#if BITS_PER_LONG == 64
154#define UNIX_SECS_2108 4354819200L
155#endif
156/* days between 1.1.70 and 1.1.80 (2 leap days) */
157#define DAYS_DELTA (365 * 10 + 2)
158/* 120 (2100 - 1980) isn't leap year */
159#define YEAR_2100 120
160#define IS_LEAP_YEAR(y) (!((y) & 3) && (y) != YEAR_2100)
161
138/* Linear day numbers of the respective 1sts in non-leap years. */ 162/* Linear day numbers of the respective 1sts in non-leap years. */
139static int day_n[] = { 163static time_t days_in_year[] = {
140 /* Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec */ 164 /* Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec */
141 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0, 0 165 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0,
142}; 166};
143 167
144/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */ 168/* Convert a FAT time/date pair to a UNIX date (seconds since 1 1 70). */
145int date_dos2unix(unsigned short time, unsigned short date, int tz_utc) 169void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
170 __le16 __time, __le16 __date, u8 time_cs)
146{ 171{
147 int month, year, secs; 172 u16 time = le16_to_cpu(__time), date = le16_to_cpu(__date);
173 time_t second, day, leap_day, month, year;
148 174
149 /* 175 year = date >> 9;
150 * first subtract and mask after that... Otherwise, if 176 month = max(1, (date >> 5) & 0xf);
151 * date == 0, bad things happen 177 day = max(1, date & 0x1f) - 1;
152 */ 178
153 month = ((date >> 5) - 1) & 15; 179 leap_day = (year + 3) / 4;
154 year = date >> 9; 180 if (year > YEAR_2100) /* 2100 isn't leap year */
155 secs = (time & 31)*2+60*((time >> 5) & 63)+(time >> 11)*3600+86400* 181 leap_day--;
156 ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 && 182 if (IS_LEAP_YEAR(year) && month > 2)
157 month < 2 ? 1 : 0)+3653); 183 leap_day++;
158 /* days since 1.1.70 plus 80's leap day */ 184
159 if (!tz_utc) 185 second = (time & 0x1f) << 1;
160 secs += sys_tz.tz_minuteswest*60; 186 second += ((time >> 5) & 0x3f) * SECS_PER_MIN;
161 return secs; 187 second += (time >> 11) * SECS_PER_HOUR;
188 second += (year * 365 + leap_day
189 + days_in_year[month] + day
190 + DAYS_DELTA) * SECS_PER_DAY;
191
192 if (!sbi->options.tz_utc)
193 second += sys_tz.tz_minuteswest * SECS_PER_MIN;
194
195 if (time_cs) {
196 ts->tv_sec = second + (time_cs / 100);
197 ts->tv_nsec = (time_cs % 100) * 10000000;
198 } else {
199 ts->tv_sec = second;
200 ts->tv_nsec = 0;
201 }
162} 202}
163 203
164/* Convert linear UNIX date to a MS-DOS time/date pair. */ 204/* Convert linear UNIX date to a FAT time/date pair. */
165void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, int tz_utc) 205void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts,
206 __le16 *time, __le16 *date, u8 *time_cs)
166{ 207{
167 int day, year, nl_day, month; 208 time_t second = ts->tv_sec;
209 time_t day, leap_day, month, year;
168 210
169 if (!tz_utc) 211 if (!sbi->options.tz_utc)
170 unix_date -= sys_tz.tz_minuteswest*60; 212 second -= sys_tz.tz_minuteswest * SECS_PER_MIN;
171 213
172 /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */ 214 /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */
173 if (unix_date < 315532800) 215 if (second < UNIX_SECS_1980) {
174 unix_date = 315532800; 216 *time = 0;
175 217 *date = cpu_to_le16((0 << 9) | (1 << 5) | 1);
176 *time = cpu_to_le16((unix_date % 60)/2+(((unix_date/60) % 60) << 5)+ 218 if (time_cs)
177 (((unix_date/3600) % 24) << 11)); 219 *time_cs = 0;
178 day = unix_date/86400-3652; 220 return;
179 year = day/365; 221 }
180 if ((year+3)/4+365*year > day) 222#if BITS_PER_LONG == 64
223 if (second >= UNIX_SECS_2108) {
224 *time = cpu_to_le16((23 << 11) | (59 << 5) | 29);
225 *date = cpu_to_le16((127 << 9) | (12 << 5) | 31);
226 if (time_cs)
227 *time_cs = 199;
228 return;
229 }
230#endif
231
232 day = second / SECS_PER_DAY - DAYS_DELTA;
233 year = day / 365;
234 leap_day = (year + 3) / 4;
235 if (year > YEAR_2100) /* 2100 isn't leap year */
236 leap_day--;
237 if (year * 365 + leap_day > day)
181 year--; 238 year--;
182 day -= (year+3)/4+365*year; 239 leap_day = (year + 3) / 4;
183 if (day == 59 && !(year & 3)) { 240 if (year > YEAR_2100) /* 2100 isn't leap year */
184 nl_day = day; 241 leap_day--;
242 day -= year * 365 + leap_day;
243
244 if (IS_LEAP_YEAR(year) && day == days_in_year[3]) {
185 month = 2; 245 month = 2;
186 } else { 246 } else {
187 nl_day = (year & 3) || day <= 59 ? day : day-1; 247 if (IS_LEAP_YEAR(year) && day > days_in_year[3])
188 for (month = 0; month < 12; month++) { 248 day--;
189 if (day_n[month] > nl_day) 249 for (month = 1; month < 12; month++) {
250 if (days_in_year[month + 1] > day)
190 break; 251 break;
191 } 252 }
192 } 253 }
193 *date = cpu_to_le16(nl_day-day_n[month-1]+1+(month << 5)+(year << 9)); 254 day -= days_in_year[month];
194}
195 255
196EXPORT_SYMBOL_GPL(fat_date_unix2dos); 256 *time = cpu_to_le16(((second / SECS_PER_HOUR) % 24) << 11
257 | ((second / SECS_PER_MIN) % 60) << 5
258 | (second % SECS_PER_MIN) >> 1);
259 *date = cpu_to_le16((year << 9) | (month << 5) | (day + 1));
260 if (time_cs)
261 *time_cs = (ts->tv_sec & 1) * 100 + ts->tv_nsec / 10000000;
262}
263EXPORT_SYMBOL_GPL(fat_time_unix2fat);
197 264
198int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) 265int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
199{ 266{
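As a quick standalone illustration of the date/time bit layout documented at the top of this hunk (day in bits 0-4, month in bits 5-8, years since 1980 in bits 9-15; 2-second units in bits 0-4, minutes in bits 5-10, hours in bits 11-15), the following userspace sketch packs a timestamp into the two on-disk fields:

#include <stdio.h>
#include <stdint.h>

/* Pack a broken-down local time into the on-disk FAT fields, following
 * the bit layout documented above (no range checking, illustration only). */
static void pack_fat_time(int year, int mon, int day,
			  int hour, int min, int sec,
			  uint16_t *date, uint16_t *time)
{
	*date = (uint16_t)(((year - 1980) << 9) | (mon << 5) | day);
	*time = (uint16_t)((hour << 11) | (min << 5) | (sec >> 1));
}

int main(void)
{
	uint16_t date, time;

	/* Seconds are stored in 2-second granularity, so 49 rounds down. */
	pack_fat_time(2008, 12, 18, 15, 54, 49, &date, &time);
	printf("date=%#06x time=%#06x\n", date, time);
	return 0;
}
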
diff --git a/fs/msdos/namei.c b/fs/fat/namei_msdos.c
index e844b9809d27..7ba03a4acbe0 100644
--- a/fs/msdos/namei.c
+++ b/fs/fat/namei_msdos.c
@@ -9,8 +9,8 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/time.h> 10#include <linux/time.h>
11#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
12#include <linux/msdos_fs.h>
13#include <linux/smp_lock.h> 12#include <linux/smp_lock.h>
13#include "fat.h"
14 14
15/* Characters that are undesirable in an MS-DOS file name */ 15/* Characters that are undesirable in an MS-DOS file name */
16static unsigned char bad_chars[] = "*?<>|\""; 16static unsigned char bad_chars[] = "*?<>|\"";
@@ -203,33 +203,37 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry,
203{ 203{
204 struct super_block *sb = dir->i_sb; 204 struct super_block *sb = dir->i_sb;
205 struct fat_slot_info sinfo; 205 struct fat_slot_info sinfo;
206 struct inode *inode = NULL; 206 struct inode *inode;
207 int res; 207 int err;
208
209 dentry->d_op = &msdos_dentry_operations;
210 208
211 lock_super(sb); 209 lock_super(sb);
212 res = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); 210
213 if (res == -ENOENT) 211 err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo);
214 goto add; 212 if (err) {
215 if (res < 0) 213 if (err == -ENOENT) {
216 goto out; 214 inode = NULL;
215 goto out;
216 }
217 goto error;
218 }
219
217 inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); 220 inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos);
218 brelse(sinfo.bh); 221 brelse(sinfo.bh);
219 if (IS_ERR(inode)) { 222 if (IS_ERR(inode)) {
220 res = PTR_ERR(inode); 223 err = PTR_ERR(inode);
221 goto out; 224 goto error;
222 } 225 }
223add: 226out:
224 res = 0; 227 unlock_super(sb);
228 dentry->d_op = &msdos_dentry_operations;
225 dentry = d_splice_alias(inode, dentry); 229 dentry = d_splice_alias(inode, dentry);
226 if (dentry) 230 if (dentry)
227 dentry->d_op = &msdos_dentry_operations; 231 dentry->d_op = &msdos_dentry_operations;
228out: 232 return dentry;
233
234error:
229 unlock_super(sb); 235 unlock_super(sb);
230 if (!res) 236 return ERR_PTR(err);
231 return dentry;
232 return ERR_PTR(res);
233} 237}
234 238
235/***** Creates a directory entry (name is already formatted). */ 239/***** Creates a directory entry (name is already formatted). */
@@ -247,7 +251,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
247 if (is_hid) 251 if (is_hid)
248 de.attr |= ATTR_HIDDEN; 252 de.attr |= ATTR_HIDDEN;
249 de.lcase = 0; 253 de.lcase = 0;
250 fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); 254 fat_time_unix2fat(sbi, ts, &time, &date, NULL);
251 de.cdate = de.adate = 0; 255 de.cdate = de.adate = 0;
252 de.ctime = 0; 256 de.ctime = 0;
253 de.ctime_cs = 0; 257 de.ctime_cs = 0;
diff --git a/fs/vfat/namei.c b/fs/fat/namei_vfat.c
index 155c10b4adbd..bf326d4356a3 100644
--- a/fs/vfat/namei.c
+++ b/fs/fat/namei_vfat.c
@@ -16,36 +16,75 @@
16 */ 16 */
17 17
18#include <linux/module.h> 18#include <linux/module.h>
19
20#include <linux/jiffies.h> 19#include <linux/jiffies.h>
21#include <linux/msdos_fs.h>
22#include <linux/ctype.h> 20#include <linux/ctype.h>
23#include <linux/slab.h> 21#include <linux/slab.h>
24#include <linux/smp_lock.h> 22#include <linux/smp_lock.h>
25#include <linux/buffer_head.h> 23#include <linux/buffer_head.h>
26#include <linux/namei.h> 24#include <linux/namei.h>
25#include "fat.h"
27 26
28static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) 27/*
 28 * If a new entry was created in the parent, it may also have created the
 29 * 8.3 alias (the short name of the long name). So the parent may hold a
 30 * negative dentry that matches the newly created 8.3 alias.
 31 *
 32 * If that happened, the negative dentry isn't actually negative
 33 * anymore, so drop it.
 34 */
35static int vfat_revalidate_shortname(struct dentry *dentry)
29{ 36{
30 int ret = 1; 37 int ret = 1;
31 38 spin_lock(&dentry->d_lock);
32 if (!dentry->d_inode && 39 if (dentry->d_time != dentry->d_parent->d_inode->i_version)
33 nd && !(nd->flags & LOOKUP_CONTINUE) && (nd->flags & LOOKUP_CREATE))
34 /*
35 * negative dentry is dropped, in order to make sure
36 * to use the name which a user desires if this is
37 * create path.
38 */
39 ret = 0; 40 ret = 0;
40 else { 41 spin_unlock(&dentry->d_lock);
41 spin_lock(&dentry->d_lock);
42 if (dentry->d_time != dentry->d_parent->d_inode->i_version)
43 ret = 0;
44 spin_unlock(&dentry->d_lock);
45 }
46 return ret; 42 return ret;
47} 43}
48 44
45static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
46{
 47 /* This is not a negative dentry. Always valid. */
48 if (dentry->d_inode)
49 return 1;
50 return vfat_revalidate_shortname(dentry);
51}
52
53static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
54{
55 /*
 56 * This is not a negative dentry. Always valid.
 57 *
 58 * Note, rename() to an existing directory entry will get ->d_inode,
 59 * and will use the existing name, which is not the name the user specified.
 60 *
 61 * We might be able to drop this positive dentry here, but dropping a
 62 * positive dentry is not a good idea. So cases like
 63 * rename("filename", "FILENAME") remain unsupported for now.
64 */
65 if (dentry->d_inode)
66 return 1;
67
68 /*
 69 * This may be nfsd (or something); either way, we can't tell the
 70 * intent of this lookup. Since it could be for creation, drop it.
71 */
72 if (!nd)
73 return 0;
74
75 /*
 76 * Drop the negative dentry, to make sure the case-sensitive
 77 * name specified by the user is the one used if this is
 78 * for creation.
79 */
80 if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) {
81 if (nd->flags & LOOKUP_CREATE)
82 return 0;
83 }
84
85 return vfat_revalidate_shortname(dentry);
86}
87
49/* returns the length of a struct qstr, ignoring trailing dots */ 88/* returns the length of a struct qstr, ignoring trailing dots */
50static unsigned int vfat_striptail_len(struct qstr *qstr) 89static unsigned int vfat_striptail_len(struct qstr *qstr)
51{ 90{
@@ -127,25 +166,16 @@ static int vfat_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b)
127 return 1; 166 return 1;
128} 167}
129 168
130static struct dentry_operations vfat_dentry_ops[4] = { 169static struct dentry_operations vfat_ci_dentry_ops = {
131 { 170 .d_revalidate = vfat_revalidate_ci,
132 .d_hash = vfat_hashi, 171 .d_hash = vfat_hashi,
133 .d_compare = vfat_cmpi, 172 .d_compare = vfat_cmpi,
134 }, 173};
135 { 174
136 .d_revalidate = vfat_revalidate, 175static struct dentry_operations vfat_dentry_ops = {
137 .d_hash = vfat_hashi, 176 .d_revalidate = vfat_revalidate,
138 .d_compare = vfat_cmpi, 177 .d_hash = vfat_hash,
139 }, 178 .d_compare = vfat_cmp,
140 {
141 .d_hash = vfat_hash,
142 .d_compare = vfat_cmp,
143 },
144 {
145 .d_revalidate = vfat_revalidate,
146 .d_hash = vfat_hash,
147 .d_compare = vfat_cmp,
148 }
149}; 179};
150 180
151/* Characters that are undesirable in an MS-DOS file name */ 181/* Characters that are undesirable in an MS-DOS file name */
@@ -569,6 +599,7 @@ static int vfat_build_slots(struct inode *dir, const unsigned char *name,
569 unsigned char msdos_name[MSDOS_NAME]; 599 unsigned char msdos_name[MSDOS_NAME];
570 wchar_t *uname; 600 wchar_t *uname;
571 __le16 time, date; 601 __le16 time, date;
602 u8 time_cs;
572 int err, ulen, usize, i; 603 int err, ulen, usize, i;
573 loff_t offset; 604 loff_t offset;
574 605
@@ -621,10 +652,10 @@ shortname:
621 memcpy(de->name, msdos_name, MSDOS_NAME); 652 memcpy(de->name, msdos_name, MSDOS_NAME);
622 de->attr = is_dir ? ATTR_DIR : ATTR_ARCH; 653 de->attr = is_dir ? ATTR_DIR : ATTR_ARCH;
623 de->lcase = lcase; 654 de->lcase = lcase;
624 fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); 655 fat_time_unix2fat(sbi, ts, &time, &date, &time_cs);
625 de->time = de->ctime = time; 656 de->time = de->ctime = time;
626 de->date = de->cdate = de->adate = date; 657 de->date = de->cdate = de->adate = date;
627 de->ctime_cs = 0; 658 de->ctime_cs = time_cs;
628 de->start = cpu_to_le16(cluster); 659 de->start = cpu_to_le16(cluster);
629 de->starthi = cpu_to_le16(cluster >> 16); 660 de->starthi = cpu_to_le16(cluster >> 16);
630 de->size = 0; 661 de->size = 0;
@@ -683,46 +714,58 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
683{ 714{
684 struct super_block *sb = dir->i_sb; 715 struct super_block *sb = dir->i_sb;
685 struct fat_slot_info sinfo; 716 struct fat_slot_info sinfo;
686 struct inode *inode = NULL; 717 struct inode *inode;
687 struct dentry *alias; 718 struct dentry *alias;
688 int err, table; 719 int err;
689 720
690 lock_super(sb); 721 lock_super(sb);
691 table = (MSDOS_SB(sb)->options.name_check == 's') ? 2 : 0;
692 dentry->d_op = &vfat_dentry_ops[table];
693 722
694 err = vfat_find(dir, &dentry->d_name, &sinfo); 723 err = vfat_find(dir, &dentry->d_name, &sinfo);
695 if (err) { 724 if (err) {
696 table++; 725 if (err == -ENOENT) {
726 inode = NULL;
727 goto out;
728 }
697 goto error; 729 goto error;
698 } 730 }
731
699 inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); 732 inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos);
700 brelse(sinfo.bh); 733 brelse(sinfo.bh);
701 if (IS_ERR(inode)) { 734 if (IS_ERR(inode)) {
702 unlock_super(sb); 735 err = PTR_ERR(inode);
703 return ERR_CAST(inode); 736 goto error;
704 } 737 }
705 alias = d_find_alias(inode);
706 if (alias) {
707 if (d_invalidate(alias) == 0)
708 dput(alias);
709 else {
710 iput(inode);
711 unlock_super(sb);
712 return alias;
713 }
714 738
739 alias = d_find_alias(inode);
740 if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
741 /*
742 * This inode has non DCACHE_DISCONNECTED dentry. This
743 * means, the user did ->lookup() by an another name
744 * (longname vs 8.3 alias of it) in past.
745 *
746 * Switch to new one for reason of locality if possible.
747 */
748 BUG_ON(d_unhashed(alias));
749 if (!S_ISDIR(inode->i_mode))
750 d_move(alias, dentry);
751 iput(inode);
752 unlock_super(sb);
753 return alias;
715 } 754 }
716error: 755out:
717 unlock_super(sb); 756 unlock_super(sb);
718 dentry->d_op = &vfat_dentry_ops[table]; 757 dentry->d_op = sb->s_root->d_op;
719 dentry->d_time = dentry->d_parent->d_inode->i_version; 758 dentry->d_time = dentry->d_parent->d_inode->i_version;
720 dentry = d_splice_alias(inode, dentry); 759 dentry = d_splice_alias(inode, dentry);
721 if (dentry) { 760 if (dentry) {
722 dentry->d_op = &vfat_dentry_ops[table]; 761 dentry->d_op = sb->s_root->d_op;
723 dentry->d_time = dentry->d_parent->d_inode->i_version; 762 dentry->d_time = dentry->d_parent->d_inode->i_version;
724 } 763 }
725 return dentry; 764 return dentry;
765
766error:
767 unlock_super(sb);
768 return ERR_PTR(err);
726} 769}
727 770
728static int vfat_create(struct inode *dir, struct dentry *dentry, int mode, 771static int vfat_create(struct inode *dir, struct dentry *dentry, int mode,
@@ -1014,9 +1057,9 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent)
1014 return res; 1057 return res;
1015 1058
1016 if (MSDOS_SB(sb)->options.name_check != 's') 1059 if (MSDOS_SB(sb)->options.name_check != 's')
1017 sb->s_root->d_op = &vfat_dentry_ops[0]; 1060 sb->s_root->d_op = &vfat_ci_dentry_ops;
1018 else 1061 else
1019 sb->s_root->d_op = &vfat_dentry_ops[2]; 1062 sb->s_root->d_op = &vfat_dentry_ops;
1020 1063
1021 return 0; 1064 return 0;
1022} 1065}
diff --git a/fs/fcntl.c b/fs/fcntl.c
index ac4f7db9f134..549daf8005fb 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -19,6 +19,7 @@
19#include <linux/signal.h> 19#include <linux/signal.h>
20#include <linux/rcupdate.h> 20#include <linux/rcupdate.h>
21#include <linux/pid_namespace.h> 21#include <linux/pid_namespace.h>
22#include <linux/smp_lock.h>
22 23
23#include <asm/poll.h> 24#include <asm/poll.h>
24#include <asm/siginfo.h> 25#include <asm/siginfo.h>
@@ -175,6 +176,11 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
175 if (error) 176 if (error)
176 return error; 177 return error;
177 178
179 /*
180 * We still need a lock here for now to keep multiple FASYNC calls
181 * from racing with each other.
182 */
183 lock_kernel();
178 if ((arg ^ filp->f_flags) & FASYNC) { 184 if ((arg ^ filp->f_flags) & FASYNC) {
179 if (filp->f_op && filp->f_op->fasync) { 185 if (filp->f_op && filp->f_op->fasync) {
180 error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); 186 error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
@@ -185,6 +191,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
185 191
186 filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); 192 filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
187 out: 193 out:
194 unlock_kernel();
188 return error; 195 return error;
189} 196}
190 197
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 6ae9011b95eb..2f34f8f2134b 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -81,7 +81,7 @@ extern int do_rmdir(const char *file);
81extern int do_mknod(const char *file, int mode, unsigned int major, 81extern int do_mknod(const char *file, int mode, unsigned int major,
82 unsigned int minor); 82 unsigned int minor);
83extern int link_file(const char *from, const char *to); 83extern int link_file(const char *from, const char *to);
84extern int do_readlink(char *file, char *buf, int size); 84extern int hostfs_do_readlink(char *file, char *buf, int size);
85extern int rename_file(char *from, char *to); 85extern int rename_file(char *from, char *to);
86extern int do_statfs(char *root, long *bsize_out, long long *blocks_out, 86extern int do_statfs(char *root, long *bsize_out, long long *blocks_out,
87 long long *bfree_out, long long *bavail_out, 87 long long *bfree_out, long long *bavail_out,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 7f34f4385de0..3a31451ac170 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -168,7 +168,7 @@ static char *follow_link(char *link)
168 if (name == NULL) 168 if (name == NULL)
169 goto out; 169 goto out;
170 170
171 n = do_readlink(link, name, len); 171 n = hostfs_do_readlink(link, name, len);
172 if (n < len) 172 if (n < len)
173 break; 173 break;
174 len *= 2; 174 len *= 2;
@@ -943,7 +943,7 @@ int hostfs_link_readpage(struct file *file, struct page *page)
943 name = inode_name(page->mapping->host, 0); 943 name = inode_name(page->mapping->host, 0);
944 if (name == NULL) 944 if (name == NULL)
945 return -ENOMEM; 945 return -ENOMEM;
946 err = do_readlink(name, buffer, PAGE_CACHE_SIZE); 946 err = hostfs_do_readlink(name, buffer, PAGE_CACHE_SIZE);
947 kfree(name); 947 kfree(name);
948 if (err == PAGE_CACHE_SIZE) 948 if (err == PAGE_CACHE_SIZE)
949 err = -E2BIG; 949 err = -E2BIG;
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 53fd0a67c11a..b79424f93282 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -377,7 +377,7 @@ int link_file(const char *to, const char *from)
377 return 0; 377 return 0;
378} 378}
379 379
380int do_readlink(char *file, char *buf, int size) 380int hostfs_do_readlink(char *file, char *buf, int size)
381{ 381{
382 int n; 382 int n;
383 383
diff --git a/fs/inotify.c b/fs/inotify.c
index 690e72595e6e..dae3f28f30d4 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -106,6 +106,20 @@ void get_inotify_watch(struct inotify_watch *watch)
106} 106}
107EXPORT_SYMBOL_GPL(get_inotify_watch); 107EXPORT_SYMBOL_GPL(get_inotify_watch);
108 108
109int pin_inotify_watch(struct inotify_watch *watch)
110{
111 struct super_block *sb = watch->inode->i_sb;
112 spin_lock(&sb_lock);
113 if (sb->s_count >= S_BIAS) {
114 atomic_inc(&sb->s_active);
115 spin_unlock(&sb_lock);
116 atomic_inc(&watch->count);
117 return 1;
118 }
119 spin_unlock(&sb_lock);
120 return 0;
121}
122
109/** 123/**
110 * put_inotify_watch - decrements the ref count on a given watch. cleans up 124 * put_inotify_watch - decrements the ref count on a given watch. cleans up
111 * watch references if the count reaches zero. inotify_watch is freed by 125 * watch references if the count reaches zero. inotify_watch is freed by
@@ -124,6 +138,13 @@ void put_inotify_watch(struct inotify_watch *watch)
124} 138}
125EXPORT_SYMBOL_GPL(put_inotify_watch); 139EXPORT_SYMBOL_GPL(put_inotify_watch);
126 140
141void unpin_inotify_watch(struct inotify_watch *watch)
142{
143 struct super_block *sb = watch->inode->i_sb;
144 put_inotify_watch(watch);
145 deactivate_super(sb);
146}
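The pin/unpin pair above is meant for callers elsewhere in the kernel (presumably the audit code; that caller is not part of this diff, so this is an assumption). The intended calling pattern, as a sketch with a hypothetical helper:

	/*
	 * Sketch of the intended calling pattern (hypothetical caller;
	 * the real user is assumed to live outside fs/).
	 */
	if (pin_inotify_watch(watch)) {
		/*
		 * The superblock is actively pinned and we hold a watch
		 * reference, so the watch can't be freed under us by a
		 * racing umount.
		 */
		do_something_with(watch);	/* hypothetical work */
		unpin_inotify_watch(watch);	/* drops both references */
	} else {
		/* Filesystem is going away; treat the watch as dead. */
	}
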
147
127/* 148/*
128 * inotify_handle_get_wd - returns the next WD for use by the given handle 149 * inotify_handle_get_wd - returns the next WD for use by the given handle
129 * 150 *
@@ -407,11 +428,13 @@ void inotify_unmount_inodes(struct list_head *list)
407 watches = &inode->inotify_watches; 428 watches = &inode->inotify_watches;
408 list_for_each_entry_safe(watch, next_w, watches, i_list) { 429 list_for_each_entry_safe(watch, next_w, watches, i_list) {
409 struct inotify_handle *ih= watch->ih; 430 struct inotify_handle *ih= watch->ih;
431 get_inotify_watch(watch);
410 mutex_lock(&ih->mutex); 432 mutex_lock(&ih->mutex);
411 ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0, 433 ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
412 NULL, NULL); 434 NULL, NULL);
413 inotify_remove_watch_locked(ih, watch); 435 inotify_remove_watch_locked(ih, watch);
414 mutex_unlock(&ih->mutex); 436 mutex_unlock(&ih->mutex);
437 put_inotify_watch(watch);
415 } 438 }
416 mutex_unlock(&inode->inotify_mutex); 439 mutex_unlock(&inode->inotify_mutex);
417 iput(inode); 440 iput(inode);
@@ -479,6 +502,112 @@ void inotify_init_watch(struct inotify_watch *watch)
479} 502}
480EXPORT_SYMBOL_GPL(inotify_init_watch); 503EXPORT_SYMBOL_GPL(inotify_init_watch);
481 504
505/*
506 * Watch removals suck violently. To kick the watch out we need (in this
507 * order) inode->inotify_mutex and ih->mutex. That's fine if we have
508 * a hold on inode; however, for all other cases we need to make damn sure
509 * we don't race with umount. We can *NOT* just grab a reference to a
510 * watch - inotify_unmount_inodes() will happily sail past it and we'll end
511 * with reference to inode potentially outliving its superblock. Ideally
512 * we just want to grab an active reference to superblock if we can; that
 513 * will make sure we won't go into inotify_unmount_inodes() until we are
514 * done. Cleanup is just deactivate_super(). However, that leaves a messy
515 * case - what if we *are* racing with umount() and active references to
516 * superblock can't be acquired anymore? We can bump ->s_count, grab
517 * ->s_umount, which will almost certainly wait until the superblock is shut
518 * down and the watch in question is pining for fjords. That's fine, but
519 * there is a problem - we might have hit the window between ->s_active
520 * getting to 0 / ->s_count - below S_BIAS (i.e. the moment when superblock
521 * is past the point of no return and is heading for shutdown) and the
522 * moment when deactivate_super() acquires ->s_umount. We could just do
523 * drop_super() yield() and retry, but that's rather antisocial and this
524 * stuff is luser-triggerable. OTOH, having grabbed ->s_umount and having
525 * found that we'd got there first (i.e. that ->s_root is non-NULL) we know
 526 * that we won't race with inotify_unmount_inodes(). So we could grab a
527 * reference to watch and do the rest as above, just with drop_super() instead
528 * of deactivate_super(), right? Wrong. We had to drop ih->mutex before we
529 * could grab ->s_umount. So the watch could've been gone already.
530 *
531 * That still can be dealt with - we need to save watch->wd, do idr_find()
532 * and compare its result with our pointer. If they match, we either have
533 * the damn thing still alive or we'd lost not one but two races at once,
534 * the watch had been killed and a new one got created with the same ->wd
535 * at the same address. That couldn't have happened in inotify_destroy(),
536 * but inotify_rm_wd() could run into that. Still, "new one got created"
537 * is not a problem - we have every right to kill it or leave it alone,
538 * whatever's more convenient.
539 *
540 * So we can use idr_find(...) == watch && watch->inode->i_sb == sb as
541 * "grab it and kill it" check. If it's been our original watch, we are
542 * fine, if it's a newcomer - nevermind, just pretend that we'd won the
543 * race and kill the fscker anyway; we are safe since we know that its
544 * superblock won't be going away.
545 *
546 * And yes, this is far beyond mere "not very pretty"; so's the entire
547 * concept of inotify to start with.
548 */
549
550/**
551 * pin_to_kill - pin the watch down for removal
552 * @ih: inotify handle
553 * @watch: watch to kill
554 *
555 * Called with ih->mutex held, drops it. Possible return values:
556 * 0 - nothing to do, it has died
557 * 1 - remove it, drop the reference and deactivate_super()
558 * 2 - remove it, drop the reference and drop_super(); we tried hard to avoid
559 * that variant, since it involved a lot of PITA, but that's the best that
560 * could've been done.
561 */
562static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
563{
564 struct super_block *sb = watch->inode->i_sb;
565 s32 wd = watch->wd;
566
567 spin_lock(&sb_lock);
568 if (sb->s_count >= S_BIAS) {
569 atomic_inc(&sb->s_active);
570 spin_unlock(&sb_lock);
571 get_inotify_watch(watch);
572 mutex_unlock(&ih->mutex);
573 return 1; /* the best outcome */
574 }
575 sb->s_count++;
576 spin_unlock(&sb_lock);
577 mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */
578 down_read(&sb->s_umount);
579 if (likely(!sb->s_root)) {
580 /* fs is already shut down; the watch is dead */
581 drop_super(sb);
582 return 0;
583 }
584 /* raced with the final deactivate_super() */
585 mutex_lock(&ih->mutex);
586 if (idr_find(&ih->idr, wd) != watch || watch->inode->i_sb != sb) {
587 /* the watch is dead */
588 mutex_unlock(&ih->mutex);
589 drop_super(sb);
590 return 0;
591 }
592 /* still alive or freed and reused with the same sb and wd; kill */
593 get_inotify_watch(watch);
594 mutex_unlock(&ih->mutex);
595 return 2;
596}
597
598static void unpin_and_kill(struct inotify_watch *watch, int how)
599{
600 struct super_block *sb = watch->inode->i_sb;
601 put_inotify_watch(watch);
602 switch (how) {
603 case 1:
604 deactivate_super(sb);
605 break;
606 case 2:
607 drop_super(sb);
608 }
609}
610
482/** 611/**
483 * inotify_destroy - clean up and destroy an inotify instance 612 * inotify_destroy - clean up and destroy an inotify instance
484 * @ih: inotify handle 613 * @ih: inotify handle
@@ -490,11 +619,15 @@ void inotify_destroy(struct inotify_handle *ih)
490 * pretty. We cannot do a simple iteration over the list, because we 619 * pretty. We cannot do a simple iteration over the list, because we
491 * do not know the inode until we iterate to the watch. But we need to 620 * do not know the inode until we iterate to the watch. But we need to
492 * hold inode->inotify_mutex before ih->mutex. The following works. 621 * hold inode->inotify_mutex before ih->mutex. The following works.
622 *
623 * AV: it had to become even uglier to start working ;-/
493 */ 624 */
494 while (1) { 625 while (1) {
495 struct inotify_watch *watch; 626 struct inotify_watch *watch;
496 struct list_head *watches; 627 struct list_head *watches;
628 struct super_block *sb;
497 struct inode *inode; 629 struct inode *inode;
630 int how;
498 631
499 mutex_lock(&ih->mutex); 632 mutex_lock(&ih->mutex);
500 watches = &ih->watches; 633 watches = &ih->watches;
@@ -503,8 +636,10 @@ void inotify_destroy(struct inotify_handle *ih)
503 break; 636 break;
504 } 637 }
505 watch = list_first_entry(watches, struct inotify_watch, h_list); 638 watch = list_first_entry(watches, struct inotify_watch, h_list);
506 get_inotify_watch(watch); 639 sb = watch->inode->i_sb;
507 mutex_unlock(&ih->mutex); 640 how = pin_to_kill(ih, watch);
641 if (!how)
642 continue;
508 643
509 inode = watch->inode; 644 inode = watch->inode;
510 mutex_lock(&inode->inotify_mutex); 645 mutex_lock(&inode->inotify_mutex);
@@ -518,7 +653,7 @@ void inotify_destroy(struct inotify_handle *ih)
518 653
519 mutex_unlock(&ih->mutex); 654 mutex_unlock(&ih->mutex);
520 mutex_unlock(&inode->inotify_mutex); 655 mutex_unlock(&inode->inotify_mutex);
521 put_inotify_watch(watch); 656 unpin_and_kill(watch, how);
522 } 657 }
523 658
524 /* free this handle: the put matching the get in inotify_init() */ 659 /* free this handle: the put matching the get in inotify_init() */
@@ -719,7 +854,9 @@ void inotify_evict_watch(struct inotify_watch *watch)
719int inotify_rm_wd(struct inotify_handle *ih, u32 wd) 854int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
720{ 855{
721 struct inotify_watch *watch; 856 struct inotify_watch *watch;
857 struct super_block *sb;
722 struct inode *inode; 858 struct inode *inode;
859 int how;
723 860
724 mutex_lock(&ih->mutex); 861 mutex_lock(&ih->mutex);
725 watch = idr_find(&ih->idr, wd); 862 watch = idr_find(&ih->idr, wd);
@@ -727,9 +864,12 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
727 mutex_unlock(&ih->mutex); 864 mutex_unlock(&ih->mutex);
728 return -EINVAL; 865 return -EINVAL;
729 } 866 }
730 get_inotify_watch(watch); 867 sb = watch->inode->i_sb;
868 how = pin_to_kill(ih, watch);
869 if (!how)
870 return 0;
871
731 inode = watch->inode; 872 inode = watch->inode;
732 mutex_unlock(&ih->mutex);
733 873
734 mutex_lock(&inode->inotify_mutex); 874 mutex_lock(&inode->inotify_mutex);
735 mutex_lock(&ih->mutex); 875 mutex_lock(&ih->mutex);
@@ -740,7 +880,7 @@ int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
740 880
741 mutex_unlock(&ih->mutex); 881 mutex_unlock(&ih->mutex);
742 mutex_unlock(&inode->inotify_mutex); 882 mutex_unlock(&inode->inotify_mutex);
743 put_inotify_watch(watch); 883 unpin_and_kill(watch, how);
744 884
745 return 0; 885 return 0;
746} 886}
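
Both removal paths above (inotify_destroy() and inotify_rm_wd()) now follow the same shape: pin the watch and its superblock, take the locks in the required order (inode->inotify_mutex before ih->mutex), remove the watch, then unpin. A condensed sketch of that caller-side pattern, entered with ih->mutex held (illustrative only, not literal code from either call site):

	how = pin_to_kill(ih, watch);	/* drops ih->mutex, pins watch + superblock */
	if (!how)
		return;			/* the watch died under us; nothing to do */
	inode = watch->inode;
	mutex_lock(&inode->inotify_mutex);
	mutex_lock(&ih->mutex);
	/* ... remove the watch from the inode and handle lists ... */
	mutex_unlock(&ih->mutex);
	mutex_unlock(&inode->inotify_mutex);
	unpin_and_kill(watch, how);	/* put the watch, then drop_super()/deactivate_super() */
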
diff --git a/fs/ioctl.c b/fs/ioctl.c
index d152856c371b..43e8b2c0664b 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -400,11 +400,9 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp,
400 400
401 /* Did FASYNC state change ? */ 401 /* Did FASYNC state change ? */
402 if ((flag ^ filp->f_flags) & FASYNC) { 402 if ((flag ^ filp->f_flags) & FASYNC) {
403 if (filp->f_op && filp->f_op->fasync) { 403 if (filp->f_op && filp->f_op->fasync)
404 lock_kernel();
405 error = filp->f_op->fasync(fd, filp, on); 404 error = filp->f_op->fasync(fd, filp, on);
406 unlock_kernel(); 405 else
407 } else
408 error = -ENOTTY; 406 error = -ENOTTY;
409 } 407 }
410 if (error) 408 if (error)
@@ -440,11 +438,17 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
440 break; 438 break;
441 439
442 case FIONBIO: 440 case FIONBIO:
441 /* BKL needed to avoid races tweaking f_flags */
442 lock_kernel();
443 error = ioctl_fionbio(filp, argp); 443 error = ioctl_fionbio(filp, argp);
444 unlock_kernel();
444 break; 445 break;
445 446
446 case FIOASYNC: 447 case FIOASYNC:
448 /* BKL needed to avoid races tweaking f_flags */
449 lock_kernel();
447 error = ioctl_fioasync(fd, filp, argp); 450 error = ioctl_fioasync(fd, filp, argp);
451 unlock_kernel();
448 break; 452 break;
449 453
450 case FIOQSIZE: 454 case FIOQSIZE:
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 1bd8d4acc6f2..61f32f3868cd 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -115,7 +115,7 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
115 */ 115 */
116void __log_wait_for_space(journal_t *journal) 116void __log_wait_for_space(journal_t *journal)
117{ 117{
118 int nblocks; 118 int nblocks, space_left;
119 assert_spin_locked(&journal->j_state_lock); 119 assert_spin_locked(&journal->j_state_lock);
120 120
121 nblocks = jbd_space_needed(journal); 121 nblocks = jbd_space_needed(journal);
@@ -128,25 +128,42 @@ void __log_wait_for_space(journal_t *journal)
128 /* 128 /*
129 * Test again, another process may have checkpointed while we 129 * Test again, another process may have checkpointed while we
130 * were waiting for the checkpoint lock. If there are no 130 * were waiting for the checkpoint lock. If there are no
131 * outstanding transactions there is nothing to checkpoint and 131 * transactions ready to be checkpointed, try to recover
132 * we can't make progress. Abort the journal in this case. 132 * journal space by calling cleanup_journal_tail(), and if
133 * that doesn't work, by waiting for the currently committing
134 * transaction to complete. If there is absolutely no way
135 * to make progress, this is either a BUG or corrupted
136 * filesystem, so abort the journal and leave a stack
137 * trace for forensic evidence.
133 */ 138 */
134 spin_lock(&journal->j_state_lock); 139 spin_lock(&journal->j_state_lock);
135 spin_lock(&journal->j_list_lock); 140 spin_lock(&journal->j_list_lock);
136 nblocks = jbd_space_needed(journal); 141 nblocks = jbd_space_needed(journal);
137 if (__log_space_left(journal) < nblocks) { 142 space_left = __log_space_left(journal);
143 if (space_left < nblocks) {
138 int chkpt = journal->j_checkpoint_transactions != NULL; 144 int chkpt = journal->j_checkpoint_transactions != NULL;
145 tid_t tid = 0;
139 146
147 if (journal->j_committing_transaction)
148 tid = journal->j_committing_transaction->t_tid;
140 spin_unlock(&journal->j_list_lock); 149 spin_unlock(&journal->j_list_lock);
141 spin_unlock(&journal->j_state_lock); 150 spin_unlock(&journal->j_state_lock);
142 if (chkpt) { 151 if (chkpt) {
143 log_do_checkpoint(journal); 152 log_do_checkpoint(journal);
153 } else if (cleanup_journal_tail(journal) == 0) {
154 /* We were able to recover space; yay! */
155 ;
156 } else if (tid) {
157 log_wait_commit(journal, tid);
144 } else { 158 } else {
145 printk(KERN_ERR "%s: no transactions\n", 159 printk(KERN_ERR "%s: needed %d blocks and "
146 __func__); 160 "only had %d space available\n",
161 __func__, nblocks, space_left);
162 printk(KERN_ERR "%s: no way to get more "
163 "journal space\n", __func__);
164 WARN_ON(1);
147 journal_abort(journal, 0); 165 journal_abort(journal, 0);
148 } 166 }
149
150 spin_lock(&journal->j_state_lock); 167 spin_lock(&journal->j_state_lock);
151 } else { 168 } else {
152 spin_unlock(&journal->j_list_lock); 169 spin_unlock(&journal->j_list_lock);
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 9203c3332f17..9497718fe920 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -116,7 +116,7 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
116 */ 116 */
117void __jbd2_log_wait_for_space(journal_t *journal) 117void __jbd2_log_wait_for_space(journal_t *journal)
118{ 118{
119 int nblocks; 119 int nblocks, space_left;
120 assert_spin_locked(&journal->j_state_lock); 120 assert_spin_locked(&journal->j_state_lock);
121 121
122 nblocks = jbd_space_needed(journal); 122 nblocks = jbd_space_needed(journal);
@@ -129,25 +129,43 @@ void __jbd2_log_wait_for_space(journal_t *journal)
129 /* 129 /*
130 * Test again, another process may have checkpointed while we 130 * Test again, another process may have checkpointed while we
131 * were waiting for the checkpoint lock. If there are no 131 * were waiting for the checkpoint lock. If there are no
132 * outstanding transactions there is nothing to checkpoint and 132 * transactions ready to be checkpointed, try to recover
133 * we can't make progress. Abort the journal in this case. 133 * journal space by calling cleanup_journal_tail(), and if
134 * that doesn't work, by waiting for the currently committing
135 * transaction to complete. If there is absolutely no way
136 * to make progress, this is either a BUG or corrupted
137 * filesystem, so abort the journal and leave a stack
138 * trace for forensic evidence.
134 */ 139 */
135 spin_lock(&journal->j_state_lock); 140 spin_lock(&journal->j_state_lock);
136 spin_lock(&journal->j_list_lock); 141 spin_lock(&journal->j_list_lock);
137 nblocks = jbd_space_needed(journal); 142 nblocks = jbd_space_needed(journal);
138 if (__jbd2_log_space_left(journal) < nblocks) { 143 space_left = __jbd2_log_space_left(journal);
144 if (space_left < nblocks) {
139 int chkpt = journal->j_checkpoint_transactions != NULL; 145 int chkpt = journal->j_checkpoint_transactions != NULL;
146 tid_t tid = 0;
140 147
148 if (journal->j_committing_transaction)
149 tid = journal->j_committing_transaction->t_tid;
141 spin_unlock(&journal->j_list_lock); 150 spin_unlock(&journal->j_list_lock);
142 spin_unlock(&journal->j_state_lock); 151 spin_unlock(&journal->j_state_lock);
143 if (chkpt) { 152 if (chkpt) {
144 jbd2_log_do_checkpoint(journal); 153 jbd2_log_do_checkpoint(journal);
154 } else if (jbd2_cleanup_journal_tail(journal) == 0) {
155 /* We were able to recover space; yay! */
156 ;
157 } else if (tid) {
158 jbd2_log_wait_commit(journal, tid);
145 } else { 159 } else {
146 printk(KERN_ERR "%s: no transactions\n", 160 printk(KERN_ERR "%s: needed %d blocks and "
147 __func__); 161 "only had %d space available\n",
162 __func__, nblocks, space_left);
163 printk(KERN_ERR "%s: no way to get more "
164 "journal space in %s\n", __func__,
165 journal->j_devname);
166 WARN_ON(1);
148 jbd2_journal_abort(journal, 0); 167 jbd2_journal_abort(journal, 0);
149 } 168 }
150
151 spin_lock(&journal->j_state_lock); 169 spin_lock(&journal->j_state_lock);
152 } else { 170 } else {
153 spin_unlock(&journal->j_list_lock); 171 spin_unlock(&journal->j_list_lock);
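
The calling convention is unchanged by this patch: the function is entered with j_state_lock held (see the assert_spin_locked() in the context above) and only returns once enough log space is available or the journal has been aborted. A hypothetical caller sketch, assuming the usual jbd2 locking rules:

	spin_lock(&journal->j_state_lock);
	if (__jbd2_log_space_left(journal) < jbd_space_needed(journal))
		__jbd2_log_wait_for_space(journal);	/* may temporarily drop j_state_lock */
	spin_unlock(&journal->j_state_lock);
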
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 783de118de92..e70d657a19f8 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1089,6 +1089,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1089 if (!journal->j_wbuf) { 1089 if (!journal->j_wbuf) {
1090 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", 1090 printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
1091 __func__); 1091 __func__);
1092 jbd2_stats_proc_exit(journal);
1092 kfree(journal); 1093 kfree(journal);
1093 return NULL; 1094 return NULL;
1094 } 1095 }
@@ -1098,6 +1099,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
1098 if (err) { 1099 if (err) {
1099 printk(KERN_ERR "%s: Cannnot locate journal superblock\n", 1100 printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
1100 __func__); 1101 __func__);
1102 jbd2_stats_proc_exit(journal);
1101 kfree(journal); 1103 kfree(journal);
1102 return NULL; 1104 return NULL;
1103 } 1105 }
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 8adebd3e43c6..3cceef4ad2b7 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -85,15 +85,15 @@ static int jffs2_garbage_collect_thread(void *_c)
85 for (;;) { 85 for (;;) {
86 allow_signal(SIGHUP); 86 allow_signal(SIGHUP);
87 again: 87 again:
88 spin_lock(&c->erase_completion_lock);
88 if (!jffs2_thread_should_wake(c)) { 89 if (!jffs2_thread_should_wake(c)) {
89 set_current_state (TASK_INTERRUPTIBLE); 90 set_current_state (TASK_INTERRUPTIBLE);
91 spin_unlock(&c->erase_completion_lock);
90 D1(printk(KERN_DEBUG "jffs2_garbage_collect_thread sleeping...\n")); 92 D1(printk(KERN_DEBUG "jffs2_garbage_collect_thread sleeping...\n"));
91 /* Yes, there's a race here; we checked jffs2_thread_should_wake()
92 before setting current->state to TASK_INTERRUPTIBLE. But it doesn't
93 matter - We don't care if we miss a wakeup, because the GC thread
94 is only an optimisation anyway. */
95 schedule(); 93 schedule();
96 } 94 } else
95 spin_unlock(&c->erase_completion_lock);
96
97 97
98 /* This thread is purely an optimisation. But if it runs when 98 /* This thread is purely an optimisation. But if it runs when
99 other things could be running, it actually makes things a 99 other things could be running, it actually makes things a
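
The lock movement above closes the sleep/wakeup race the deleted comment used to shrug off: the wake-up condition is now tested and TASK_INTERRUPTIBLE is set under erase_completion_lock, so a wakeup can no longer slip in between the two. The generic shape of that pattern, as an illustrative sketch (not the jffs2 code):

	/* sleeper: test the condition and set the task state under one lock */
	spin_lock(&lock);
	if (!work_available) {
		set_current_state(TASK_INTERRUPTIBLE);
		spin_unlock(&lock);
		/*
		 * A waker updating work_available holds the same lock, so it
		 * either ran before our check (we saw the work and did not
		 * sleep) or runs after our state is already set, in which
		 * case its wakeup makes schedule() return promptly.
		 */
		schedule();
	} else {
		spin_unlock(&lock);
	}

	/* waker: update the condition and wake under the same lock */
	spin_lock(&lock);
	work_available = 1;
	wake_up_process(sleeper);
	spin_unlock(&lock);
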
diff --git a/fs/jffs2/compr_lzo.c b/fs/jffs2/compr_lzo.c
index 47b045797e42..90cb60d09787 100644
--- a/fs/jffs2/compr_lzo.c
+++ b/fs/jffs2/compr_lzo.c
@@ -19,7 +19,7 @@
19 19
20static void *lzo_mem; 20static void *lzo_mem;
21static void *lzo_compress_buf; 21static void *lzo_compress_buf;
22static DEFINE_MUTEX(deflate_mutex); 22static DEFINE_MUTEX(deflate_mutex); /* for lzo_mem and lzo_compress_buf */
23 23
24static void free_workspace(void) 24static void free_workspace(void)
25{ 25{
@@ -49,18 +49,21 @@ static int jffs2_lzo_compress(unsigned char *data_in, unsigned char *cpage_out,
49 49
50 mutex_lock(&deflate_mutex); 50 mutex_lock(&deflate_mutex);
51 ret = lzo1x_1_compress(data_in, *sourcelen, lzo_compress_buf, &compress_size, lzo_mem); 51 ret = lzo1x_1_compress(data_in, *sourcelen, lzo_compress_buf, &compress_size, lzo_mem);
52 mutex_unlock(&deflate_mutex);
53
54 if (ret != LZO_E_OK) 52 if (ret != LZO_E_OK)
55 return -1; 53 goto fail;
56 54
57 if (compress_size > *dstlen) 55 if (compress_size > *dstlen)
58 return -1; 56 goto fail;
59 57
60 memcpy(cpage_out, lzo_compress_buf, compress_size); 58 memcpy(cpage_out, lzo_compress_buf, compress_size);
61 *dstlen = compress_size; 59 mutex_unlock(&deflate_mutex);
62 60
61 *dstlen = compress_size;
63 return 0; 62 return 0;
63
64 fail:
65 mutex_unlock(&deflate_mutex);
66 return -1;
64} 67}
65 68
66static int jffs2_lzo_decompress(unsigned char *data_in, unsigned char *cpage_out, 69static int jffs2_lzo_decompress(unsigned char *data_in, unsigned char *cpage_out,
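
lzo_mem and lzo_compress_buf are single, statically allocated work areas (the new comment on deflate_mutex spells that out), so the compressed data must be copied out of lzo_compress_buf before the mutex is released; the patch extends the critical section to cover the size checks and the memcpy(). Condensed, the fixed critical section reads as follows (a sketch assembled from the hunk above; the function signature and workspace allocation are omitted):

	mutex_lock(&deflate_mutex);
	ret = lzo1x_1_compress(data_in, *sourcelen, lzo_compress_buf,
			       &compress_size, lzo_mem);
	if (ret != LZO_E_OK || compress_size > *dstlen) {
		mutex_unlock(&deflate_mutex);
		return -1;
	}
	/* lzo_compress_buf is shared; copy out before dropping the lock */
	memcpy(cpage_out, lzo_compress_buf, compress_size);
	mutex_unlock(&deflate_mutex);

	*dstlen = compress_size;
	return 0;
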
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index 0875b60b4bf7..21a052915aa9 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -261,9 +261,11 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c)
261 261
262 jffs2_sum_reset_collected(c->summary); /* reset collected summary */ 262 jffs2_sum_reset_collected(c->summary); /* reset collected summary */
263 263
264#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
264 /* adjust write buffer offset, else we get a non contiguous write bug */ 265 /* adjust write buffer offset, else we get a non contiguous write bug */
265 if (!(c->wbuf_ofs % c->sector_size) && !c->wbuf_len) 266 if (!(c->wbuf_ofs % c->sector_size) && !c->wbuf_len)
266 c->wbuf_ofs = 0xffffffff; 267 c->wbuf_ofs = 0xffffffff;
268#endif
267 269
268 D1(printk(KERN_DEBUG "jffs2_find_nextblock(): new nextblock = 0x%08x\n", c->nextblock->offset)); 270 D1(printk(KERN_DEBUG "jffs2_find_nextblock(): new nextblock = 0x%08x\n", c->nextblock->offset));
269 271
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 9fd8889097b7..70fc63a1727b 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -167,7 +167,8 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
167 continue; 167 continue;
168 if (host->h_server != ni->server) 168 if (host->h_server != ni->server)
169 continue; 169 continue;
170 if (!nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap)) 170 if (ni->server &&
171 !nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap))
171 continue; 172 continue;
172 173
173 /* Move to head of hash chain. */ 174 /* Move to head of hash chain. */
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index c631a83931ce..56b076736b56 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -181,6 +181,7 @@ lockd(void *vrqstp)
181 } 181 }
182 flush_signals(current); 182 flush_signals(current);
183 cancel_delayed_work_sync(&grace_period_end); 183 cancel_delayed_work_sync(&grace_period_end);
184 locks_end_grace(&lockd_manager);
184 if (nlmsvc_ops) 185 if (nlmsvc_ops)
185 nlmsvc_invalidate_all(); 186 nlmsvc_invalidate_all();
186 nlm_shutdown_hosts(); 187 nlm_shutdown_hosts();
diff --git a/fs/msdos/Makefile b/fs/msdos/Makefile
deleted file mode 100644
index ea67646fcb95..000000000000
--- a/fs/msdos/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
1#
2# Makefile for the Linux msdos filesystem routines.
3#
4
5obj-$(CONFIG_MSDOS_FS) += msdos.o
6
7msdos-y := namei.o
diff --git a/fs/namei.c b/fs/namei.c
index 09ce58e49e72..d34e0f9681c6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1378,7 +1378,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
1378 if (IS_APPEND(dir)) 1378 if (IS_APPEND(dir))
1379 return -EPERM; 1379 return -EPERM;
1380 if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| 1380 if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
1381 IS_IMMUTABLE(victim->d_inode)) 1381 IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
1382 return -EPERM; 1382 return -EPERM;
1383 if (isdir) { 1383 if (isdir) {
1384 if (!S_ISDIR(victim->d_inode->i_mode)) 1384 if (!S_ISDIR(victim->d_inode->i_mode))
diff --git a/fs/namespace.c b/fs/namespace.c
index cce46702d33c..65b3dc844c87 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1815,8 +1815,8 @@ static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts)
1815 while (!list_empty(&graveyard)) { 1815 while (!list_empty(&graveyard)) {
1816 m = list_first_entry(&graveyard, struct vfsmount, 1816 m = list_first_entry(&graveyard, struct vfsmount,
1817 mnt_expire); 1817 mnt_expire);
1818 touch_mnt_namespace(mnt->mnt_ns); 1818 touch_mnt_namespace(m->mnt_ns);
1819 umount_tree(mnt, 1, umounts); 1819 umount_tree(m, 1, umounts);
1820 } 1820 }
1821 } 1821 }
1822} 1822}
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index bb93946ace22..b79ec930d9f1 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -225,12 +225,12 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
225 return 0; 225 return 0;
226 226
227 nfs4_save_user(&uid, &gid); 227 nfs4_save_user(&uid, &gid);
228 INIT_LIST_HEAD(dentries);
228 229
229 filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY); 230 filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY);
230 status = PTR_ERR(filp); 231 status = PTR_ERR(filp);
231 if (IS_ERR(filp)) 232 if (IS_ERR(filp))
232 goto out; 233 goto out;
233 INIT_LIST_HEAD(dentries);
234 status = vfs_readdir(filp, nfsd4_build_dentrylist, &dla); 234 status = vfs_readdir(filp, nfsd4_build_dentrylist, &dla);
235 fput(filp); 235 fput(filp);
236 while (!list_empty(dentries)) { 236 while (!list_empty(dentries)) {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b0bebc552a11..1a052ac2bde9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3261,6 +3261,7 @@ nfs4_state_shutdown(void)
3261{ 3261{
3262 cancel_rearming_delayed_workqueue(laundry_wq, &laundromat_work); 3262 cancel_rearming_delayed_workqueue(laundry_wq, &laundromat_work);
3263 destroy_workqueue(laundry_wq); 3263 destroy_workqueue(laundry_wq);
3264 locks_end_grace(&nfsd4_manager);
3264 nfs4_lock_state(); 3265 nfs4_lock_state();
3265 nfs4_release_reclaim(); 3266 nfs4_release_reclaim();
3266 __nfs4_state_shutdown(); 3267 __nfs4_state_shutdown();
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 848a03e83a42..4433c8f00163 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1875,11 +1875,11 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func,
1875 return -ENOMEM; 1875 return -ENOMEM;
1876 1876
1877 offset = *offsetp; 1877 offset = *offsetp;
1878 cdp->err = nfserr_eof; /* will be cleared on successful read */
1879 1878
1880 while (1) { 1879 while (1) {
1881 unsigned int reclen; 1880 unsigned int reclen;
1882 1881
1882 cdp->err = nfserr_eof; /* will be cleared on successful read */
1883 buf.used = 0; 1883 buf.used = 0;
1884 buf.full = 0; 1884 buf.full = 0;
1885 1885
@@ -1912,9 +1912,6 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func,
1912 de = (struct buffered_dirent *)((char *)de + reclen); 1912 de = (struct buffered_dirent *)((char *)de + reclen);
1913 } 1913 }
1914 offset = vfs_llseek(file, 0, SEEK_CUR); 1914 offset = vfs_llseek(file, 0, SEEK_CUR);
1915 cdp->err = nfserr_eof;
1916 if (!buf.full)
1917 break;
1918 } 1915 }
1919 1916
1920 done: 1917 done:
diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h
index 5e6724c1afd1..2142b1c68b61 100644
--- a/fs/ntfs/debug.h
+++ b/fs/ntfs/debug.h
@@ -30,7 +30,8 @@
30 30
31extern int debug_msgs; 31extern int debug_msgs;
32 32
33#if 0 /* Fool kernel-doc since it doesn't do macros yet */ 33extern void __ntfs_debug(const char *file, int line, const char *function,
34 const char *format, ...) __attribute__ ((format (printf, 4, 5)));
34/** 35/**
35 * ntfs_debug - write a debug level message to syslog 36 * ntfs_debug - write a debug level message to syslog
36 * @f: a printf format string containing the message 37 * @f: a printf format string containing the message
@@ -39,11 +40,6 @@ extern int debug_msgs;
39 * ntfs_debug() writes a DEBUG level message to the syslog but only if the 40 * ntfs_debug() writes a DEBUG level message to the syslog but only if the
40 * driver was compiled with -DDEBUG. Otherwise, the call turns into a NOP. 41 * driver was compiled with -DDEBUG. Otherwise, the call turns into a NOP.
41 */ 42 */
42static void ntfs_debug(const char *f, ...);
43#endif
44
45extern void __ntfs_debug (const char *file, int line, const char *function,
46 const char *format, ...) __attribute__ ((format (printf, 4, 5)));
47#define ntfs_debug(f, a...) \ 43#define ntfs_debug(f, a...) \
48 __ntfs_debug(__FILE__, __LINE__, __func__, f, ##a) 44 __ntfs_debug(__FILE__, __LINE__, __func__, f, ##a)
49 45
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 7e947c672469..3a178ec48d7c 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -112,7 +112,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
112 bh = bhs[i]; 112 bh = bhs[i];
113 113
114 if (buffer_jbd(bh)) { 114 if (buffer_jbd(bh)) {
115 mlog(ML_ERROR, 115 mlog(ML_BH_IO,
116 "trying to sync read a jbd " 116 "trying to sync read a jbd "
117 "managed bh (blocknr = %llu), skipping\n", 117 "managed bh (blocknr = %llu), skipping\n",
118 (unsigned long long)bh->b_blocknr); 118 (unsigned long long)bh->b_blocknr);
@@ -147,15 +147,10 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
147 for (i = nr; i > 0; i--) { 147 for (i = nr; i > 0; i--) {
148 bh = bhs[i - 1]; 148 bh = bhs[i - 1];
149 149
150 if (buffer_jbd(bh)) { 150 /* No need to wait on the buffer if it's managed by JBD. */
151 mlog(ML_ERROR, 151 if (!buffer_jbd(bh))
152 "the journal got the buffer while it was " 152 wait_on_buffer(bh);
153 "locked for io! (blocknr = %llu)\n",
154 (unsigned long long)bh->b_blocknr);
155 BUG();
156 }
157 153
158 wait_on_buffer(bh);
159 if (!buffer_uptodate(bh)) { 154 if (!buffer_uptodate(bh)) {
160 /* Status won't be cleared from here on out, 155 /* Status won't be cleared from here on out,
161 * so we can safely record this and loop back 156 * so we can safely record this and loop back
@@ -251,8 +246,6 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
251 ignore_cache = 1; 246 ignore_cache = 1;
252 } 247 }
253 248
254 /* XXX: Can we ever get this and *not* have the cached
255 * flag set? */
256 if (buffer_jbd(bh)) { 249 if (buffer_jbd(bh)) {
257 if (ignore_cache) 250 if (ignore_cache)
258 mlog(ML_BH_IO, "trying to sync read a jbd " 251 mlog(ML_BH_IO, "trying to sync read a jbd "
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 533a789c3ef8..ba962d71b34d 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -608,8 +608,10 @@ static int __init init_dlmfs_fs(void)
608 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| 608 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
609 SLAB_MEM_SPREAD), 609 SLAB_MEM_SPREAD),
610 dlmfs_init_once); 610 dlmfs_init_once);
611 if (!dlmfs_inode_cache) 611 if (!dlmfs_inode_cache) {
612 status = -ENOMEM;
612 goto bail; 613 goto bail;
614 }
613 cleanup_inode = 1; 615 cleanup_inode = 1;
614 616
615 user_dlm_worker = create_singlethread_workqueue("user_dlm"); 617 user_dlm_worker = create_singlethread_workqueue("user_dlm");
diff --git a/fs/ocfs2/dlm/userdlm.h b/fs/ocfs2/dlm/userdlm.h
index 39ec27738499..0c3cc03c61fa 100644
--- a/fs/ocfs2/dlm/userdlm.h
+++ b/fs/ocfs2/dlm/userdlm.h
@@ -33,7 +33,7 @@
33#include <linux/workqueue.h> 33#include <linux/workqueue.h>
34 34
35/* user_lock_res->l_flags flags. */ 35/* user_lock_res->l_flags flags. */
36#define USER_LOCK_ATTACHED (0x00000001) /* have we initialized 36#define USER_LOCK_ATTACHED (0x00000001) /* we have initialized
37 * the lvb */ 37 * the lvb */
38#define USER_LOCK_BUSY (0x00000002) /* we are currently in 38#define USER_LOCK_BUSY (0x00000002) /* we are currently in
39 * dlm_lock */ 39 * dlm_lock */
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index ec684426034b..6e6cc0a2e5f7 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2841,9 +2841,8 @@ static void ocfs2_unlock_ast(void *opaque, int error)
2841 2841
2842 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); 2842 lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
2843 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; 2843 lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
2844 spin_unlock_irqrestore(&lockres->l_lock, flags);
2845
2846 wake_up(&lockres->l_event); 2844 wake_up(&lockres->l_event);
2845 spin_unlock_irqrestore(&lockres->l_lock, flags);
2847 2846
2848 mlog_exit_void(); 2847 mlog_exit_void();
2849} 2848}
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 7efe937a415f..e2570a3bc2b2 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -247,8 +247,8 @@ int ocfs2_update_inode_atime(struct inode *inode,
247 mlog_entry_void(); 247 mlog_entry_void();
248 248
249 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 249 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
250 if (handle == NULL) { 250 if (IS_ERR(handle)) {
251 ret = -ENOMEM; 251 ret = PTR_ERR(handle);
252 mlog_errno(ret); 252 mlog_errno(ret);
253 goto out; 253 goto out;
254 } 254 }
@@ -312,8 +312,8 @@ static int ocfs2_simple_size_update(struct inode *inode,
312 handle_t *handle = NULL; 312 handle_t *handle = NULL;
313 313
314 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 314 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
315 if (handle == NULL) { 315 if (IS_ERR(handle)) {
316 ret = -ENOMEM; 316 ret = PTR_ERR(handle);
317 mlog_errno(ret); 317 mlog_errno(ret);
318 goto out; 318 goto out;
319 } 319 }
@@ -1055,8 +1055,8 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
1055 (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode); 1055 (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode);
1056 1056
1057 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 1057 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
1058 if (handle == NULL) { 1058 if (IS_ERR(handle)) {
1059 ret = -ENOMEM; 1059 ret = PTR_ERR(handle);
1060 mlog_errno(ret); 1060 mlog_errno(ret);
1061 goto out; 1061 goto out;
1062 } 1062 }
@@ -1259,8 +1259,8 @@ static int __ocfs2_remove_inode_range(struct inode *inode,
1259 } 1259 }
1260 1260
1261 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); 1261 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
1262 if (handle == NULL) { 1262 if (IS_ERR(handle)) {
1263 ret = -ENOMEM; 1263 ret = PTR_ERR(handle);
1264 mlog_errno(ret); 1264 mlog_errno(ret);
1265 goto out; 1265 goto out;
1266 } 1266 }
@@ -1352,8 +1352,8 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
1352 goto out; 1352 goto out;
1353 1353
1354 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 1354 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
1355 if (handle == NULL) { 1355 if (IS_ERR(handle)) {
1356 ret = -ENOMEM; 1356 ret = PTR_ERR(handle);
1357 mlog_errno(ret); 1357 mlog_errno(ret);
1358 goto out; 1358 goto out;
1359 } 1359 }
@@ -1866,6 +1866,13 @@ relock:
1866 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, 1866 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
1867 ppos, count, ocount); 1867 ppos, count, ocount);
1868 if (written < 0) { 1868 if (written < 0) {
1869 /*
1870 * direct write may have instantiated a few
1871 * blocks outside i_size. Trim these off again.
1872 * Don't need i_size_read because we hold i_mutex.
1873 */
1874 if (*ppos + count > inode->i_size)
1875 vmtruncate(inode, inode->i_size);
1869 ret = written; 1876 ret = written;
1870 goto out_dio; 1877 goto out_dio;
1871 } 1878 }
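
All five hunks above make the same correction: ocfs2_start_trans() reports failure through ERR_PTR(), not by returning NULL, so the old handle == NULL tests could never fire and the real error code was thrown away. The corrected pattern, as a minimal sketch:

	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);	/* propagate whatever the journal reported */
		mlog_errno(ret);
		goto out;
	}
	/* ... journalled update ... */
	ocfs2_commit_trans(osb, handle);
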
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 4903688f72a9..7aa00d511874 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1106,6 +1106,12 @@ void ocfs2_clear_inode(struct inode *inode)
1106 oi->ip_last_trans = 0; 1106 oi->ip_last_trans = 0;
1107 oi->ip_dir_start_lookup = 0; 1107 oi->ip_dir_start_lookup = 0;
1108 oi->ip_blkno = 0ULL; 1108 oi->ip_blkno = 0ULL;
1109
1110 /*
1111 * ip_jinode is used to track txns against this inode. We ensure that
1112 * the journal is flushed before journal shutdown. Thus it is safe to
1113 * have inodes get cleaned up after journal shutdown.
1114 */
1109 jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal, 1115 jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal,
1110 &oi->ip_jinode); 1116 &oi->ip_jinode);
1111 1117
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 81e40677eecb..99fe9d584f3c 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -690,6 +690,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
690 690
691 /* Shutdown the kernel journal system */ 691 /* Shutdown the kernel journal system */
692 jbd2_journal_destroy(journal->j_journal); 692 jbd2_journal_destroy(journal->j_journal);
693 journal->j_journal = NULL;
693 694
694 OCFS2_I(inode)->ip_open_count--; 695 OCFS2_I(inode)->ip_open_count--;
695 696
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 3dc18d67557c..eea1d24713ea 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -113,7 +113,11 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh,
113 * ocfs2_write_begin_nolock(). 113 * ocfs2_write_begin_nolock().
114 */ 114 */
115 if (!PageUptodate(page) || page->mapping != inode->i_mapping) { 115 if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
116 ret = -EINVAL; 116 /*
 117	 * the page has been unmapped in ocfs2_data_downconvert_worker.
118 * So return 0 here and let VFS retry.
119 */
120 ret = 0;
117 goto out; 121 goto out;
118 } 122 }
119 123
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 485a6aa0ad39..f4967e634ffd 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -378,8 +378,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
378 } 378 }
379 379
380 inode = new_inode(dir->i_sb); 380 inode = new_inode(dir->i_sb);
381 if (IS_ERR(inode)) { 381 if (!inode) {
382 status = PTR_ERR(inode); 382 status = -ENOMEM;
383 mlog(ML_ERROR, "new_inode failed!\n"); 383 mlog(ML_ERROR, "new_inode failed!\n");
384 goto leave; 384 goto leave;
385 } 385 }
@@ -491,8 +491,10 @@ leave:
491 brelse(*new_fe_bh); 491 brelse(*new_fe_bh);
492 *new_fe_bh = NULL; 492 *new_fe_bh = NULL;
493 } 493 }
494 if (inode) 494 if (inode) {
495 clear_nlink(inode);
495 iput(inode); 496 iput(inode);
497 }
496 } 498 }
497 499
498 mlog_exit(status); 500 mlog_exit(status);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index a21a465490c4..3fed9e3d8992 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -85,7 +85,7 @@ enum ocfs2_unlock_action {
85}; 85};
86 86
87/* ocfs2_lock_res->l_flags flags. */ 87/* ocfs2_lock_res->l_flags flags. */
88#define OCFS2_LOCK_ATTACHED (0x00000001) /* have we initialized 88#define OCFS2_LOCK_ATTACHED (0x00000001) /* we have initialized
89 * the lvb */ 89 * the lvb */
90#define OCFS2_LOCK_BUSY (0x00000002) /* we are currently in 90#define OCFS2_LOCK_BUSY (0x00000002) /* we are currently in
91 * dlm_lock */ 91 * dlm_lock */
@@ -473,6 +473,9 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
473 (____gd)->bg_signature); \ 473 (____gd)->bg_signature); \
474} while (0) 474} while (0)
475 475
476#define OCFS2_IS_VALID_XATTR_BLOCK(ptr) \
477 (!strcmp((ptr)->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE))
478
476static inline unsigned long ino_from_blkno(struct super_block *sb, 479static inline unsigned long ino_from_blkno(struct super_block *sb,
477 u64 blkno) 480 u64 blkno)
478{ 481{
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index f24ce3d3f956..5e0c0d0aef7d 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -86,7 +86,8 @@
86#define OCFS2_CLEAR_INCOMPAT_FEATURE(sb,mask) \ 86#define OCFS2_CLEAR_INCOMPAT_FEATURE(sb,mask) \
87 OCFS2_SB(sb)->s_feature_incompat &= ~(mask) 87 OCFS2_SB(sb)->s_feature_incompat &= ~(mask)
88 88
89#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB 89#define OCFS2_FEATURE_COMPAT_SUPP (OCFS2_FEATURE_COMPAT_BACKUP_SB \
90 | OCFS2_FEATURE_COMPAT_JBD2_SB)
90#define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ 91#define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \
91 | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \ 92 | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \
92 | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \ 93 | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \
@@ -153,6 +154,11 @@
153#define OCFS2_FEATURE_COMPAT_BACKUP_SB 0x0001 154#define OCFS2_FEATURE_COMPAT_BACKUP_SB 0x0001
154 155
155/* 156/*
157 * The filesystem will correctly handle journal feature bits.
158 */
159#define OCFS2_FEATURE_COMPAT_JBD2_SB 0x0002
160
161/*
156 * Unwritten extents support. 162 * Unwritten extents support.
157 */ 163 */
158#define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 0x0001 164#define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 0x0001
@@ -742,12 +748,12 @@ struct ocfs2_group_desc
742 */ 748 */
743struct ocfs2_xattr_entry { 749struct ocfs2_xattr_entry {
744 __le32 xe_name_hash; /* hash value of xattr prefix+suffix. */ 750 __le32 xe_name_hash; /* hash value of xattr prefix+suffix. */
745 __le16 xe_name_offset; /* byte offset from the 1st etnry in the local 751 __le16 xe_name_offset; /* byte offset from the 1st entry in the
746 local xattr storage(inode, xattr block or 752 local xattr storage(inode, xattr block or
747 xattr bucket). */ 753 xattr bucket). */
748 __u8 xe_name_len; /* xattr name len, does't include prefix. */ 754 __u8 xe_name_len; /* xattr name len, does't include prefix. */
749 __u8 xe_type; /* the low 7 bits indicates the name prefix's 755 __u8 xe_type; /* the low 7 bits indicate the name prefix
750 * type and the highest 1 bits indicate whether 756 * type and the highest bit indicates whether
751 * the EA is stored in the local storage. */ 757 * the EA is stored in the local storage. */
752 __le64 xe_value_size; /* real xattr value length. */ 758 __le64 xe_value_size; /* real xattr value length. */
753}; 759};
@@ -766,9 +772,10 @@ struct ocfs2_xattr_header {
766 xattr. */ 772 xattr. */
767 __le16 xh_name_value_len; /* total length of name/value 773 __le16 xh_name_value_len; /* total length of name/value
768 length in this bucket. */ 774 length in this bucket. */
769 __le16 xh_num_buckets; /* bucket nums in one extent 775 __le16 xh_num_buckets; /* Number of xattr buckets
770 record, only valid in the 776 in this extent record,
771 first bucket. */ 777 only valid in the first
778 bucket. */
772 __le64 xh_csum; 779 __le64 xh_csum;
773 struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */ 780 struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */
774}; 781};
@@ -776,8 +783,8 @@ struct ocfs2_xattr_header {
776/* 783/*
777 * On disk structure for xattr value root. 784 * On disk structure for xattr value root.
778 * 785 *
779 * It is used when one extended attribute's size is larger, and we will save it 786 * When an xattr's value is large enough, it is stored in an external
780 * in an outside cluster. It will stored in a b-tree like file content. 787 * b-tree like file data. The xattr value root points to this structure.
781 */ 788 */
782struct ocfs2_xattr_value_root { 789struct ocfs2_xattr_value_root {
783/*00*/ __le32 xr_clusters; /* clusters covered by xattr value. */ 790/*00*/ __le32 xr_clusters; /* clusters covered by xattr value. */
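
The xe_type comment above packs two fields into one byte: the low 7 bits select the name-prefix index and the top bit records whether the value is stored locally (inline) rather than in an external tree. Purely for illustration, accessors for that layout might look like the sketch below; the names are hypothetical and are not the helpers the header actually defines:

	#define XE_TYPE_MASK	0x7f	/* low 7 bits: name prefix index */
	#define XE_LOCAL_FLAG	0x80	/* high bit: value kept in local storage */

	static inline int xe_prefix_index(struct ocfs2_xattr_entry *xe)
	{
		return xe->xe_type & XE_TYPE_MASK;
	}

	static inline int xe_value_is_local(struct ocfs2_xattr_entry *xe)
	{
		return (xe->xe_type & XE_LOCAL_FLAG) != 0;
	}
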
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index faec2d879357..9b76d41a8ac6 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -740,6 +740,9 @@ static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb)
740 740
741static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb) 741static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb)
742{ 742{
743 if (!lksb->lksb_fsdlm.sb_lvbptr)
744 lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb +
745 sizeof(struct dlm_lksb);
743 return (void *)(lksb->lksb_fsdlm.sb_lvbptr); 746 return (void *)(lksb->lksb_fsdlm.sb_lvbptr);
744} 747}
745 748
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 802c41492214..74d7367ade13 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -3,25 +3,20 @@
3 * 3 *
4 * xattr.c 4 * xattr.c
5 * 5 *
6 * Copyright (C) 2008 Oracle. All rights reserved. 6 * Copyright (C) 2004, 2008 Oracle. All rights reserved.
7 * 7 *
8 * CREDITS: 8 * CREDITS:
 9 * Lots of code in this file is taken from ext3. 9 * Lots of code in this file is copied from linux/fs/ext3/xattr.c.
10 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
10 * 11 *
11 * This program is free software; you can redistribute it and/or 12 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public 13 * modify it under the terms of the GNU General Public
13 * License as published by the Free Software Foundation; either 14 * License version 2 as published by the Free Software Foundation.
14 * version 2 of the License, or (at your option) any later version.
15 * 15 *
16 * This program is distributed in the hope that it will be useful, 16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details. 19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public
22 * License along with this program; if not, write to the
23 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 * Boston, MA 021110-1307, USA.
25 */ 20 */
26 21
27#include <linux/capability.h> 22#include <linux/capability.h>
@@ -83,7 +78,7 @@ struct xattr_handler *ocfs2_xattr_handlers[] = {
83 NULL 78 NULL
84}; 79};
85 80
86static struct xattr_handler *ocfs2_xattr_handler_map[] = { 81static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
87 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, 82 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler,
88 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, 83 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler,
89}; 84};
@@ -116,6 +111,10 @@ static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
116 int *block_off, 111 int *block_off,
117 int *new_offset); 112 int *new_offset);
118 113
114static int ocfs2_xattr_block_find(struct inode *inode,
115 int name_index,
116 const char *name,
117 struct ocfs2_xattr_search *xs);
119static int ocfs2_xattr_index_block_find(struct inode *inode, 118static int ocfs2_xattr_index_block_find(struct inode *inode,
120 struct buffer_head *root_bh, 119 struct buffer_head *root_bh,
121 int name_index, 120 int name_index,
@@ -137,6 +136,24 @@ static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
137static int ocfs2_delete_xattr_index_block(struct inode *inode, 136static int ocfs2_delete_xattr_index_block(struct inode *inode,
138 struct buffer_head *xb_bh); 137 struct buffer_head *xb_bh);
139 138
139static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
140{
141 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
142}
143
144static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
145{
146 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
147}
148
149static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
150{
151 u16 len = sb->s_blocksize -
152 offsetof(struct ocfs2_xattr_header, xh_entries);
153
154 return len / sizeof(struct ocfs2_xattr_entry);
155}
156
140static inline const char *ocfs2_xattr_prefix(int name_index) 157static inline const char *ocfs2_xattr_prefix(int name_index)
141{ 158{
142 struct xattr_handler *handler = NULL; 159 struct xattr_handler *handler = NULL;
@@ -542,14 +559,12 @@ static int ocfs2_xattr_block_list(struct inode *inode,
542 mlog_errno(ret); 559 mlog_errno(ret);
543 return ret; 560 return ret;
544 } 561 }
545 /*Verify the signature of xattr block*/
546 if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
547 strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
548 ret = -EFAULT;
549 goto cleanup;
550 }
551 562
552 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 563 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
564 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
565 ret = -EIO;
566 goto cleanup;
567 }
553 568
554 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 569 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
555 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 570 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
@@ -749,47 +764,25 @@ static int ocfs2_xattr_block_get(struct inode *inode,
749 size_t buffer_size, 764 size_t buffer_size,
750 struct ocfs2_xattr_search *xs) 765 struct ocfs2_xattr_search *xs)
751{ 766{
752 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
753 struct buffer_head *blk_bh = NULL;
754 struct ocfs2_xattr_block *xb; 767 struct ocfs2_xattr_block *xb;
755 struct ocfs2_xattr_value_root *xv; 768 struct ocfs2_xattr_value_root *xv;
756 size_t size; 769 size_t size;
757 int ret = -ENODATA, name_offset, name_len, block_off, i; 770 int ret = -ENODATA, name_offset, name_len, block_off, i;
758 771
759 if (!di->i_xattr_loc)
760 return ret;
761
762 memset(&xs->bucket, 0, sizeof(xs->bucket)); 772 memset(&xs->bucket, 0, sizeof(xs->bucket));
763 773
764 ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); 774 ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
765 if (ret < 0) { 775 if (ret) {
766 mlog_errno(ret); 776 mlog_errno(ret);
767 return ret;
768 }
769 /*Verify the signature of xattr block*/
770 if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
771 strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
772 ret = -EFAULT;
773 goto cleanup; 777 goto cleanup;
774 } 778 }
775 779
776 xs->xattr_bh = blk_bh; 780 if (xs->not_found) {
777 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 781 ret = -ENODATA;
778
779 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
780 xs->header = &xb->xb_attrs.xb_header;
781 xs->base = (void *)xs->header;
782 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
783 xs->here = xs->header->xh_entries;
784
785 ret = ocfs2_xattr_find_entry(name_index, name, xs);
786 } else
787 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
788 name_index,
789 name, xs);
790
791 if (ret)
792 goto cleanup; 782 goto cleanup;
783 }
784
785 xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
793 size = le64_to_cpu(xs->here->xe_value_size); 786 size = le64_to_cpu(xs->here->xe_value_size);
794 if (buffer) { 787 if (buffer) {
795 ret = -ERANGE; 788 ret = -ERANGE;
@@ -828,7 +821,8 @@ cleanup:
828 brelse(xs->bucket.bhs[i]); 821 brelse(xs->bucket.bhs[i]);
829 memset(&xs->bucket, 0, sizeof(xs->bucket)); 822 memset(&xs->bucket, 0, sizeof(xs->bucket));
830 823
831 brelse(blk_bh); 824 brelse(xs->xattr_bh);
825 xs->xattr_bh = NULL;
832 return ret; 826 return ret;
833} 827}
834 828
@@ -837,11 +831,11 @@ cleanup:
837 * Copy an extended attribute into the buffer provided. 831 * Copy an extended attribute into the buffer provided.
838 * Buffer is NULL to compute the size of buffer required. 832 * Buffer is NULL to compute the size of buffer required.
839 */ 833 */
840int ocfs2_xattr_get(struct inode *inode, 834static int ocfs2_xattr_get(struct inode *inode,
841 int name_index, 835 int name_index,
842 const char *name, 836 const char *name,
843 void *buffer, 837 void *buffer,
844 size_t buffer_size) 838 size_t buffer_size)
845{ 839{
846 int ret; 840 int ret;
847 struct ocfs2_dinode *di = NULL; 841 struct ocfs2_dinode *di = NULL;
@@ -871,7 +865,7 @@ int ocfs2_xattr_get(struct inode *inode,
871 down_read(&oi->ip_xattr_sem); 865 down_read(&oi->ip_xattr_sem);
872 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, 866 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
873 buffer_size, &xis); 867 buffer_size, &xis);
874 if (ret == -ENODATA) 868 if (ret == -ENODATA && di->i_xattr_loc)
875 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, 869 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
876 buffer_size, &xbs); 870 buffer_size, &xbs);
877 up_read(&oi->ip_xattr_sem); 871 up_read(&oi->ip_xattr_sem);
@@ -1229,7 +1223,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1229 1223
1230 free = min_offs - ((void *)last - xs->base) - sizeof(__u32); 1224 free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
1231 if (free < 0) 1225 if (free < 0)
1232 return -EFAULT; 1226 return -EIO;
1233 1227
1234 if (!xs->not_found) { 1228 if (!xs->not_found) {
1235 size_t size = 0; 1229 size_t size = 0;
@@ -1514,10 +1508,9 @@ static int ocfs2_xattr_free_block(struct inode *inode,
1514 goto out; 1508 goto out;
1515 } 1509 }
1516 1510
1517 /*Verify the signature of xattr block*/ 1511 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1518 if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, 1512 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
1519 strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { 1513 ret = -EIO;
1520 ret = -EFAULT;
1521 goto out; 1514 goto out;
1522 } 1515 }
1523 1516
@@ -1527,7 +1520,6 @@ static int ocfs2_xattr_free_block(struct inode *inode,
1527 goto out; 1520 goto out;
1528 } 1521 }
1529 1522
1530 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1531 blk = le64_to_cpu(xb->xb_blkno); 1523 blk = le64_to_cpu(xb->xb_blkno);
1532 bit = le16_to_cpu(xb->xb_suballoc_bit); 1524 bit = le16_to_cpu(xb->xb_suballoc_bit);
1533 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 1525 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
@@ -1771,15 +1763,14 @@ static int ocfs2_xattr_block_find(struct inode *inode,
1771 mlog_errno(ret); 1763 mlog_errno(ret);
1772 return ret; 1764 return ret;
1773 } 1765 }
1774 /*Verify the signature of xattr block*/ 1766
1775 if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, 1767 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1776 strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { 1768 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
1777 ret = -EFAULT; 1769 ret = -EIO;
1778 goto cleanup; 1770 goto cleanup;
1779 } 1771 }
1780 1772
1781 xs->xattr_bh = blk_bh; 1773 xs->xattr_bh = blk_bh;
1782 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1783 1774
1784 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 1775 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1785 xs->header = &xb->xb_attrs.xb_header; 1776 xs->header = &xb->xb_attrs.xb_header;
@@ -1806,52 +1797,6 @@ cleanup:
1806} 1797}
1807 1798
1808/* 1799/*
1809 * When all the xattrs are deleted from index btree, the ocfs2_xattr_tree
1810 * will be erased and ocfs2_xattr_block will have its ocfs2_xattr_header
1811 * re-initialized.
1812 */
1813static int ocfs2_restore_xattr_block(struct inode *inode,
1814 struct ocfs2_xattr_search *xs)
1815{
1816 int ret;
1817 handle_t *handle;
1818 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1819 struct ocfs2_xattr_block *xb =
1820 (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1821 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
1822 u16 xb_flags = le16_to_cpu(xb->xb_flags);
1823
1824 BUG_ON(!(xb_flags & OCFS2_XATTR_INDEXED) ||
1825 le16_to_cpu(el->l_next_free_rec) != 0);
1826
1827 handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1828 if (IS_ERR(handle)) {
1829 ret = PTR_ERR(handle);
1830 handle = NULL;
1831 goto out;
1832 }
1833
1834 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1835 OCFS2_JOURNAL_ACCESS_WRITE);
1836 if (ret < 0) {
1837 mlog_errno(ret);
1838 goto out_commit;
1839 }
1840
1841 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
1842 offsetof(struct ocfs2_xattr_block, xb_attrs));
1843
1844 xb->xb_flags = cpu_to_le16(xb_flags & ~OCFS2_XATTR_INDEXED);
1845
1846 ocfs2_journal_dirty(handle, xs->xattr_bh);
1847
1848out_commit:
1849 ocfs2_commit_trans(osb, handle);
1850out:
1851 return ret;
1852}
1853
1854/*
1855 * ocfs2_xattr_block_set() 1800 * ocfs2_xattr_block_set()
1856 * 1801 *
1857 * Set, replace or remove an extended attribute into external block. 1802 * Set, replace or remove an extended attribute into external block.
@@ -1961,8 +1906,6 @@ out:
1961 } 1906 }
1962 1907
1963 ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs); 1908 ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs);
1964 if (!ret && xblk->xb_attrs.xb_root.xt_list.l_next_free_rec == 0)
1965 ret = ocfs2_restore_xattr_block(inode, xs);
1966 1909
1967end: 1910end:
1968 1911
@@ -2398,7 +2341,8 @@ static int ocfs2_xattr_index_block_find(struct inode *inode,
2398 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash); 2341 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
2399 2342
2400 mlog(0, "find xattr extent rec %u clusters from %llu, the first hash " 2343 mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
2401 "in the rec is %u\n", num_clusters, p_blkno, first_hash); 2344 "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
2345 first_hash);
2402 2346
2403 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, 2347 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
2404 p_blkno, first_hash, num_clusters, xs); 2348 p_blkno, first_hash, num_clusters, xs);
@@ -2422,7 +2366,7 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
2422 memset(&bucket, 0, sizeof(bucket)); 2366 memset(&bucket, 0, sizeof(bucket));
2423 2367
2424 mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n", 2368 mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
2425 clusters, blkno); 2369 clusters, (unsigned long long)blkno);
2426 2370
2427 for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) { 2371 for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) {
2428 ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, 2372 ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket,
@@ -2440,7 +2384,8 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
2440 if (i == 0) 2384 if (i == 0)
2441 num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets); 2385 num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets);
2442 2386
2443 mlog(0, "iterating xattr bucket %llu, first hash %u\n", blkno, 2387 mlog(0, "iterating xattr bucket %llu, first hash %u\n",
2388 (unsigned long long)blkno,
2444 le32_to_cpu(bucket.xh->xh_entries[0].xe_name_hash)); 2389 le32_to_cpu(bucket.xh->xh_entries[0].xe_name_hash));
2445 if (func) { 2390 if (func) {
2446 ret = func(inode, &bucket, para); 2391 ret = func(inode, &bucket, para);
@@ -2700,9 +2645,9 @@ static int ocfs2_xattr_update_xattr_search(struct inode *inode,
2700 return ret; 2645 return ret;
2701 } 2646 }
2702 2647
2703 i = xs->here - old_xh->xh_entries;
2704 xs->here = &xs->header->xh_entries[i];
2705 } 2648 }
2649 i = xs->here - old_xh->xh_entries;
2650 xs->here = &xs->header->xh_entries[i];
2706 } 2651 }
2707 2652
2708 return ret; 2653 return ret;
@@ -2776,7 +2721,8 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
2776 */ 2721 */
2777 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); 2722 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
2778 2723
2779 mlog(0, "allocate 1 cluster from %llu to xattr block\n", blkno); 2724 mlog(0, "allocate 1 cluster from %llu to xattr block\n",
2725 (unsigned long long)blkno);
2780 2726
2781 xh_bh = sb_getblk(inode->i_sb, blkno); 2727 xh_bh = sb_getblk(inode->i_sb, blkno);
2782 if (!xh_bh) { 2728 if (!xh_bh) {
@@ -2818,7 +2764,11 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
2818 if (data_bh) 2764 if (data_bh)
2819 ocfs2_journal_dirty(handle, data_bh); 2765 ocfs2_journal_dirty(handle, data_bh);
2820 2766
2821 ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh); 2767 ret = ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh);
2768 if (ret) {
2769 mlog_errno(ret);
2770 goto out_commit;
2771 }
2822 2772
2823 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ 2773 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
2824 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - 2774 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
@@ -2941,8 +2891,8 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
2941 2891
2942 mlog(0, "adjust xattr bucket in %llu, count = %u, " 2892 mlog(0, "adjust xattr bucket in %llu, count = %u, "
2943 "xh_free_start = %u, xh_name_value_len = %u.\n", 2893 "xh_free_start = %u, xh_name_value_len = %u.\n",
2944 blkno, le16_to_cpu(xh->xh_count), xh_free_start, 2894 (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
2945 le16_to_cpu(xh->xh_name_value_len)); 2895 xh_free_start, le16_to_cpu(xh->xh_name_value_len));
2946 2896
2947 /* 2897 /*
2948 * sort all the entries by their offset. 2898 * sort all the entries by their offset.
@@ -3058,7 +3008,7 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
3058 prev_blkno += (num_clusters - 1) * bpc + bpc / 2; 3008 prev_blkno += (num_clusters - 1) * bpc + bpc / 2;
3059 3009
3060 mlog(0, "move half of xattrs in cluster %llu to %llu\n", 3010 mlog(0, "move half of xattrs in cluster %llu to %llu\n",
3061 prev_blkno, new_blkno); 3011 (unsigned long long)prev_blkno, (unsigned long long)new_blkno);
3062 3012
3063 /* 3013 /*
3064 * We need to update the 1st half of the new cluster and 3014 * We need to update the 1st half of the new cluster and
@@ -3168,26 +3118,74 @@ static int ocfs2_read_xattr_bucket(struct inode *inode,
3168} 3118}
3169 3119
3170/* 3120/*
3171 * Move half num of the xattrs in old bucket(blk) to new bucket(new_blk). 3121 * Find the suitable pos when we divide a bucket into 2.
3122 * We have to make sure the xattrs with the same hash value exist
3123 * in the same bucket.
3124 *
3125 * If this ocfs2_xattr_header covers more than one hash value, find a
3126 * place where the hash value changes. Try to find the most even split.
3127 * The most common case is that all entries have different hash values,
3128 * and the first check we make will find a place to split.
3129 */
3130static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
3131{
3132 struct ocfs2_xattr_entry *entries = xh->xh_entries;
3133 int count = le16_to_cpu(xh->xh_count);
3134 int delta, middle = count / 2;
3135
3136 /*
3137 * We start at the middle. Each step gets farther away in both
3138 * directions. We therefore hit the change in hash value
3139 * nearest to the middle. Note that this loop does not execute for
3140 * count < 2.
3141 */
3142 for (delta = 0; delta < middle; delta++) {
3143 /* Let's check delta earlier than middle */
3144 if (cmp_xe(&entries[middle - delta - 1],
3145 &entries[middle - delta]))
3146 return middle - delta;
3147
3148 /* For even counts, don't walk off the end */
3149 if ((middle + delta + 1) == count)
3150 continue;
3151
3152 /* Now try delta past middle */
3153 if (cmp_xe(&entries[middle + delta],
3154 &entries[middle + delta + 1]))
3155 return middle + delta + 1;
3156 }
3157
3158 /* Every entry had the same hash */
3159 return count;
3160}
3161
3162/*
3163 * Move some xattrs in old bucket(blk) to new bucket(new_blk).
3172 * first_hash will record the 1st hash of the new bucket. 3164 * first_hash will record the 1st hash of the new bucket.
3165 *
3166 * Normally half of the xattrs will be moved. But we have to make
3167 * sure that the xattrs with the same hash value are stored in the
3168 * same bucket. If all the xattrs in this bucket have the same hash
3169 * value, the new bucket will be initialized as an empty one and the
3170 * first_hash will be initialized as (hash_value+1).
3173 */ 3171 */
3174static int ocfs2_half_xattr_bucket(struct inode *inode, 3172static int ocfs2_divide_xattr_bucket(struct inode *inode,
3175 handle_t *handle, 3173 handle_t *handle,
3176 u64 blk, 3174 u64 blk,
3177 u64 new_blk, 3175 u64 new_blk,
3178 u32 *first_hash, 3176 u32 *first_hash,
3179 int new_bucket_head) 3177 int new_bucket_head)
3180{ 3178{
3181 int ret, i; 3179 int ret, i;
3182 u16 count, start, len, name_value_len, xe_len, name_offset; 3180 int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
3183 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3181 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3184 struct buffer_head **s_bhs, **t_bhs = NULL; 3182 struct buffer_head **s_bhs, **t_bhs = NULL;
3185 struct ocfs2_xattr_header *xh; 3183 struct ocfs2_xattr_header *xh;
3186 struct ocfs2_xattr_entry *xe; 3184 struct ocfs2_xattr_entry *xe;
3187 int blocksize = inode->i_sb->s_blocksize; 3185 int blocksize = inode->i_sb->s_blocksize;
3188 3186
3189 mlog(0, "move half of xattrs from bucket %llu to %llu\n", 3187 mlog(0, "move some of xattrs from bucket %llu to %llu\n",
3190 blk, new_blk); 3188 (unsigned long long)blk, (unsigned long long)new_blk);
3191 3189
3192 s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS); 3190 s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
3193 if (!s_bhs) 3191 if (!s_bhs)
@@ -3220,21 +3218,44 @@ static int ocfs2_half_xattr_bucket(struct inode *inode,
3220 3218
3221 for (i = 0; i < blk_per_bucket; i++) { 3219 for (i = 0; i < blk_per_bucket; i++) {
3222 ret = ocfs2_journal_access(handle, inode, t_bhs[i], 3220 ret = ocfs2_journal_access(handle, inode, t_bhs[i],
3223 OCFS2_JOURNAL_ACCESS_CREATE); 3221 new_bucket_head ?
3222 OCFS2_JOURNAL_ACCESS_CREATE :
3223 OCFS2_JOURNAL_ACCESS_WRITE);
3224 if (ret) { 3224 if (ret) {
3225 mlog_errno(ret); 3225 mlog_errno(ret);
3226 goto out; 3226 goto out;
3227 } 3227 }
3228 } 3228 }
3229 3229
3230 xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
3231 count = le16_to_cpu(xh->xh_count);
3232 start = ocfs2_xattr_find_divide_pos(xh);
3233
3234 if (start == count) {
3235 xe = &xh->xh_entries[start-1];
3236
3237 /*
3238 * initialized a new empty bucket here.
3239 * The hash value is set as one larger than
3240 * that of the last entry in the previous bucket.
3241 */
3242 for (i = 0; i < blk_per_bucket; i++)
3243 memset(t_bhs[i]->b_data, 0, blocksize);
3244
3245 xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
3246 xh->xh_free_start = cpu_to_le16(blocksize);
3247 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
3248 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
3249
3250 goto set_num_buckets;
3251 }
3252
3230 /* copy the whole bucket to the new first. */ 3253 /* copy the whole bucket to the new first. */
3231 for (i = 0; i < blk_per_bucket; i++) 3254 for (i = 0; i < blk_per_bucket; i++)
3232 memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize); 3255 memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
3233 3256
3234 /* update the new bucket. */ 3257 /* update the new bucket. */
3235 xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data; 3258 xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
3236 count = le16_to_cpu(xh->xh_count);
3237 start = count / 2;
3238 3259
3239 /* 3260 /*
3240 * Calculate the total name/value len and xh_free_start for 3261 * Calculate the total name/value len and xh_free_start for
@@ -3291,6 +3312,7 @@ static int ocfs2_half_xattr_bucket(struct inode *inode,
3291 xh->xh_free_start = xe->xe_name_offset; 3312 xh->xh_free_start = xe->xe_name_offset;
3292 } 3313 }
3293 3314
3315set_num_buckets:
3294 /* set xh->xh_num_buckets for the new xh. */ 3316 /* set xh->xh_num_buckets for the new xh. */
3295 if (new_bucket_head) 3317 if (new_bucket_head)
3296 xh->xh_num_buckets = cpu_to_le16(1); 3318 xh->xh_num_buckets = cpu_to_le16(1);
@@ -3308,9 +3330,13 @@ static int ocfs2_half_xattr_bucket(struct inode *inode,
3308 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); 3330 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3309 3331
3310 /* 3332 /*
3311 * Now only update the 1st block of the old bucket. 3333 * Now only update the 1st block of the old bucket. If we
3312 * Please note that the entry has been sorted already above. 3334 * just added a new empty bucket, there is no need to modify
3335 * it.
3313 */ 3336 */
3337 if (start == count)
3338 goto out;
3339
3314 xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data; 3340 xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
3315 memset(&xh->xh_entries[start], 0, 3341 memset(&xh->xh_entries[start], 0,
3316 sizeof(struct ocfs2_xattr_entry) * (count - start)); 3342 sizeof(struct ocfs2_xattr_entry) * (count - start));
@@ -3358,7 +3384,8 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
3358 BUG_ON(s_blkno == t_blkno); 3384 BUG_ON(s_blkno == t_blkno);
3359 3385
3360 mlog(0, "cp bucket %llu to %llu, target is %d\n", 3386 mlog(0, "cp bucket %llu to %llu, target is %d\n",
3361 s_blkno, t_blkno, t_is_new); 3387 (unsigned long long)s_blkno, (unsigned long long)t_blkno,
3388 t_is_new);
3362 3389
3363 s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, 3390 s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
3364 GFP_NOFS); 3391 GFP_NOFS);
@@ -3382,6 +3409,8 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode,
3382 3409
3383 for (i = 0; i < blk_per_bucket; i++) { 3410 for (i = 0; i < blk_per_bucket; i++) {
3384 ret = ocfs2_journal_access(handle, inode, t_bhs[i], 3411 ret = ocfs2_journal_access(handle, inode, t_bhs[i],
3412 t_is_new ?
3413 OCFS2_JOURNAL_ACCESS_CREATE :
3385 OCFS2_JOURNAL_ACCESS_WRITE); 3414 OCFS2_JOURNAL_ACCESS_WRITE);
3386 if (ret) 3415 if (ret)
3387 goto out; 3416 goto out;
@@ -3428,7 +3457,8 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode,
3428 struct ocfs2_xattr_header *xh; 3457 struct ocfs2_xattr_header *xh;
3429 u64 to_blk_start = to_blk; 3458 u64 to_blk_start = to_blk;
3430 3459
3431 mlog(0, "cp xattrs from cluster %llu to %llu\n", src_blk, to_blk); 3460 mlog(0, "cp xattrs from cluster %llu to %llu\n",
3461 (unsigned long long)src_blk, (unsigned long long)to_blk);
3432 3462
3433 /* 3463 /*
3434 * We need to update the new cluster and 1 more for the update of 3464 * We need to update the new cluster and 1 more for the update of
@@ -3493,15 +3523,15 @@ out:
3493} 3523}
3494 3524
3495/* 3525/*
3496 * Move half of the xattrs in this cluster to the new cluster. 3526 * Move some xattrs in this cluster to the new cluster.
3497 * This function should only be called when bucket size == cluster size. 3527 * This function should only be called when bucket size == cluster size.
3498 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. 3528 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
3499 */ 3529 */
3500static int ocfs2_half_xattr_cluster(struct inode *inode, 3530static int ocfs2_divide_xattr_cluster(struct inode *inode,
3501 handle_t *handle, 3531 handle_t *handle,
3502 u64 prev_blk, 3532 u64 prev_blk,
3503 u64 new_blk, 3533 u64 new_blk,
3504 u32 *first_hash) 3534 u32 *first_hash)
3505{ 3535{
3506 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3536 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3507 int ret, credits = 2 * blk_per_bucket; 3537 int ret, credits = 2 * blk_per_bucket;
@@ -3515,8 +3545,8 @@ static int ocfs2_half_xattr_cluster(struct inode *inode,
3515 } 3545 }
3516 3546
3517 /* Move half of the xattr in start_blk to the next bucket. */ 3547 /* Move half of the xattr in start_blk to the next bucket. */
3518 return ocfs2_half_xattr_bucket(inode, handle, prev_blk, 3548 return ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
3519 new_blk, first_hash, 1); 3549 new_blk, first_hash, 1);
3520} 3550}
3521 3551
3522/* 3552/*
@@ -3559,7 +3589,8 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
3559 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); 3589 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3560 3590
3561 mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n", 3591 mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
3562 prev_blk, prev_clusters, new_blk); 3592 (unsigned long long)prev_blk, prev_clusters,
3593 (unsigned long long)new_blk);
3563 3594
3564 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) 3595 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1)
3565 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, 3596 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
@@ -3578,9 +3609,9 @@ static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
3578 last_blk, new_blk, 3609 last_blk, new_blk,
3579 v_start); 3610 v_start);
3580 else { 3611 else {
3581 ret = ocfs2_half_xattr_cluster(inode, handle, 3612 ret = ocfs2_divide_xattr_cluster(inode, handle,
3582 last_blk, new_blk, 3613 last_blk, new_blk,
3583 v_start); 3614 v_start);
3584 3615
3585 if ((*header_bh)->b_blocknr == last_blk && extend) 3616 if ((*header_bh)->b_blocknr == last_blk && extend)
3586 *extend = 0; 3617 *extend = 0;
@@ -3629,7 +3660,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
3629 mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, " 3660 mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
3630 "previous xattr blkno = %llu\n", 3661 "previous xattr blkno = %llu\n",
3631 (unsigned long long)OCFS2_I(inode)->ip_blkno, 3662 (unsigned long long)OCFS2_I(inode)->ip_blkno,
3632 prev_cpos, prev_blkno); 3663 prev_cpos, (unsigned long long)prev_blkno);
3633 3664
3634 ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); 3665 ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
3635 3666
@@ -3716,7 +3747,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
3716 } 3747 }
3717 } 3748 }
3718 mlog(0, "Insert %u clusters at block %llu for xattr at %u\n", 3749 mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
3719 num_bits, block, v_start); 3750 num_bits, (unsigned long long)block, v_start);
3720 ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block, 3751 ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
3721 num_bits, 0, meta_ac); 3752 num_bits, 0, meta_ac);
3722 if (ret < 0) { 3753 if (ret < 0) {
@@ -3761,7 +3792,7 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
3761 u16 bucket = le16_to_cpu(first_xh->xh_num_buckets); 3792 u16 bucket = le16_to_cpu(first_xh->xh_num_buckets);
3762 3793
3763 mlog(0, "extend xattr bucket in %llu, xattr extend rec starting " 3794 mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
3764 "from %llu, len = %u\n", start_blk, 3795 "from %llu, len = %u\n", (unsigned long long)start_blk,
3765 (unsigned long long)first_bh->b_blocknr, num_clusters); 3796 (unsigned long long)first_bh->b_blocknr, num_clusters);
3766 3797
3767 BUG_ON(bucket >= num_buckets); 3798 BUG_ON(bucket >= num_buckets);
@@ -3797,8 +3828,8 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
3797 } 3828 }
3798 3829
3799 /* Move half of the xattr in start_blk to the next bucket. */ 3830 /* Move half of the xattr in start_blk to the next bucket. */
3800 ret = ocfs2_half_xattr_bucket(inode, handle, start_blk, 3831 ret = ocfs2_divide_xattr_bucket(inode, handle, start_blk,
3801 start_blk + blk_per_bucket, NULL, 0); 3832 start_blk + blk_per_bucket, NULL, 0);
3802 3833
3803 le16_add_cpu(&first_xh->xh_num_buckets, 1); 3834 le16_add_cpu(&first_xh->xh_num_buckets, 1);
3804 ocfs2_journal_dirty(handle, first_bh); 3835 ocfs2_journal_dirty(handle, first_bh);
@@ -4146,7 +4177,7 @@ static int ocfs2_xattr_value_update_size(struct inode *inode,
4146 handle_t *handle = NULL; 4177 handle_t *handle = NULL;
4147 4178
4148 handle = ocfs2_start_trans(osb, 1); 4179 handle = ocfs2_start_trans(osb, 1);
4149 if (handle == NULL) { 4180 if (IS_ERR(handle)) {
4150 ret = -ENOMEM; 4181 ret = -ENOMEM;
4151 mlog_errno(ret); 4182 mlog_errno(ret);
4152 goto out; 4183 goto out;
@@ -4313,7 +4344,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
4313 } 4344 }
4314 4345
4315 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); 4346 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
4316 if (handle == NULL) { 4347 if (IS_ERR(handle)) {
4317 ret = -ENOMEM; 4348 ret = -ENOMEM;
4318 mlog_errno(ret); 4349 mlog_errno(ret);
4319 goto out; 4350 goto out;
@@ -4489,11 +4520,21 @@ out:
4489 return ret; 4520 return ret;
4490} 4521}
4491 4522
4492/* check whether the xattr bucket is filled up with the same hash value. */ 4523/*
4524 * check whether the xattr bucket is filled up with the same hash value.
4525 * If we want to insert the xattr with the same hash, return -ENOSPC.
4526 * If we want to insert a xattr with different hash value, go ahead
4527 * and ocfs2_divide_xattr_bucket will handle this.
4528 */
4493static int ocfs2_check_xattr_bucket_collision(struct inode *inode, 4529static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
4494 struct ocfs2_xattr_bucket *bucket) 4530 struct ocfs2_xattr_bucket *bucket,
4531 const char *name)
4495{ 4532{
4496 struct ocfs2_xattr_header *xh = bucket->xh; 4533 struct ocfs2_xattr_header *xh = bucket->xh;
4534 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
4535
4536 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
4537 return 0;
4497 4538
4498 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == 4539 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
4499 xh->xh_entries[0].xe_name_hash) { 4540 xh->xh_entries[0].xe_name_hash) {
@@ -4616,7 +4657,9 @@ try_again:
4616 * one bucket's worth, so check it here whether we need to 4657 * one bucket's worth, so check it here whether we need to
4617 * add a new bucket for the insert. 4658 * add a new bucket for the insert.
4618 */ 4659 */
4619 ret = ocfs2_check_xattr_bucket_collision(inode, &xs->bucket); 4660 ret = ocfs2_check_xattr_bucket_collision(inode,
4661 &xs->bucket,
4662 xi->name);
4620 if (ret) { 4663 if (ret) {
4621 mlog_errno(ret); 4664 mlog_errno(ret);
4622 goto out; 4665 goto out;
@@ -4727,14 +4770,11 @@ out:
4727/* 4770/*
4728 * 'trusted' attributes support 4771 * 'trusted' attributes support
4729 */ 4772 */
4730
4731#define XATTR_TRUSTED_PREFIX "trusted."
4732
4733static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list, 4773static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
4734 size_t list_size, const char *name, 4774 size_t list_size, const char *name,
4735 size_t name_len) 4775 size_t name_len)
4736{ 4776{
4737 const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX) - 1; 4777 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
4738 const size_t total_len = prefix_len + name_len + 1; 4778 const size_t total_len = prefix_len + name_len + 1;
4739 4779
4740 if (list && total_len <= list_size) { 4780 if (list && total_len <= list_size) {
@@ -4771,18 +4811,14 @@ struct xattr_handler ocfs2_xattr_trusted_handler = {
4771 .set = ocfs2_xattr_trusted_set, 4811 .set = ocfs2_xattr_trusted_set,
4772}; 4812};
4773 4813
4774
4775/* 4814/*
4776 * 'user' attributes support 4815 * 'user' attributes support
4777 */ 4816 */
4778
4779#define XATTR_USER_PREFIX "user."
4780
4781static size_t ocfs2_xattr_user_list(struct inode *inode, char *list, 4817static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
4782 size_t list_size, const char *name, 4818 size_t list_size, const char *name,
4783 size_t name_len) 4819 size_t name_len)
4784{ 4820{
4785 const size_t prefix_len = sizeof(XATTR_USER_PREFIX) - 1; 4821 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
4786 const size_t total_len = prefix_len + name_len + 1; 4822 const size_t total_len = prefix_len + name_len + 1;
4787 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 4823 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4788 4824
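The new ocfs2_xattr_find_divide_pos() added earlier in this file's hunks picks the bucket split point nearest the middle where the entry hash changes, so that entries sharing one hash value never straddle two buckets, and returns the full count when every entry hashes the same (the caller then starts a fresh, empty bucket). The following is a minimal user-space sketch of that search; the entry type and demo data are simplified stand-ins, not the real ocfs2 structures.

#include <stdio.h>

struct entry { unsigned int hash; };	/* stand-in for ocfs2_xattr_entry */

static int find_divide_pos(const struct entry *e, int count)
{
	int delta, middle = count / 2;

	/* Walk outward from the middle; the body never runs for count < 2. */
	for (delta = 0; delta < middle; delta++) {
		/* Try the boundary just before the middle first. */
		if (e[middle - delta - 1].hash != e[middle - delta].hash)
			return middle - delta;

		/* For even counts, don't step past the last entry. */
		if (middle + delta + 1 == count)
			continue;

		/* Then the boundary the same distance past the middle. */
		if (e[middle + delta].hash != e[middle + delta + 1].hash)
			return middle + delta + 1;
	}

	/* Every entry shares one hash: caller initializes an empty bucket. */
	return count;
}

int main(void)
{
	struct entry mixed[] = { {1}, {1}, {2}, {2}, {3}, {3} };
	struct entry same[]  = { {7}, {7}, {7}, {7} };

	printf("mixed: split at %d of 6\n", find_divide_pos(mixed, 6));
	printf("same:  split at %d of 4\n", find_divide_pos(same, 4));
	return 0;
}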
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index c25c7c62a059..1d8314c7656d 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -3,24 +3,16 @@
3 * 3 *
4 * xattr.h 4 * xattr.h
5 * 5 *
6 * Function prototypes 6 * Copyright (C) 2004, 2008 Oracle. All rights reserved.
7 *
8 * Copyright (C) 2008 Oracle. All rights reserved.
9 * 7 *
10 * This program is free software; you can redistribute it and/or 8 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public 9 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either 10 * License version 2 as published by the Free Software Foundation.
13 * version 2 of the License, or (at your option) any later version.
14 * 11 *
15 * This program is distributed in the hope that it will be useful, 12 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details. 15 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 021110-1307, USA.
24 */ 16 */
25 17
26#ifndef OCFS2_XATTR_H 18#ifndef OCFS2_XATTR_H
@@ -40,29 +32,11 @@ enum ocfs2_xattr_type {
40 32
41extern struct xattr_handler ocfs2_xattr_user_handler; 33extern struct xattr_handler ocfs2_xattr_user_handler;
42extern struct xattr_handler ocfs2_xattr_trusted_handler; 34extern struct xattr_handler ocfs2_xattr_trusted_handler;
43
44extern ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
45extern int ocfs2_xattr_get(struct inode *, int, const char *, void *, size_t);
46extern int ocfs2_xattr_set(struct inode *, int, const char *, const void *,
47 size_t, int);
48extern int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh);
49extern struct xattr_handler *ocfs2_xattr_handlers[]; 35extern struct xattr_handler *ocfs2_xattr_handlers[];
50 36
51static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 37ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
52{ 38int ocfs2_xattr_set(struct inode *, int, const char *, const void *,
53 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; 39 size_t, int);
54} 40int ocfs2_xattr_remove(struct inode *, struct buffer_head *);
55
56static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
57{
58 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
59}
60
61static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
62{
63 u16 len = sb->s_blocksize -
64 offsetof(struct ocfs2_xattr_header, xh_entries);
65 41
66 return len / sizeof(struct ocfs2_xattr_entry);
67}
68#endif /* OCFS2_XATTR_H */ 42#endif /* OCFS2_XATTR_H */
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 633f7a0ebb2c..6d5b213b8a9b 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -348,8 +348,8 @@ static ssize_t whole_disk_show(struct device *dev,
348static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, 348static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
349 whole_disk_show, NULL); 349 whole_disk_show, NULL);
350 350
351int add_partition(struct gendisk *disk, int partno, 351struct hd_struct *add_partition(struct gendisk *disk, int partno,
352 sector_t start, sector_t len, int flags) 352 sector_t start, sector_t len, int flags)
353{ 353{
354 struct hd_struct *p; 354 struct hd_struct *p;
355 dev_t devt = MKDEV(0, 0); 355 dev_t devt = MKDEV(0, 0);
@@ -361,15 +361,15 @@ int add_partition(struct gendisk *disk, int partno,
361 361
362 err = disk_expand_part_tbl(disk, partno); 362 err = disk_expand_part_tbl(disk, partno);
363 if (err) 363 if (err)
364 return err; 364 return ERR_PTR(err);
365 ptbl = disk->part_tbl; 365 ptbl = disk->part_tbl;
366 366
367 if (ptbl->part[partno]) 367 if (ptbl->part[partno])
368 return -EBUSY; 368 return ERR_PTR(-EBUSY);
369 369
370 p = kzalloc(sizeof(*p), GFP_KERNEL); 370 p = kzalloc(sizeof(*p), GFP_KERNEL);
371 if (!p) 371 if (!p)
372 return -ENOMEM; 372 return ERR_PTR(-EBUSY);
373 373
374 if (!init_part_stats(p)) { 374 if (!init_part_stats(p)) {
375 err = -ENOMEM; 375 err = -ENOMEM;
@@ -395,7 +395,7 @@ int add_partition(struct gendisk *disk, int partno,
395 395
396 err = blk_alloc_devt(p, &devt); 396 err = blk_alloc_devt(p, &devt);
397 if (err) 397 if (err)
398 goto out_free; 398 goto out_free_stats;
399 pdev->devt = devt; 399 pdev->devt = devt;
400 400
401 /* delay uevent until 'holders' subdir is created */ 401 /* delay uevent until 'holders' subdir is created */
@@ -424,18 +424,20 @@ int add_partition(struct gendisk *disk, int partno,
424 if (!ddev->uevent_suppress) 424 if (!ddev->uevent_suppress)
425 kobject_uevent(&pdev->kobj, KOBJ_ADD); 425 kobject_uevent(&pdev->kobj, KOBJ_ADD);
426 426
427 return 0; 427 return p;
428 428
429out_free_stats:
430 free_part_stats(p);
429out_free: 431out_free:
430 kfree(p); 432 kfree(p);
431 return err; 433 return ERR_PTR(err);
432out_del: 434out_del:
433 kobject_put(p->holder_dir); 435 kobject_put(p->holder_dir);
434 device_del(pdev); 436 device_del(pdev);
435out_put: 437out_put:
436 put_device(pdev); 438 put_device(pdev);
437 blk_free_devt(devt); 439 blk_free_devt(devt);
438 return err; 440 return ERR_PTR(err);
439} 441}
440 442
441/* Not exported, helper to add_disk(). */ 443/* Not exported, helper to add_disk(). */
@@ -566,15 +568,16 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
566 disk->disk_name, p, (unsigned long long) size); 568 disk->disk_name, p, (unsigned long long) size);
567 size = get_capacity(disk) - from; 569 size = get_capacity(disk) - from;
568 } 570 }
569 res = add_partition(disk, p, from, size, state->parts[p].flags); 571 part = add_partition(disk, p, from, size,
570 if (res) { 572 state->parts[p].flags);
571 printk(KERN_ERR " %s: p%d could not be added: %d\n", 573 if (IS_ERR(part)) {
572 disk->disk_name, p, -res); 574 printk(KERN_ERR " %s: p%d could not be added: %ld\n",
575 disk->disk_name, p, -PTR_ERR(part));
573 continue; 576 continue;
574 } 577 }
575#ifdef CONFIG_BLK_DEV_MD 578#ifdef CONFIG_BLK_DEV_MD
576 if (state->parts[p].flags & ADDPART_FLAG_RAID) 579 if (state->parts[p].flags & ADDPART_FLAG_RAID)
577 md_autodetect_dev(bdev->bd_dev+p); 580 md_autodetect_dev(part_to_dev(part)->devt);
578#endif 581#endif
579 } 582 }
580 kfree(state); 583 kfree(state);
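add_partition() now hands back the new struct hd_struct * instead of an int, encoding failures in the pointer itself so that rescan_partitions() can test IS_ERR() and report -PTR_ERR(). Below is a minimal, self-contained sketch of that calling convention; the ERR_PTR/IS_ERR/PTR_ERR helpers are re-created locally for illustration (the kernel's live in <linux/err.h>), and demo_add_partition() is a hypothetical stand-in for the real function.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	/* Error codes live in the top page of the address space. */
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct partition { int partno; };

/* Hypothetical stand-in: return the new object, or an error in the pointer. */
static struct partition *demo_add_partition(int partno, int slot_taken)
{
	struct partition *p;

	if (slot_taken)
		return ERR_PTR(-EBUSY);

	p = malloc(sizeof(*p));
	if (!p)
		return ERR_PTR(-ENOMEM);
	p->partno = partno;
	return p;
}

int main(void)
{
	struct partition *part;
	int i;

	/* Mirror the rescan_partitions() loop: skip slots that fail. */
	for (i = 1; i <= 2; i++) {
		part = demo_add_partition(i, i == 2 /* force a failure */);
		if (IS_ERR(part)) {
			printf("p%d could not be added: %ld\n", i, -PTR_ERR(part));
			continue;
		}
		printf("added p%d\n", part->partno);
		free(part);
	}
	return 0;
}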
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 486cf3fe7139..d4677603c889 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -371,7 +371,7 @@ static int lstats_show_proc(struct seq_file *m, void *v)
371 task->latency_record[i].time, 371 task->latency_record[i].time,
372 task->latency_record[i].max); 372 task->latency_record[i].max);
373 for (q = 0; q < LT_BACKTRACEDEPTH; q++) { 373 for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
374 char sym[KSYM_NAME_LEN]; 374 char sym[KSYM_SYMBOL_LEN];
375 char *c; 375 char *c;
376 if (!task->latency_record[i].backtrace[q]) 376 if (!task->latency_record[i].backtrace[q])
377 break; 377 break;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 94fcfff6863a..06ed10b7da9e 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -31,6 +31,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
31 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 31 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
32 inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */ 32 inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */
33 inode->i_mode = table->mode; 33 inode->i_mode = table->mode;
34 inode->i_uid = inode->i_gid = 0;
34 if (!table->child) { 35 if (!table->child) {
35 inode->i_mode |= S_IFREG; 36 inode->i_mode |= S_IFREG;
36 inode->i_op = &proc_sys_inode_operations; 37 inode->i_op = &proc_sys_inode_operations;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index b770c095e45c..3a8bdd7f5756 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -557,9 +557,9 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte)
557 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); 557 return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
558} 558}
559 559
560static unsigned long pte_to_pagemap_entry(pte_t pte) 560static u64 pte_to_pagemap_entry(pte_t pte)
561{ 561{
562 unsigned long pme = 0; 562 u64 pme = 0;
563 if (is_swap_pte(pte)) 563 if (is_swap_pte(pte))
564 pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) 564 pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte))
565 | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; 565 | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP;
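The pte_to_pagemap_entry() change matters because a pagemap entry is a 64-bit word whose flag and page-shift fields sit in the high bits; returning it as unsigned long silently truncates those bits on 32-bit kernels. A small sketch of the truncation follows, using uint32_t to emulate a 32-bit "unsigned long" so the loss shows even when built on a 64-bit host; the bit layout here is a simplified stand-in, not the exact pagemap format.

#include <inttypes.h>
#include <stdio.h>

#define PM_PSHIFT(x)	((uint64_t)(x) << 55)	/* page-size field, high bits */
#define PM_SWAP		((uint64_t)1 << 62)	/* "this is a swap entry" flag */

static uint32_t entry_as_32bit_long(uint64_t pme) { return (uint32_t)pme; }
static uint64_t entry_as_u64(uint64_t pme)        { return pme; }

int main(void)
{
	uint64_t pme = 0x1234 | PM_PSHIFT(12) | PM_SWAP;

	printf("full entry : %#" PRIx64 "\n", entry_as_u64(pme));
	/* The swap flag and page-shift field vanish in the 32-bit return. */
	printf("truncated  : %#" PRIx64 "\n",
	       (uint64_t)entry_as_32bit_long(pme));
	return 0;
}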
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 0c10a0b3f146..df26aa88fa47 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -1,43 +1,45 @@
1#include <linux/fs.h>
2#include <linux/init.h> 1#include <linux/init.h>
3#include <linux/proc_fs.h> 2#include <linux/proc_fs.h>
4#include <linux/sched.h> 3#include <linux/sched.h>
5#include <linux/seq_file.h>
6#include <linux/time.h> 4#include <linux/time.h>
7#include <asm/cputime.h> 5#include <asm/cputime.h>
8 6
9static int uptime_proc_show(struct seq_file *m, void *v) 7static int proc_calc_metrics(char *page, char **start, off_t off,
8 int count, int *eof, int len)
9{
10 if (len <= off + count)
11 *eof = 1;
12 *start = page + off;
13 len -= off;
14 if (len > count)
15 len = count;
16 if (len < 0)
17 len = 0;
18 return len;
19}
20
21static int uptime_read_proc(char *page, char **start, off_t off, int count,
22 int *eof, void *data)
10{ 23{
11 struct timespec uptime; 24 struct timespec uptime;
12 struct timespec idle; 25 struct timespec idle;
26 int len;
13 cputime_t idletime = cputime_add(init_task.utime, init_task.stime); 27 cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
14 28
15 do_posix_clock_monotonic_gettime(&uptime); 29 do_posix_clock_monotonic_gettime(&uptime);
16 monotonic_to_bootbased(&uptime); 30 monotonic_to_bootbased(&uptime);
17 cputime_to_timespec(idletime, &idle); 31 cputime_to_timespec(idletime, &idle);
18 seq_printf(m, "%lu.%02lu %lu.%02lu\n", 32 len = sprintf(page, "%lu.%02lu %lu.%02lu\n",
19 (unsigned long) uptime.tv_sec, 33 (unsigned long) uptime.tv_sec,
20 (uptime.tv_nsec / (NSEC_PER_SEC / 100)), 34 (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
21 (unsigned long) idle.tv_sec, 35 (unsigned long) idle.tv_sec,
22 (idle.tv_nsec / (NSEC_PER_SEC / 100))); 36 (idle.tv_nsec / (NSEC_PER_SEC / 100)));
23 return 0; 37 return proc_calc_metrics(page, start, off, count, eof, len);
24} 38}
25 39
26static int uptime_proc_open(struct inode *inode, struct file *file)
27{
28 return single_open(file, uptime_proc_show, NULL);
29}
30
31static const struct file_operations uptime_proc_fops = {
32 .open = uptime_proc_open,
33 .read = seq_read,
34 .llseek = seq_lseek,
35 .release = single_release,
36};
37
38static int __init proc_uptime_init(void) 40static int __init proc_uptime_init(void)
39{ 41{
40 proc_create("uptime", 0, NULL, &uptime_proc_fops); 42 create_proc_read_entry("uptime", 0, NULL, uptime_read_proc, NULL);
41 return 0; 43 return 0;
42} 44}
43module_init(proc_uptime_init); 45module_init(proc_uptime_init);
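The uptime revert goes back to the read_proc interface, where proc_calc_metrics() clamps a pre-formatted page buffer to the reader's offset and count and raises the EOF flag once the buffer is exhausted. A stand-alone sketch of that clamping arithmetic, with an ordinary string in place of the proc page:

#include <stdio.h>
#include <string.h>

/* Same arithmetic as proc_calc_metrics() in the hunk above. */
static int calc_metrics(const char *page, const char **start, long off,
			int count, int *eof, int len)
{
	if (len <= off + count)
		*eof = 1;
	*start = page + off;
	len -= off;
	if (len > count)
		len = count;
	if (len < 0)
		len = 0;
	return len;
}

int main(void)
{
	const char page[] = "123.45 67.89\n";	/* what uptime_read_proc sprintf()s */
	const char *start;
	int eof = 0;
	long off = 0;

	/* Consume the buffer five bytes at a time, as a small read(2) would. */
	while (!eof) {
		int n = calc_metrics(page, &start, off, 5, &eof,
				     (int)strlen(page));
		printf("read %d byte(s): \"%.*s\"\n", n, n, start);
		off += n;
		if (n == 0)
			break;
	}
	return 0;
}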
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 0a6aa2cc78f0..b49884c8c10e 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -234,8 +234,8 @@ int ubifs_bg_thread(void *info)
234 int err; 234 int err;
235 struct ubifs_info *c = info; 235 struct ubifs_info *c = info;
236 236
237 ubifs_msg("background thread \"%s\" started, PID %d", 237 dbg_msg("background thread \"%s\" started, PID %d",
238 c->bgt_name, current->pid); 238 c->bgt_name, current->pid);
239 set_freezable(); 239 set_freezable();
240 240
241 while (1) { 241 while (1) {
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 7186400750e7..510ffa0bbda4 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -101,21 +101,24 @@ static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key,
101 if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) { 101 if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) {
102 switch (type) { 102 switch (type) {
103 case UBIFS_INO_KEY: 103 case UBIFS_INO_KEY:
104 sprintf(p, "(%lu, %s)", key_inum(c, key), 104 sprintf(p, "(%lu, %s)", (unsigned long)key_inum(c, key),
105 get_key_type(type)); 105 get_key_type(type));
106 break; 106 break;
107 case UBIFS_DENT_KEY: 107 case UBIFS_DENT_KEY:
108 case UBIFS_XENT_KEY: 108 case UBIFS_XENT_KEY:
109 sprintf(p, "(%lu, %s, %#08x)", key_inum(c, key), 109 sprintf(p, "(%lu, %s, %#08x)",
110 (unsigned long)key_inum(c, key),
110 get_key_type(type), key_hash(c, key)); 111 get_key_type(type), key_hash(c, key));
111 break; 112 break;
112 case UBIFS_DATA_KEY: 113 case UBIFS_DATA_KEY:
113 sprintf(p, "(%lu, %s, %u)", key_inum(c, key), 114 sprintf(p, "(%lu, %s, %u)",
115 (unsigned long)key_inum(c, key),
114 get_key_type(type), key_block(c, key)); 116 get_key_type(type), key_block(c, key));
115 break; 117 break;
116 case UBIFS_TRUN_KEY: 118 case UBIFS_TRUN_KEY:
117 sprintf(p, "(%lu, %s)", 119 sprintf(p, "(%lu, %s)",
118 key_inum(c, key), get_key_type(type)); 120 (unsigned long)key_inum(c, key),
121 get_key_type(type));
119 break; 122 break;
120 default: 123 default:
121 sprintf(p, "(bad key type: %#08x, %#08x)", 124 sprintf(p, "(bad key type: %#08x, %#08x)",
@@ -364,8 +367,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
364 le32_to_cpu(mst->ihead_lnum)); 367 le32_to_cpu(mst->ihead_lnum));
365 printk(KERN_DEBUG "\tihead_offs %u\n", 368 printk(KERN_DEBUG "\tihead_offs %u\n",
366 le32_to_cpu(mst->ihead_offs)); 369 le32_to_cpu(mst->ihead_offs));
367 printk(KERN_DEBUG "\tindex_size %u\n", 370 printk(KERN_DEBUG "\tindex_size %llu\n",
368 le32_to_cpu(mst->index_size)); 371 (unsigned long long)le64_to_cpu(mst->index_size));
369 printk(KERN_DEBUG "\tlpt_lnum %u\n", 372 printk(KERN_DEBUG "\tlpt_lnum %u\n",
370 le32_to_cpu(mst->lpt_lnum)); 373 le32_to_cpu(mst->lpt_lnum));
371 printk(KERN_DEBUG "\tlpt_offs %u\n", 374 printk(KERN_DEBUG "\tlpt_offs %u\n",
@@ -1589,7 +1592,7 @@ static struct fsck_inode *add_inode(struct ubifs_info *c,
1589 1592
1590 if (inum > c->highest_inum) { 1593 if (inum > c->highest_inum) {
1591 ubifs_err("too high inode number, max. is %lu", 1594 ubifs_err("too high inode number, max. is %lu",
1592 c->highest_inum); 1595 (unsigned long)c->highest_inum);
1593 return ERR_PTR(-EINVAL); 1596 return ERR_PTR(-EINVAL);
1594 } 1597 }
1595 1598
@@ -1668,16 +1671,18 @@ static struct fsck_inode *read_add_inode(struct ubifs_info *c,
1668 ino_key_init(c, &key, inum); 1671 ino_key_init(c, &key, inum);
1669 err = ubifs_lookup_level0(c, &key, &znode, &n); 1672 err = ubifs_lookup_level0(c, &key, &znode, &n);
1670 if (!err) { 1673 if (!err) {
1671 ubifs_err("inode %lu not found in index", inum); 1674 ubifs_err("inode %lu not found in index", (unsigned long)inum);
1672 return ERR_PTR(-ENOENT); 1675 return ERR_PTR(-ENOENT);
1673 } else if (err < 0) { 1676 } else if (err < 0) {
1674 ubifs_err("error %d while looking up inode %lu", err, inum); 1677 ubifs_err("error %d while looking up inode %lu",
1678 err, (unsigned long)inum);
1675 return ERR_PTR(err); 1679 return ERR_PTR(err);
1676 } 1680 }
1677 1681
1678 zbr = &znode->zbranch[n]; 1682 zbr = &znode->zbranch[n];
1679 if (zbr->len < UBIFS_INO_NODE_SZ) { 1683 if (zbr->len < UBIFS_INO_NODE_SZ) {
1680 ubifs_err("bad node %lu node length %d", inum, zbr->len); 1684 ubifs_err("bad node %lu node length %d",
1685 (unsigned long)inum, zbr->len);
1681 return ERR_PTR(-EINVAL); 1686 return ERR_PTR(-EINVAL);
1682 } 1687 }
1683 1688
@@ -1697,7 +1702,7 @@ static struct fsck_inode *read_add_inode(struct ubifs_info *c,
1697 kfree(ino); 1702 kfree(ino);
1698 if (IS_ERR(fscki)) { 1703 if (IS_ERR(fscki)) {
1699 ubifs_err("error %ld while adding inode %lu node", 1704 ubifs_err("error %ld while adding inode %lu node",
1700 PTR_ERR(fscki), inum); 1705 PTR_ERR(fscki), (unsigned long)inum);
1701 return fscki; 1706 return fscki;
1702 } 1707 }
1703 1708
@@ -1786,7 +1791,8 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
1786 if (IS_ERR(fscki)) { 1791 if (IS_ERR(fscki)) {
1787 err = PTR_ERR(fscki); 1792 err = PTR_ERR(fscki);
1788 ubifs_err("error %d while processing data node and " 1793 ubifs_err("error %d while processing data node and "
1789 "trying to find inode node %lu", err, inum); 1794 "trying to find inode node %lu",
1795 err, (unsigned long)inum);
1790 goto out_dump; 1796 goto out_dump;
1791 } 1797 }
1792 1798
@@ -1819,7 +1825,8 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
1819 if (IS_ERR(fscki)) { 1825 if (IS_ERR(fscki)) {
1820 err = PTR_ERR(fscki); 1826 err = PTR_ERR(fscki);
1821 ubifs_err("error %d while processing entry node and " 1827 ubifs_err("error %d while processing entry node and "
1822 "trying to find inode node %lu", err, inum); 1828 "trying to find inode node %lu",
1829 err, (unsigned long)inum);
1823 goto out_dump; 1830 goto out_dump;
1824 } 1831 }
1825 1832
@@ -1832,7 +1839,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
1832 err = PTR_ERR(fscki); 1839 err = PTR_ERR(fscki);
1833 ubifs_err("error %d while processing entry node and " 1840 ubifs_err("error %d while processing entry node and "
1834 "trying to find parent inode node %lu", 1841 "trying to find parent inode node %lu",
1835 err, inum); 1842 err, (unsigned long)inum);
1836 goto out_dump; 1843 goto out_dump;
1837 } 1844 }
1838 1845
@@ -1923,7 +1930,8 @@ static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd)
1923 fscki->references != 1) { 1930 fscki->references != 1) {
1924 ubifs_err("directory inode %lu has %d " 1931 ubifs_err("directory inode %lu has %d "
1925 "direntries which refer it, but " 1932 "direntries which refer it, but "
1926 "should be 1", fscki->inum, 1933 "should be 1",
1934 (unsigned long)fscki->inum,
1927 fscki->references); 1935 fscki->references);
1928 goto out_dump; 1936 goto out_dump;
1929 } 1937 }
@@ -1931,27 +1939,29 @@ static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd)
1931 fscki->references != 0) { 1939 fscki->references != 0) {
1932 ubifs_err("root inode %lu has non-zero (%d) " 1940 ubifs_err("root inode %lu has non-zero (%d) "
1933 "direntries which refer it", 1941 "direntries which refer it",
1934 fscki->inum, fscki->references); 1942 (unsigned long)fscki->inum,
1943 fscki->references);
1935 goto out_dump; 1944 goto out_dump;
1936 } 1945 }
1937 if (fscki->calc_sz != fscki->size) { 1946 if (fscki->calc_sz != fscki->size) {
1938 ubifs_err("directory inode %lu size is %lld, " 1947 ubifs_err("directory inode %lu size is %lld, "
1939 "but calculated size is %lld", 1948 "but calculated size is %lld",
1940 fscki->inum, fscki->size, 1949 (unsigned long)fscki->inum,
1941 fscki->calc_sz); 1950 fscki->size, fscki->calc_sz);
1942 goto out_dump; 1951 goto out_dump;
1943 } 1952 }
1944 if (fscki->calc_cnt != fscki->nlink) { 1953 if (fscki->calc_cnt != fscki->nlink) {
1945 ubifs_err("directory inode %lu nlink is %d, " 1954 ubifs_err("directory inode %lu nlink is %d, "
1946 "but calculated nlink is %d", 1955 "but calculated nlink is %d",
1947 fscki->inum, fscki->nlink, 1956 (unsigned long)fscki->inum,
1948 fscki->calc_cnt); 1957 fscki->nlink, fscki->calc_cnt);
1949 goto out_dump; 1958 goto out_dump;
1950 } 1959 }
1951 } else { 1960 } else {
1952 if (fscki->references != fscki->nlink) { 1961 if (fscki->references != fscki->nlink) {
1953 ubifs_err("inode %lu nlink is %d, but " 1962 ubifs_err("inode %lu nlink is %d, but "
1954 "calculated nlink is %d", fscki->inum, 1963 "calculated nlink is %d",
1964 (unsigned long)fscki->inum,
1955 fscki->nlink, fscki->references); 1965 fscki->nlink, fscki->references);
1956 goto out_dump; 1966 goto out_dump;
1957 } 1967 }
@@ -1959,20 +1969,21 @@ static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd)
1959 if (fscki->xattr_sz != fscki->calc_xsz) { 1969 if (fscki->xattr_sz != fscki->calc_xsz) {
1960 ubifs_err("inode %lu has xattr size %u, but " 1970 ubifs_err("inode %lu has xattr size %u, but "
1961 "calculated size is %lld", 1971 "calculated size is %lld",
1962 fscki->inum, fscki->xattr_sz, 1972 (unsigned long)fscki->inum, fscki->xattr_sz,
1963 fscki->calc_xsz); 1973 fscki->calc_xsz);
1964 goto out_dump; 1974 goto out_dump;
1965 } 1975 }
1966 if (fscki->xattr_cnt != fscki->calc_xcnt) { 1976 if (fscki->xattr_cnt != fscki->calc_xcnt) {
1967 ubifs_err("inode %lu has %u xattrs, but " 1977 ubifs_err("inode %lu has %u xattrs, but "
1968 "calculated count is %lld", fscki->inum, 1978 "calculated count is %lld",
1979 (unsigned long)fscki->inum,
1969 fscki->xattr_cnt, fscki->calc_xcnt); 1980 fscki->xattr_cnt, fscki->calc_xcnt);
1970 goto out_dump; 1981 goto out_dump;
1971 } 1982 }
1972 if (fscki->xattr_nms != fscki->calc_xnms) { 1983 if (fscki->xattr_nms != fscki->calc_xnms) {
1973 ubifs_err("inode %lu has xattr names' size %u, but " 1984 ubifs_err("inode %lu has xattr names' size %u, but "
1974 "calculated names' size is %lld", 1985 "calculated names' size is %lld",
1975 fscki->inum, fscki->xattr_nms, 1986 (unsigned long)fscki->inum, fscki->xattr_nms,
1976 fscki->calc_xnms); 1987 fscki->calc_xnms);
1977 goto out_dump; 1988 goto out_dump;
1978 } 1989 }
@@ -1985,11 +1996,12 @@ out_dump:
1985 ino_key_init(c, &key, fscki->inum); 1996 ino_key_init(c, &key, fscki->inum);
1986 err = ubifs_lookup_level0(c, &key, &znode, &n); 1997 err = ubifs_lookup_level0(c, &key, &znode, &n);
1987 if (!err) { 1998 if (!err) {
1988 ubifs_err("inode %lu not found in index", fscki->inum); 1999 ubifs_err("inode %lu not found in index",
2000 (unsigned long)fscki->inum);
1989 return -ENOENT; 2001 return -ENOENT;
1990 } else if (err < 0) { 2002 } else if (err < 0) {
1991 ubifs_err("error %d while looking up inode %lu", 2003 ubifs_err("error %d while looking up inode %lu",
1992 err, fscki->inum); 2004 err, (unsigned long)fscki->inum);
1993 return err; 2005 return err;
1994 } 2006 }
1995 2007
@@ -2007,7 +2019,7 @@ out_dump:
2007 } 2019 }
2008 2020
2009 ubifs_msg("dump of the inode %lu sitting in LEB %d:%d", 2021 ubifs_msg("dump of the inode %lu sitting in LEB %d:%d",
2010 fscki->inum, zbr->lnum, zbr->offs); 2022 (unsigned long)fscki->inum, zbr->lnum, zbr->offs);
2011 dbg_dump_node(c, ino); 2023 dbg_dump_node(c, ino);
2012 kfree(ino); 2024 kfree(ino);
2013 return -EINVAL; 2025 return -EINVAL;
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 526c01ec8003..0422c98e1793 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -161,7 +161,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
161 return ERR_PTR(-EINVAL); 161 return ERR_PTR(-EINVAL);
162 } 162 }
163 ubifs_warn("running out of inode numbers (current %lu, max %d)", 163 ubifs_warn("running out of inode numbers (current %lu, max %d)",
164 c->highest_inum, INUM_WATERMARK); 164 (unsigned long)c->highest_inum, INUM_WATERMARK);
165 } 165 }
166 166
167 inode->i_ino = ++c->highest_inum; 167 inode->i_ino = ++c->highest_inum;
@@ -428,7 +428,8 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
428 dbg_gen("feed '%s', ino %llu, new f_pos %#x", 428 dbg_gen("feed '%s', ino %llu, new f_pos %#x",
429 dent->name, (unsigned long long)le64_to_cpu(dent->inum), 429 dent->name, (unsigned long long)le64_to_cpu(dent->inum),
430 key_hash_flash(c, &dent->key)); 430 key_hash_flash(c, &dent->key));
431 ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum); 431 ubifs_assert(le64_to_cpu(dent->ch.sqnum) >
432 ubifs_inode(dir)->creat_sqnum);
432 433
433 nm.len = le16_to_cpu(dent->nlen); 434 nm.len = le16_to_cpu(dent->nlen);
434 over = filldir(dirent, dent->name, nm.len, file->f_pos, 435 over = filldir(dirent, dent->name, nm.len, file->f_pos,
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 51cf511d44d9..2624411d9758 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -72,7 +72,7 @@ static int read_block(struct inode *inode, void *addr, unsigned int block,
72 return err; 72 return err;
73 } 73 }
74 74
75 ubifs_assert(dn->ch.sqnum > ubifs_inode(inode)->creat_sqnum); 75 ubifs_assert(le64_to_cpu(dn->ch.sqnum) > ubifs_inode(inode)->creat_sqnum);
76 76
77 len = le32_to_cpu(dn->size); 77 len = le32_to_cpu(dn->size);
78 if (len <= 0 || len > UBIFS_BLOCK_SIZE) 78 if (len <= 0 || len > UBIFS_BLOCK_SIZE)
@@ -626,7 +626,7 @@ static int populate_page(struct ubifs_info *c, struct page *page,
626 626
627 dn = bu->buf + (bu->zbranch[nn].offs - offs); 627 dn = bu->buf + (bu->zbranch[nn].offs - offs);
628 628
629 ubifs_assert(dn->ch.sqnum > 629 ubifs_assert(le64_to_cpu(dn->ch.sqnum) >
630 ubifs_inode(inode)->creat_sqnum); 630 ubifs_inode(inode)->creat_sqnum);
631 631
632 len = le32_to_cpu(dn->size); 632 len = le32_to_cpu(dn->size);
@@ -691,32 +691,22 @@ out_err:
691/** 691/**
692 * ubifs_do_bulk_read - do bulk-read. 692 * ubifs_do_bulk_read - do bulk-read.
693 * @c: UBIFS file-system description object 693 * @c: UBIFS file-system description object
694 * @page1: first page 694 * @bu: bulk-read information
695 * @page1: first page to read
695 * 696 *
696 * This function returns %1 if the bulk-read is done, otherwise %0 is returned. 697 * This function returns %1 if the bulk-read is done, otherwise %0 is returned.
697 */ 698 */
698static int ubifs_do_bulk_read(struct ubifs_info *c, struct page *page1) 699static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
700 struct page *page1)
699{ 701{
700 pgoff_t offset = page1->index, end_index; 702 pgoff_t offset = page1->index, end_index;
701 struct address_space *mapping = page1->mapping; 703 struct address_space *mapping = page1->mapping;
702 struct inode *inode = mapping->host; 704 struct inode *inode = mapping->host;
703 struct ubifs_inode *ui = ubifs_inode(inode); 705 struct ubifs_inode *ui = ubifs_inode(inode);
704 struct bu_info *bu;
705 int err, page_idx, page_cnt, ret = 0, n = 0; 706 int err, page_idx, page_cnt, ret = 0, n = 0;
707 int allocate = bu->buf ? 0 : 1;
706 loff_t isize; 708 loff_t isize;
707 709
708 bu = kmalloc(sizeof(struct bu_info), GFP_NOFS);
709 if (!bu)
710 return 0;
711
712 bu->buf_len = c->bulk_read_buf_size;
713 bu->buf = kmalloc(bu->buf_len, GFP_NOFS);
714 if (!bu->buf)
715 goto out_free;
716
717 data_key_init(c, &bu->key, inode->i_ino,
718 offset << UBIFS_BLOCKS_PER_PAGE_SHIFT);
719
720 err = ubifs_tnc_get_bu_keys(c, bu); 710 err = ubifs_tnc_get_bu_keys(c, bu);
721 if (err) 711 if (err)
722 goto out_warn; 712 goto out_warn;
@@ -735,12 +725,25 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct page *page1)
735 * together. If all the pages were like this, bulk-read would 725 * together. If all the pages were like this, bulk-read would
736 * reduce performance, so we turn it off for a while. 726 * reduce performance, so we turn it off for a while.
737 */ 727 */
738 ui->read_in_a_row = 0; 728 goto out_bu_off;
739 ui->bulk_read = 0;
740 goto out_free;
741 } 729 }
742 730
743 if (bu->cnt) { 731 if (bu->cnt) {
732 if (allocate) {
733 /*
734 * Allocate bulk-read buffer depending on how many data
735 * nodes we are going to read.
736 */
737 bu->buf_len = bu->zbranch[bu->cnt - 1].offs +
738 bu->zbranch[bu->cnt - 1].len -
739 bu->zbranch[0].offs;
740 ubifs_assert(bu->buf_len > 0);
741 ubifs_assert(bu->buf_len <= c->leb_size);
742 bu->buf = kmalloc(bu->buf_len, GFP_NOFS | __GFP_NOWARN);
743 if (!bu->buf)
744 goto out_bu_off;
745 }
746
744 err = ubifs_tnc_bulk_read(c, bu); 747 err = ubifs_tnc_bulk_read(c, bu);
745 if (err) 748 if (err)
746 goto out_warn; 749 goto out_warn;
@@ -779,13 +782,17 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct page *page1)
779 ui->last_page_read = offset + page_idx - 1; 782 ui->last_page_read = offset + page_idx - 1;
780 783
781out_free: 784out_free:
782 kfree(bu->buf); 785 if (allocate)
783 kfree(bu); 786 kfree(bu->buf);
784 return ret; 787 return ret;
785 788
786out_warn: 789out_warn:
787 ubifs_warn("ignoring error %d and skipping bulk-read", err); 790 ubifs_warn("ignoring error %d and skipping bulk-read", err);
788 goto out_free; 791 goto out_free;
792
793out_bu_off:
794 ui->read_in_a_row = ui->bulk_read = 0;
795 goto out_free;
789} 796}
790 797
791/** 798/**
@@ -803,18 +810,20 @@ static int ubifs_bulk_read(struct page *page)
803 struct ubifs_info *c = inode->i_sb->s_fs_info; 810 struct ubifs_info *c = inode->i_sb->s_fs_info;
804 struct ubifs_inode *ui = ubifs_inode(inode); 811 struct ubifs_inode *ui = ubifs_inode(inode);
805 pgoff_t index = page->index, last_page_read = ui->last_page_read; 812 pgoff_t index = page->index, last_page_read = ui->last_page_read;
806 int ret = 0; 813 struct bu_info *bu;
814 int err = 0, allocated = 0;
807 815
808 ui->last_page_read = index; 816 ui->last_page_read = index;
809
810 if (!c->bulk_read) 817 if (!c->bulk_read)
811 return 0; 818 return 0;
819
812 /* 820 /*
813 * Bulk-read is protected by ui_mutex, but it is an optimization, so 821 * Bulk-read is protected by @ui->ui_mutex, but it is an optimization,
814 * don't bother if we cannot lock the mutex. 822 * so don't bother if we cannot lock the mutex.
815 */ 823 */
816 if (!mutex_trylock(&ui->ui_mutex)) 824 if (!mutex_trylock(&ui->ui_mutex))
817 return 0; 825 return 0;
826
818 if (index != last_page_read + 1) { 827 if (index != last_page_read + 1) {
819 /* Turn off bulk-read if we stop reading sequentially */ 828 /* Turn off bulk-read if we stop reading sequentially */
820 ui->read_in_a_row = 1; 829 ui->read_in_a_row = 1;
@@ -822,6 +831,7 @@ static int ubifs_bulk_read(struct page *page)
822 ui->bulk_read = 0; 831 ui->bulk_read = 0;
823 goto out_unlock; 832 goto out_unlock;
824 } 833 }
834
825 if (!ui->bulk_read) { 835 if (!ui->bulk_read) {
826 ui->read_in_a_row += 1; 836 ui->read_in_a_row += 1;
827 if (ui->read_in_a_row < 3) 837 if (ui->read_in_a_row < 3)
@@ -829,10 +839,35 @@ static int ubifs_bulk_read(struct page *page)
829 /* Three reads in a row, so switch on bulk-read */ 839 /* Three reads in a row, so switch on bulk-read */
830 ui->bulk_read = 1; 840 ui->bulk_read = 1;
831 } 841 }
832 ret = ubifs_do_bulk_read(c, page); 842
843 /*
844 * If possible, try to use pre-allocated bulk-read information, which
845 * is protected by @c->bu_mutex.
846 */
847 if (mutex_trylock(&c->bu_mutex))
848 bu = &c->bu;
849 else {
850 bu = kmalloc(sizeof(struct bu_info), GFP_NOFS | __GFP_NOWARN);
851 if (!bu)
852 goto out_unlock;
853
854 bu->buf = NULL;
855 allocated = 1;
856 }
857
858 bu->buf_len = c->max_bu_buf_len;
859 data_key_init(c, &bu->key, inode->i_ino,
860 page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT);
861 err = ubifs_do_bulk_read(c, bu, page);
862
863 if (!allocated)
864 mutex_unlock(&c->bu_mutex);
865 else
866 kfree(bu);
867
833out_unlock: 868out_unlock:
834 mutex_unlock(&ui->ui_mutex); 869 mutex_unlock(&ui->ui_mutex);
835 return ret; 870 return err;
836} 871}
837 872
838static int ubifs_readpage(struct file *file, struct page *page) 873static int ubifs_readpage(struct file *file, struct page *page)
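ubifs_bulk_read() now prefers the pre-allocated bu_info at c->bu, taken with mutex_trylock(&c->bu_mutex), and only falls back to a freshly allocated private descriptor and buffer (GFP_NOFS | __GFP_NOWARN) when the shared one is busy. A user-space sketch of that try-shared-else-allocate pattern follows; pthreads and malloc stand in for the kernel mutex and kmalloc, and the names are illustrative rather than the real UBIFS API.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct bu_info {
	char *buf;
	size_t buf_len;
};

static pthread_mutex_t bu_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct bu_info shared_bu;		/* pre-allocated descriptor */

static int do_bulk_read(struct bu_info *bu)
{
	/* The real code sizes bu->buf from the located data nodes; fake it. */
	if (!bu->buf) {
		bu->buf = malloc(bu->buf_len);
		if (!bu->buf)
			return 0;		/* silently skip bulk-read */
	}
	printf("bulk-read using a %zu-byte buffer\n", bu->buf_len);
	return 1;
}

static int bulk_read(size_t max_buf_len)
{
	struct bu_info *bu;
	int allocated = 0, ret;

	if (pthread_mutex_trylock(&bu_mutex) == 0) {
		bu = &shared_bu;		/* fast path: shared descriptor */
	} else {
		bu = malloc(sizeof(*bu));	/* slow path: private descriptor */
		if (!bu)
			return 0;
		bu->buf = NULL;
		allocated = 1;
	}

	bu->buf_len = max_buf_len;
	ret = do_bulk_read(bu);

	if (allocated) {
		free(bu->buf);
		free(bu);
	} else {
		/* Shared descriptor keeps its buffer for the next caller. */
		pthread_mutex_unlock(&bu_mutex);
	}
	return ret;
}

int main(void)
{
	return bulk_read(4096) ? 0 : 1;
}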
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 22993f867d19..f91b745908ea 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -690,8 +690,9 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
690 int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR; 690 int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR;
691 struct ubifs_inode *ui = ubifs_inode(inode); 691 struct ubifs_inode *ui = ubifs_inode(inode);
692 692
693 dbg_jnl("ino %lu, blk %u, len %d, key %s", key_inum(c, key), 693 dbg_jnl("ino %lu, blk %u, len %d, key %s",
694 key_block(c, key), len, DBGKEY(key)); 694 (unsigned long)key_inum(c, key), key_block(c, key), len,
695 DBGKEY(key));
695 ubifs_assert(len <= UBIFS_BLOCK_SIZE); 696 ubifs_assert(len <= UBIFS_BLOCK_SIZE);
696 697
697 data = kmalloc(dlen, GFP_NOFS); 698 data = kmalloc(dlen, GFP_NOFS);
@@ -1128,7 +1129,8 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
1128 ino_t inum = inode->i_ino; 1129 ino_t inum = inode->i_ino;
1129 unsigned int blk; 1130 unsigned int blk;
1130 1131
1131 dbg_jnl("ino %lu, size %lld -> %lld", inum, old_size, new_size); 1132 dbg_jnl("ino %lu, size %lld -> %lld",
1133 (unsigned long)inum, old_size, new_size);
1132 ubifs_assert(!ui->data_len); 1134 ubifs_assert(!ui->data_len);
1133 ubifs_assert(S_ISREG(inode->i_mode)); 1135 ubifs_assert(S_ISREG(inode->i_mode));
1134 ubifs_assert(mutex_is_locked(&ui->ui_mutex)); 1136 ubifs_assert(mutex_is_locked(&ui->ui_mutex));
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
index 9ee65086f627..3f1f16bc25c9 100644
--- a/fs/ubifs/key.h
+++ b/fs/ubifs/key.h
@@ -345,7 +345,7 @@ static inline int key_type_flash(const struct ubifs_info *c, const void *k)
345{ 345{
346 const union ubifs_key *key = k; 346 const union ubifs_key *key = k;
347 347
348 return le32_to_cpu(key->u32[1]) >> UBIFS_S_KEY_BLOCK_BITS; 348 return le32_to_cpu(key->j32[1]) >> UBIFS_S_KEY_BLOCK_BITS;
349} 349}
350 350
351/** 351/**
@@ -416,7 +416,7 @@ static inline unsigned int key_block_flash(const struct ubifs_info *c,
416{ 416{
417 const union ubifs_key *key = k; 417 const union ubifs_key *key = k;
418 418
419 return le32_to_cpu(key->u32[1]) & UBIFS_S_KEY_BLOCK_MASK; 419 return le32_to_cpu(key->j32[1]) & UBIFS_S_KEY_BLOCK_MASK;
420} 420}
421 421
422/** 422/**
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index eed5a0025d63..a41434b42785 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -571,8 +571,6 @@ static struct ubifs_pnode *next_pnode(struct ubifs_info *c,
571 /* We assume here that LEB zero is never an LPT LEB */ 571 /* We assume here that LEB zero is never an LPT LEB */
572 if (nnode->nbranch[iip].lnum) 572 if (nnode->nbranch[iip].lnum)
573 return ubifs_get_pnode(c, nnode, iip); 573 return ubifs_get_pnode(c, nnode, iip);
574 else
575 return NULL;
576 } 574 }
577 575
578 /* Go up while can't go right */ 576 /* Go up while can't go right */
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 02d3462f4d3e..9bd5a43d4526 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -105,7 +105,7 @@ int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
105 list_add_tail(&orphan->list, &c->orph_list); 105 list_add_tail(&orphan->list, &c->orph_list);
106 list_add_tail(&orphan->new_list, &c->orph_new); 106 list_add_tail(&orphan->new_list, &c->orph_new);
107 spin_unlock(&c->orphan_lock); 107 spin_unlock(&c->orphan_lock);
108 dbg_gen("ino %lu", inum); 108 dbg_gen("ino %lu", (unsigned long)inum);
109 return 0; 109 return 0;
110} 110}
111 111
@@ -132,14 +132,16 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
132 else { 132 else {
133 if (o->dnext) { 133 if (o->dnext) {
134 spin_unlock(&c->orphan_lock); 134 spin_unlock(&c->orphan_lock);
135 dbg_gen("deleted twice ino %lu", inum); 135 dbg_gen("deleted twice ino %lu",
136 (unsigned long)inum);
136 return; 137 return;
137 } 138 }
138 if (o->cnext) { 139 if (o->cnext) {
139 o->dnext = c->orph_dnext; 140 o->dnext = c->orph_dnext;
140 c->orph_dnext = o; 141 c->orph_dnext = o;
141 spin_unlock(&c->orphan_lock); 142 spin_unlock(&c->orphan_lock);
142 dbg_gen("delete later ino %lu", inum); 143 dbg_gen("delete later ino %lu",
144 (unsigned long)inum);
143 return; 145 return;
144 } 146 }
145 rb_erase(p, &c->orph_tree); 147 rb_erase(p, &c->orph_tree);
@@ -151,12 +153,12 @@ void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
151 } 153 }
152 spin_unlock(&c->orphan_lock); 154 spin_unlock(&c->orphan_lock);
153 kfree(o); 155 kfree(o);
154 dbg_gen("inum %lu", inum); 156 dbg_gen("inum %lu", (unsigned long)inum);
155 return; 157 return;
156 } 158 }
157 } 159 }
158 spin_unlock(&c->orphan_lock); 160 spin_unlock(&c->orphan_lock);
159 dbg_err("missing orphan ino %lu", inum); 161 dbg_err("missing orphan ino %lu", (unsigned long)inum);
160 dbg_dump_stack(); 162 dbg_dump_stack();
161} 163}
162 164
@@ -448,7 +450,7 @@ static void erase_deleted(struct ubifs_info *c)
448 rb_erase(&orphan->rb, &c->orph_tree); 450 rb_erase(&orphan->rb, &c->orph_tree);
449 list_del(&orphan->list); 451 list_del(&orphan->list);
450 c->tot_orphans -= 1; 452 c->tot_orphans -= 1;
451 dbg_gen("deleting orphan ino %lu", orphan->inum); 453 dbg_gen("deleting orphan ino %lu", (unsigned long)orphan->inum);
452 kfree(orphan); 454 kfree(orphan);
453 } 455 }
454 c->orph_dnext = NULL; 456 c->orph_dnext = NULL;
@@ -536,8 +538,8 @@ static int insert_dead_orphan(struct ubifs_info *c, ino_t inum)
536 list_add_tail(&orphan->list, &c->orph_list); 538 list_add_tail(&orphan->list, &c->orph_list);
537 orphan->dnext = c->orph_dnext; 539 orphan->dnext = c->orph_dnext;
538 c->orph_dnext = orphan; 540 c->orph_dnext = orphan;
539 dbg_mnt("ino %lu, new %d, tot %d", 541 dbg_mnt("ino %lu, new %d, tot %d", (unsigned long)inum,
540 inum, c->new_orphans, c->tot_orphans); 542 c->new_orphans, c->tot_orphans);
541 return 0; 543 return 0;
542} 544}
543 545
@@ -609,7 +611,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
609 n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3; 611 n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3;
610 for (i = 0; i < n; i++) { 612 for (i = 0; i < n; i++) {
611 inum = le64_to_cpu(orph->inos[i]); 613 inum = le64_to_cpu(orph->inos[i]);
612 dbg_rcvry("deleting orphaned inode %lu", inum); 614 dbg_rcvry("deleting orphaned inode %lu",
615 (unsigned long)inum);
613 err = ubifs_tnc_remove_ino(c, inum); 616 err = ubifs_tnc_remove_ino(c, inum);
614 if (err) 617 if (err)
615 return err; 618 return err;
@@ -840,8 +843,8 @@ static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr,
840 if (inum != ci->last_ino) { 843 if (inum != ci->last_ino) {
841 /* Lowest node type is the inode node, so it comes first */ 844 /* Lowest node type is the inode node, so it comes first */
842 if (key_type(c, &zbr->key) != UBIFS_INO_KEY) 845 if (key_type(c, &zbr->key) != UBIFS_INO_KEY)
843 ubifs_err("found orphan node ino %lu, type %d", inum, 846 ubifs_err("found orphan node ino %lu, type %d",
844 key_type(c, &zbr->key)); 847 (unsigned long)inum, key_type(c, &zbr->key));
845 ci->last_ino = inum; 848 ci->last_ino = inum;
846 ci->tot_inos += 1; 849 ci->tot_inos += 1;
847 err = ubifs_tnc_read_node(c, zbr, ci->node); 850 err = ubifs_tnc_read_node(c, zbr, ci->node);
@@ -853,7 +856,8 @@ static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr,
853 /* Must be recorded as an orphan */ 856 /* Must be recorded as an orphan */
854 if (!dbg_find_check_orphan(&ci->root, inum) && 857 if (!dbg_find_check_orphan(&ci->root, inum) &&
855 !dbg_find_orphan(c, inum)) { 858 !dbg_find_orphan(c, inum)) {
856 ubifs_err("missing orphan, ino %lu", inum); 859 ubifs_err("missing orphan, ino %lu",
860 (unsigned long)inum);
857 ci->missing += 1; 861 ci->missing += 1;
858 } 862 }
859 } 863 }
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 77d26c141cf6..90acac603e63 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -168,12 +168,12 @@ static int write_rcvrd_mst_node(struct ubifs_info *c,
                                 struct ubifs_mst_node *mst)
 {
         int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz;
-        uint32_t save_flags;
+        __le32 save_flags;
 
         dbg_rcvry("recovery");
 
         save_flags = mst->flags;
-        mst->flags = cpu_to_le32(le32_to_cpu(mst->flags) | UBIFS_MST_RCVRY);
+        mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY);
 
         ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1);
         err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM);
@@ -1435,13 +1435,13 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
         err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
         if (err)
                 goto out;
-        dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", e->inum, lnum, offs,
-                  i_size, e->d_size);
+        dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ",
+                  (unsigned long)e->inum, lnum, offs, i_size, e->d_size);
         return 0;
 
 out:
         ubifs_warn("inode %lu failed to fix size %lld -> %lld error %d",
-                   e->inum, e->i_size, e->d_size, err);
+                   (unsigned long)e->inum, e->i_size, e->d_size, err);
         return err;
 }
 
@@ -1472,7 +1472,8 @@ int ubifs_recover_size(struct ubifs_info *c)
                         return err;
                 if (err == -ENOENT) {
                         /* Remove data nodes that have no inode */
-                        dbg_rcvry("removing ino %lu", e->inum);
+                        dbg_rcvry("removing ino %lu",
+                                  (unsigned long)e->inum);
                         err = ubifs_tnc_remove_ino(c, e->inum);
                         if (err)
                                 return err;
@@ -1493,8 +1494,8 @@ int ubifs_recover_size(struct ubifs_info *c)
                                 return PTR_ERR(inode);
                         if (inode->i_size < e->d_size) {
                                 dbg_rcvry("ino %lu size %lld -> %lld",
-                                          e->inum, e->d_size,
-                                          inode->i_size);
+                                          (unsigned long)e->inum,
+                                          e->d_size, inode->i_size);
                                 inode->i_size = e->d_size;
                                 ubifs_inode(inode)->ui_size = e->d_size;
                                 e->inode = inode;
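The write_rcvrd_mst_node() hunk above makes the temporary carry the on-media type (__le32) and sets the recovery bit with mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY): OR-ing a constant that has already been converted to the field's byte order gives the same result as converting to CPU order, OR-ing, and converting back, while the typed temporary lets sparse check the later restore. A user-space sketch of that equivalence, assuming glibc's <endian.h> helpers; the MST_RCVRY value is a stand-in, not the real UBIFS flag:

#define _DEFAULT_SOURCE
#include <endian.h>
#include <stdint.h>
#include <stdio.h>

#define MST_RCVRY 0x4                   /* stand-in bit for illustration only */

int main(void)
{
        uint32_t flags_le = htole32(0x81);      /* field kept in little-endian order */
        uint32_t saved = flags_le;

        /* OR-ing an LE-converted constant == convert, OR, convert back */
        flags_le |= htole32(MST_RCVRY);
        printf("flags (cpu order) 0x%x\n", le32toh(flags_le));

        flags_le = saved;                       /* restore, as the recovery path does */
        printf("restored          0x%x\n", le32toh(flags_le));
        return 0;
}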
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 7399692af859..21f7d047c306 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -1065,7 +1065,7 @@ int ubifs_replay_journal(struct ubifs_info *c)
         ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
         dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
                 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
-                c->highest_inum);
+                (unsigned long)c->highest_inum);
 out:
         destroy_replay_tree(c);
         destroy_bud_list(c);
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 2bf753b38889..0f392351dc5a 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -81,6 +81,7 @@ static int create_default_filesystem(struct ubifs_info *c)
         int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0;
         int min_leb_cnt = UBIFS_MIN_LEB_CNT;
         uint64_t tmp64, main_bytes;
+        __le64 tmp_le64;
 
         /* Some functions called from here depend on the @c->key_len filed */
         c->key_len = UBIFS_SK_LEN;
@@ -295,10 +296,10 @@ static int create_default_filesystem(struct ubifs_info *c)
         ino->ch.node_type = UBIFS_INO_NODE;
         ino->creat_sqnum = cpu_to_le64(++c->max_sqnum);
         ino->nlink = cpu_to_le32(2);
-        tmp = cpu_to_le64(CURRENT_TIME_SEC.tv_sec);
-        ino->atime_sec = tmp;
-        ino->ctime_sec = tmp;
-        ino->mtime_sec = tmp;
+        tmp_le64 = cpu_to_le64(CURRENT_TIME_SEC.tv_sec);
+        ino->atime_sec = tmp_le64;
+        ino->ctime_sec = tmp_le64;
+        ino->mtime_sec = tmp_le64;
         ino->atime_nsec = 0;
         ino->ctime_nsec = 0;
         ino->mtime_nsec = 0;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 8780efbf40ac..d80b2aef42b6 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -36,6 +36,12 @@
 #include <linux/mount.h>
 #include "ubifs.h"
 
+/*
+ * Maximum amount of memory we may 'kmalloc()' without worrying that we are
+ * allocating too much.
+ */
+#define UBIFS_KMALLOC_OK (128*1024)
+
 /* Slab cache for UBIFS inodes */
 struct kmem_cache *ubifs_inode_slab;
 
@@ -561,18 +567,11 @@ static int init_constants_early(struct ubifs_info *c)
          * calculations when reporting free space.
          */
         c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ;
-        /* Buffer size for bulk-reads */
-        c->bulk_read_buf_size = UBIFS_MAX_BULK_READ * UBIFS_MAX_DATA_NODE_SZ;
-        if (c->bulk_read_buf_size > c->leb_size)
-                c->bulk_read_buf_size = c->leb_size;
-        if (c->bulk_read_buf_size > 128 * 1024) {
-                /* Check if we can kmalloc more than 128KiB */
-                void *try = kmalloc(c->bulk_read_buf_size, GFP_KERNEL);
 
-                kfree(try);
-                if (!try)
-                        c->bulk_read_buf_size = 128 * 1024;
-        }
+        /* Buffer size for bulk-reads */
+        c->max_bu_buf_len = UBIFS_MAX_BULK_READ * UBIFS_MAX_DATA_NODE_SZ;
+        if (c->max_bu_buf_len > c->leb_size)
+                c->max_bu_buf_len = c->leb_size;
         return 0;
 }
 
@@ -992,6 +991,34 @@ static void destroy_journal(struct ubifs_info *c)
 }
 
 /**
+ * bu_init - initialize bulk-read information.
+ * @c: UBIFS file-system description object
+ */
+static void bu_init(struct ubifs_info *c)
+{
+        ubifs_assert(c->bulk_read == 1);
+
+        if (c->bu.buf)
+                return; /* Already initialized */
+
+again:
+        c->bu.buf = kmalloc(c->max_bu_buf_len, GFP_KERNEL | __GFP_NOWARN);
+        if (!c->bu.buf) {
+                if (c->max_bu_buf_len > UBIFS_KMALLOC_OK) {
+                        c->max_bu_buf_len = UBIFS_KMALLOC_OK;
+                        goto again;
+                }
+
+                /* Just disable bulk-read */
+                ubifs_warn("Cannot allocate %d bytes of memory for bulk-read, "
+                           "disabling it", c->max_bu_buf_len);
+                c->mount_opts.bulk_read = 1;
+                c->bulk_read = 0;
+                return;
+        }
+}
+
+/**
  * mount_ubifs - mount UBIFS file-system.
  * @c: UBIFS file-system description object
  *
@@ -1059,6 +1086,13 @@ static int mount_ubifs(struct ubifs_info *c)
                 goto out_free;
         }
 
+        if (c->bulk_read == 1)
+                bu_init(c);
+
+        /*
+         * We have to check all CRCs, even for data nodes, when we mount the FS
+         * (specifically, when we are replaying).
+         */
         c->always_chk_crc = 1;
 
         err = ubifs_read_superblock(c);
@@ -1289,6 +1323,7 @@ out_cbuf:
 out_dereg:
         dbg_failure_mode_deregistration(c);
 out_free:
+        kfree(c->bu.buf);
         vfree(c->ileb_buf);
         vfree(c->sbuf);
         kfree(c->bottom_up_buf);
@@ -1325,10 +1360,11 @@ static void ubifs_umount(struct ubifs_info *c)
         kfree(c->cbuf);
         kfree(c->rcvrd_mst_node);
         kfree(c->mst_node);
+        kfree(c->bu.buf);
+        vfree(c->ileb_buf);
         vfree(c->sbuf);
         kfree(c->bottom_up_buf);
         UBIFS_DBG(vfree(c->dbg_buf));
-        vfree(c->ileb_buf);
         dbg_failure_mode_deregistration(c);
 }
 
@@ -1626,6 +1662,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
                 ubifs_err("invalid or unknown remount parameter");
                 return err;
         }
+
         if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
                 err = ubifs_remount_rw(c);
                 if (err)
@@ -1633,6 +1670,14 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
         } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY))
                 ubifs_remount_ro(c);
 
+        if (c->bulk_read == 1)
+                bu_init(c);
+        else {
+                dbg_gen("disable bulk-read");
+                kfree(c->bu.buf);
+                c->bu.buf = NULL;
+        }
+
         return 0;
 }
 
@@ -1723,6 +1768,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
         mutex_init(&c->log_mutex);
         mutex_init(&c->mst_mutex);
         mutex_init(&c->umount_mutex);
+        mutex_init(&c->bu_mutex);
         init_waitqueue_head(&c->cmt_wq);
         c->buds = RB_ROOT;
         c->old_idx = RB_ROOT;
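The bu_init() added above replaces the old per-read allocation with one pre-allocated buffer: it tries the full max_bu_buf_len silently (__GFP_NOWARN), falls back once to the conservative UBIFS_KMALLOC_OK size, and only then disables bulk-read altogether. A user-space sketch of that retry shape, with the helper name and sizes invented for illustration:

#include <stdio.h>
#include <stdlib.h>

#define KMALLOC_OK (128 * 1024)         /* size assumed safe to allocate in one piece */

/*
 * Hypothetical stand-in for bu_init(): try the preferred length first,
 * quietly fall back to the safe length, and only then give up.
 */
static void *alloc_bulk_buf(size_t *len)
{
        void *buf;

again:
        buf = malloc(*len);
        if (!buf) {
                if (*len > KMALLOC_OK) {
                        *len = KMALLOC_OK;      /* retry with the conservative size */
                        goto again;
                }
                fprintf(stderr, "cannot allocate %zu bytes, disabling bulk-read\n",
                        *len);
                return NULL;                    /* caller turns the feature off */
        }
        return buf;
}

int main(void)
{
        size_t len = 8 * 1024 * 1024;
        void *buf = alloc_bulk_buf(&len);

        printf("buf=%p len=%zu\n", buf, len);
        free(buf);
        return 0;
}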
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index d27fd918b9c9..6eef5344a145 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -1501,7 +1501,12 @@ out:
  * @bu: bulk-read parameters and results
  *
  * Lookup consecutive data node keys for the same inode that reside
- * consecutively in the same LEB.
+ * consecutively in the same LEB. This function returns zero in case of success
+ * and a negative error code in case of failure.
+ *
+ * Note, if the bulk-read buffer length (@bu->buf_len) is known, this function
+ * makes sure bulk-read nodes fit the buffer. Otherwise, this function prepares
+ * maxumum possible amount of nodes for bulk-read.
  */
 int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu)
 {
@@ -2677,7 +2682,7 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum)
         struct ubifs_dent_node *xent, *pxent = NULL;
         struct qstr nm = { .name = NULL };
 
-        dbg_tnc("ino %lu", inum);
+        dbg_tnc("ino %lu", (unsigned long)inum);
 
         /*
          * Walk all extended attribute entries and remove them together with
@@ -2697,7 +2702,8 @@ int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum)
                 }
 
                 xattr_inum = le64_to_cpu(xent->inum);
-                dbg_tnc("xent '%s', ino %lu", xent->name, xattr_inum);
+                dbg_tnc("xent '%s', ino %lu", xent->name,
+                        (unsigned long)xattr_inum);
 
                 nm.name = xent->name;
                 nm.len = le16_to_cpu(xent->nlen);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index a7bd32fa15b9..46b172560a06 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -753,7 +753,7 @@ struct ubifs_znode {
 };
 
 /**
- * struct bu_info - bulk-read information
+ * struct bu_info - bulk-read information.
  * @key: first data node key
  * @zbranch: zbranches of data nodes to bulk read
  * @buf: buffer to read into
@@ -969,7 +969,10 @@ struct ubifs_mount_opts {
  * @mst_node: master node
  * @mst_offs: offset of valid master node
  * @mst_mutex: protects the master node area, @mst_node, and @mst_offs
- * @bulk_read_buf_size: buffer size for bulk-reads
+ *
+ * @max_bu_buf_len: maximum bulk-read buffer length
+ * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu
+ * @bu: pre-allocated bulk-read information
  *
  * @log_lebs: number of logical eraseblocks in the log
  * @log_bytes: log size in bytes
@@ -1217,7 +1220,10 @@ struct ubifs_info {
         struct ubifs_mst_node *mst_node;
         int mst_offs;
         struct mutex mst_mutex;
-        int bulk_read_buf_size;
+
+        int max_bu_buf_len;
+        struct mutex bu_mutex;
+        struct bu_info bu;
 
         int log_lebs;
         long long log_bytes;
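Because there is now a single pre-allocated bu_info per mount instead of a buffer allocated for every bulk read, concurrent readers have to serialize on the new bu_mutex, and a reader that finds the buffer busy can simply fall back to ordinary node-by-node reads. The UBIFS read path itself is not part of these hunks, so the following is only a user-space sketch of that kind of arrangement, with all names (bu_cache, try_bulk_read) invented for illustration:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-ins for c->bu_mutex, c->bu.buf and c->max_bu_buf_len. */
struct bu_cache {
        pthread_mutex_t lock;
        char *buf;
        size_t buf_len;
};

/* Use the single shared buffer if it is free, otherwise tell the caller to
 * fall back to plain one-node-at-a-time reads. */
static int try_bulk_read(struct bu_cache *bc, const char *src, size_t n)
{
        if (pthread_mutex_trylock(&bc->lock) != 0)
                return -1;                      /* buffer busy */
        if (n > bc->buf_len)
                n = bc->buf_len;
        memcpy(bc->buf, src, n);                /* ... consume bc->buf here ... */
        pthread_mutex_unlock(&bc->lock);
        return 0;
}

int main(void)
{
        struct bu_cache bc = { .lock = PTHREAD_MUTEX_INITIALIZER,
                               .buf = malloc(4096), .buf_len = 4096 };

        printf("bulk read %s\n", try_bulk_read(&bc, "data", 4) ? "skipped" : "done");
        free(bc.buf);
        return 0;
}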
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 6e74b117aaf0..30ebde490f7f 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -106,6 +106,7 @@ void udf_clear_inode(struct inode *inode)
                 udf_truncate_tail_extent(inode);
                 unlock_kernel();
                 write_inode_now(inode, 0);
+                invalidate_inode_buffers(inode);
         }
         iinfo = UDF_I(inode);
         kfree(iinfo->i_ext.i_data);
diff --git a/fs/vfat/Makefile b/fs/vfat/Makefile
deleted file mode 100644
index 40f2798a4f08..000000000000
--- a/fs/vfat/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-#
-# Makefile for the linux vfat-filesystem routines.
-#
-
-obj-$(CONFIG_VFAT_FS) += vfat.o
-
-vfat-y := namei.o
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 9e561a9cefca..a11a8390bf6c 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1566,11 +1566,14 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
         int nmap, error, w, count, c, got, i, mapi;
         xfs_trans_t *tp;
         xfs_mount_t *mp;
+        xfs_drfsbno_t nblks;
 
         dp = args->dp;
         mp = dp->i_mount;
         w = args->whichfork;
         tp = args->trans;
+        nblks = dp->i_d.di_nblocks;
+
         /*
          * For new directories adjust the file offset and block count.
          */
@@ -1647,6 +1650,8 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
         }
         if (mapp != &map)
                 kmem_free(mapp);
+        /* account for newly allocated blocks in reserved blocks total */
+        args->total -= dp->i_d.di_nblocks - nblks;
         *new_blkno = (xfs_dablk_t)bno;
         return 0;
 }
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 80e0dc51361c..1afb12278b8d 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -525,11 +525,13 @@ xfs_dir2_grow_inode(
         xfs_mount_t *mp;
         int nmap;               /* number of bmap entries */
         xfs_trans_t *tp;
+        xfs_drfsbno_t nblks;
 
         xfs_dir2_trace_args_s("grow_inode", args, space);
         dp = args->dp;
         tp = args->trans;
         mp = dp->i_mount;
+        nblks = dp->i_d.di_nblocks;
         /*
          * Set lowest possible block in the space requested.
          */
@@ -622,7 +624,11 @@ xfs_dir2_grow_inode(
          */
         if (mapp != &map)
                 kmem_free(mapp);
+
+        /* account for newly allocated blocks in reserved blocks total */
+        args->total -= dp->i_d.di_nblocks - nblks;
         *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
+
         /*
          * Update file's size if this is the data space and it grew.
          */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index dbd9cef852ec..a391b955df01 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1414,7 +1414,7 @@ xfs_itruncate_start(
         mp = ip->i_mount;
 
         /* wait for the completion of any pending DIOs */
-        if (new_size < ip->i_size)
+        if (new_size == 0 || new_size < ip->i_size)
                 vn_iowait(ip);
 
         /*
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 0b02c6443551..3608a0f0a5f6 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -563,6 +563,11 @@ xfs_log_mount(
         }
 
         mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks);
+        if (!mp->m_log) {
+                cmn_err(CE_WARN, "XFS: Log allocation failed: No memory!");
+                error = ENOMEM;
+                goto out;
+        }
 
         /*
          * Initialize the AIL now we have a log.
@@ -601,6 +606,7 @@ xfs_log_mount(
         return 0;
 error:
         xfs_log_unmount_dealloc(mp);
+out:
         return error;
 }       /* xfs_log_mount */
 
@@ -1217,7 +1223,9 @@ xlog_alloc_log(xfs_mount_t *mp,
         int             i;
         int             iclogsize;
 
-        log = (xlog_t *)kmem_zalloc(sizeof(xlog_t), KM_SLEEP);
+        log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL);
+        if (!log)
+                return NULL;
 
         log->l_mp = mp;
         log->l_targ = log_target;
@@ -1249,6 +1257,8 @@ xlog_alloc_log(xfs_mount_t *mp,
         xlog_get_iclog_buffer_size(mp, log);
 
         bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
+        if (!bp)
+                goto out_free_log;
         XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
         XFS_BUF_SET_BDSTRAT_FUNC(bp, xlog_bdstrat_cb);
         XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
@@ -1275,13 +1285,17 @@ xlog_alloc_log(xfs_mount_t *mp,
         iclogsize = log->l_iclog_size;
         ASSERT(log->l_iclog_size >= 4096);
         for (i=0; i < log->l_iclog_bufs; i++) {
-                *iclogp = (xlog_in_core_t *)
-                          kmem_zalloc(sizeof(xlog_in_core_t), KM_SLEEP);
+                *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL);
+                if (!*iclogp)
+                        goto out_free_iclog;
+
                 iclog = *iclogp;
                 iclog->ic_prev = prev_iclog;
                 prev_iclog = iclog;
 
                 bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp);
+                if (!bp)
+                        goto out_free_iclog;
                 if (!XFS_BUF_CPSEMA(bp))
                         ASSERT(0);
                 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
@@ -1323,6 +1337,25 @@ xlog_alloc_log(xfs_mount_t *mp,
         log->l_iclog->ic_prev = prev_iclog;     /* re-write 1st prev ptr */
 
         return log;
+
+out_free_iclog:
+        for (iclog = log->l_iclog; iclog; iclog = prev_iclog) {
+                prev_iclog = iclog->ic_next;
+                if (iclog->ic_bp) {
+                        sv_destroy(&iclog->ic_force_wait);
+                        sv_destroy(&iclog->ic_write_wait);
+                        xfs_buf_free(iclog->ic_bp);
+                        xlog_trace_iclog_dealloc(iclog);
+                }
+                kmem_free(iclog);
+        }
+        spinlock_destroy(&log->l_icloglock);
+        spinlock_destroy(&log->l_grant_lock);
+        xlog_trace_loggrant_dealloc(log);
+        xfs_buf_free(log->l_xbuf);
+out_free_log:
+        kmem_free(log);
+        return NULL;
 } /* xlog_alloc_log */
 
 
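The xlog_alloc_log() hunks switch every allocation to a failable variant (KM_MAYFAIL instead of KM_SLEEP) and unwind whatever has been built so far through the out_free_iclog/out_free_log labels. A miniature user-space sketch of that goto-based unwind shape, with all structure and function names invented for illustration:

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical miniature of the unwind: every allocation may fail, and the
 * labels free exactly what has been built so far. */
struct iclog { struct iclog *next; void *bp; };
struct xlog  { void *xbuf; struct iclog *iclogs; };

static struct xlog *alloc_log(int nbufs)
{
        struct xlog *log = calloc(1, sizeof(*log));
        struct iclog *ic, *next;
        int i;

        if (!log)
                return NULL;
        log->xbuf = malloc(4096);
        if (!log->xbuf)
                goto out_free_log;

        for (i = 0; i < nbufs; i++) {
                ic = calloc(1, sizeof(*ic));
                if (!ic)
                        goto out_free_iclog;
                ic->next = log->iclogs;         /* link first so the unwind sees it */
                log->iclogs = ic;
                ic->bp = malloc(4096);
                if (!ic->bp)
                        goto out_free_iclog;
        }
        return log;

out_free_iclog:                                 /* free the partially built list */
        for (ic = log->iclogs; ic; ic = next) {
                next = ic->next;
                free(ic->bp);
                free(ic);
        }
        free(log->xbuf);
out_free_log:
        free(log);
        return NULL;
}

int main(void)
{
        struct xlog *log = alloc_log(8);

        printf("alloc_log: %s\n", log ? "ok" : "failed");
        /* a real caller would keep and later tear down the log; the sketch
         * deliberately stops here */
        return 0;
}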
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 82d46ce69d5f..70e3ba32e6be 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1419,7 +1419,13 @@ xlog_recover_add_to_trans(
                 return 0;
         item = trans->r_itemq;
         if (item == NULL) {
-                ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC);
+                /* we need to catch log corruptions here */
+                if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
+                        xlog_warn("XFS: xlog_recover_add_to_trans: "
+                                  "bad header magic number");
+                        ASSERT(0);
+                        return XFS_ERROR(EIO);
+                }
                 if (len == sizeof(xfs_trans_header_t))
                         xlog_recover_add_item(&trans->r_itemq);
                 memcpy(&trans->r_theader, dp, len); /* d, s, l */
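The point of this hunk is that ASSERT() compiles away in non-debug builds, so a corrupt transaction header would previously have been copied and replayed; the runtime check keeps the assertion for developers but also fails recovery with EIO in production. A user-space sketch of the same validate-then-fail pattern; the magic value and add_to_trans() name are stand-ins, not the real XFS definitions:

#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TRANS_HEADER_MAGIC 0x5452414eu  /* stand-in value, not the real magic */

/* Reject a corrupt record instead of only asserting: with NDEBUG the assert
 * disappears, so the explicit error return is what production builds rely on. */
static int add_to_trans(const void *dp, size_t len)
{
        uint32_t magic;

        if (len < sizeof(magic))
                return -EIO;
        memcpy(&magic, dp, sizeof(magic));
        if (magic != TRANS_HEADER_MAGIC) {
                fprintf(stderr, "bad header magic number\n");
                assert(0);              /* aborts in debug builds only */
                return -EIO;
        }
        return 0;
}

int main(void)
{
        uint32_t good = TRANS_HEADER_MAGIC, bad = 0;

        printf("good: %d\n", add_to_trans(&good, sizeof(good)));
        /* the next call aborts unless the sketch is built with -DNDEBUG */
        printf("bad:  %d\n", add_to_trans(&bad, sizeof(bad)));
        return 0;
}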
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index a4503f5e9497..15f5dd22fbb2 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1245,6 +1245,9 @@ xfs_unmountfs(
 
         XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
 
+        if (mp->m_quotainfo)
+                XFS_QM_DONE(mp);
+
         /*
          * Flush out the log synchronously so that we know for sure
          * that nothing is pinned.  This is important because bflush()
@@ -1297,8 +1300,6 @@ xfs_unmountfs(
         xfs_errortag_clearall(mp, 0);
 #endif
         xfs_free_perag(mp);
-        if (mp->m_quotainfo)
-                XFS_QM_DONE(mp);
 }
 
 STATIC void
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index d700dacdb10e..c903130be7fd 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -212,7 +212,7 @@ xfs_rename(
         if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
                      (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
                 error = XFS_ERROR(EXDEV);
-                xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
+                xfs_rename_unlock4(inodes, XFS_ILOCK_EXCL);
                 xfs_trans_cancel(tp, cancel_flags);
                 goto std_return;
         }