aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-05-11 08:44:27 -0400
committerIngo Molnar <mingo@elte.hu>2009-05-11 08:44:31 -0400
commit41fb454ebe6024f5c1e3b3cbc0abc0da762e7b51 (patch)
tree51c50bcb67a5039448ddfa1869d7948cab1217e9 /fs
parent19c1a6f5764d787113fa323ffb18be7991208f82 (diff)
parent091bf7624d1c90cec9e578a18529f615213ff847 (diff)
Merge commit 'v2.6.30-rc5' into core/iommu
Merge reason: core/iommu was on an .30-rc1 base, update it to .30-rc5 to refresh. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/file.c2
-rw-r--r--fs/afs/netdevices.c3
-rw-r--r--fs/autofs/dirhash.c34
-rw-r--r--fs/autofs4/dev-ioctl.c12
-rw-r--r--fs/autofs4/expire.c4
-rw-r--r--fs/befs/super.c1
-rw-r--r--fs/binfmt_elf_fdpic.c4
-rw-r--r--fs/bio.c125
-rw-r--r--fs/btrfs/Makefile19
-rw-r--r--fs/btrfs/acl.c18
-rw-r--r--fs/btrfs/async-thread.c60
-rw-r--r--fs/btrfs/async-thread.h2
-rw-r--r--fs/btrfs/ctree.c17
-rw-r--r--fs/btrfs/ctree.h6
-rw-r--r--fs/btrfs/disk-io.c102
-rw-r--r--fs/btrfs/extent-tree.c49
-rw-r--r--fs/btrfs/extent_io.c167
-rw-r--r--fs/btrfs/extent_map.c17
-rw-r--r--fs/btrfs/file.c95
-rw-r--r--fs/btrfs/free-space-cache.c15
-rw-r--r--fs/btrfs/inode-map.c2
-rw-r--r--fs/btrfs/inode.c183
-rw-r--r--fs/btrfs/ioctl.c58
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/super.c40
-rw-r--r--fs/btrfs/transaction.c6
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/volumes.c159
-rw-r--r--fs/btrfs/volumes.h16
-rw-r--r--fs/buffer.c79
-rw-r--r--fs/cifs/CHANGES16
-rw-r--r--fs/cifs/README10
-rw-r--r--fs/cifs/cifs_dfs_ref.c32
-rw-r--r--fs/cifs/cifs_spnego.c2
-rw-r--r--fs/cifs/cifs_unicode.c198
-rw-r--r--fs/cifs/cifs_unicode.h23
-rw-r--r--fs/cifs/cifsfs.c48
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h19
-rw-r--r--fs/cifs/cifspdu.h8
-rw-r--r--fs/cifs/cifsproto.h5
-rw-r--r--fs/cifs/cifssmb.c223
-rw-r--r--fs/cifs/connect.c1357
-rw-r--r--fs/cifs/dir.c160
-rw-r--r--fs/cifs/dns_resolve.c2
-rw-r--r--fs/cifs/file.c141
-rw-r--r--fs/cifs/inode.c98
-rw-r--r--fs/cifs/link.c114
-rw-r--r--fs/cifs/misc.c71
-rw-r--r--fs/cifs/netmisc.c2
-rw-r--r--fs/cifs/nterr.h9
-rw-r--r--fs/cifs/ntlmssp.h68
-rw-r--r--fs/cifs/readdir.c78
-rw-r--r--fs/cifs/sess.c373
-rw-r--r--fs/cifs/smberr.h1
-rw-r--r--fs/compat.c48
-rw-r--r--fs/compat_ioctl.c7
-rw-r--r--fs/configfs/symlink.c2
-rw-r--r--fs/dcache.c1
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/ecryptfs/crypto.c21
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h1
-rw-r--r--fs/ecryptfs/inode.c37
-rw-r--r--fs/ecryptfs/main.c14
-rw-r--r--fs/ecryptfs/messaging.c82
-rw-r--r--fs/ecryptfs/miscdev.c43
-rw-r--r--fs/ecryptfs/mmap.c11
-rw-r--r--fs/ecryptfs/read_write.c32
-rw-r--r--fs/ecryptfs/super.c7
-rw-r--r--fs/exec.c32
-rw-r--r--fs/ext2/inode.c44
-rw-r--r--fs/ext2/super.c4
-rw-r--r--fs/ext3/inode.c23
-rw-r--r--fs/ext4/extents.c20
-rw-r--r--fs/ext4/ialloc.c6
-rw-r--r--fs/ext4/inode.c32
-rw-r--r--fs/ext4/super.c9
-rw-r--r--fs/fat/Kconfig3
-rw-r--r--fs/filesystems.c2
-rw-r--r--fs/fuse/file.c8
-rw-r--r--fs/gfs2/glock.c10
-rw-r--r--fs/gfs2/glops.c6
-rw-r--r--fs/gfs2/inode.c8
-rw-r--r--fs/gfs2/inode.h14
-rw-r--r--fs/gfs2/ops_file.c12
-rw-r--r--fs/gfs2/ops_fstype.c5
-rw-r--r--fs/gfs2/ops_inode.c1
-rw-r--r--fs/gfs2/quota.c4
-rw-r--r--fs/gfs2/rgrp.c13
-rw-r--r--fs/hfs/inode.c4
-rw-r--r--fs/hfs/mdb.c1
-rw-r--r--fs/hugetlbfs/inode.c3
-rw-r--r--fs/inode.c36
-rw-r--r--fs/ioctl.c75
-rw-r--r--fs/jbd/commit.c2
-rw-r--r--fs/jbd/revoke.c44
-rw-r--r--fs/jbd2/commit.c3
-rw-r--r--fs/jbd2/revoke.c21
-rw-r--r--fs/namei.c2
-rw-r--r--fs/namespace.c7
-rw-r--r--fs/ncpfs/ioctl.c21
-rw-r--r--fs/nfs/file.c6
-rw-r--r--fs/nfs/nfs3xdr.c3
-rw-r--r--fs/nfsd/nfs4recover.c46
-rw-r--r--fs/nfsd/vfs.c34
-rw-r--r--fs/nilfs2/bmap.c5
-rw-r--r--fs/nilfs2/nilfs.h5
-rw-r--r--fs/nilfs2/recovery.c20
-rw-r--r--fs/nilfs2/sufile.c290
-rw-r--r--fs/nilfs2/sufile.h79
-rw-r--r--fs/nilfs2/super.c7
-rw-r--r--fs/nilfs2/the_nilfs.c4
-rw-r--r--fs/notify/inotify/inotify_user.c2
-rw-r--r--fs/ocfs2/dcache.c15
-rw-r--r--fs/ocfs2/dir.c4
-rw-r--r--fs/ocfs2/export.c9
-rw-r--r--fs/ocfs2/file.c94
-rw-r--r--fs/ocfs2/journal.h5
-rw-r--r--fs/ocfs2/namei.c4
-rw-r--r--fs/ocfs2/suballoc.c21
-rw-r--r--fs/pipe.c42
-rw-r--r--fs/proc/array.c13
-rw-r--r--fs/proc/base.c9
-rw-r--r--fs/proc/meminfo.c2
-rw-r--r--fs/proc/stat.c5
-rw-r--r--fs/proc/task_mmu.c4
-rw-r--r--fs/proc/task_nommu.c2
-rw-r--r--fs/quota/Makefile9
-rw-r--r--fs/romfs/internal.h4
-rw-r--r--fs/romfs/storage.c68
-rw-r--r--fs/romfs/super.c4
-rw-r--r--fs/splice.c370
-rw-r--r--fs/stat.c137
-rw-r--r--fs/sysfs/bin.c13
-rw-r--r--fs/sysfs/file.c16
-rw-r--r--fs/xattr.c10
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c38
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c14
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c23
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c12
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c18
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c78
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h9
-rw-r--r--fs/xfs/xfs_bmap.c192
-rw-r--r--fs/xfs/xfs_iget.c23
-rw-r--r--fs/xfs/xfs_inode.c2
-rw-r--r--fs/xfs/xfs_iomap.c61
-rw-r--r--fs/xfs/xfs_iomap.h3
-rw-r--r--fs/xfs/xfs_log.c78
-rw-r--r--fs/xfs/xfs_mount.c3
-rw-r--r--fs/xfs/xfs_mount.h2
-rw-r--r--fs/xfs/xfs_vnodeops.c7
154 files changed, 3338 insertions, 3738 deletions
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 7a1d942ef68d..0149dab365e7 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -102,6 +102,7 @@ int afs_release(struct inode *inode, struct file *file)
102 return 0; 102 return 0;
103} 103}
104 104
105#ifdef CONFIG_AFS_FSCACHE
105/* 106/*
106 * deal with notification that a page was read from the cache 107 * deal with notification that a page was read from the cache
107 */ 108 */
@@ -117,6 +118,7 @@ static void afs_file_readpage_read_complete(struct page *page,
117 SetPageUptodate(page); 118 SetPageUptodate(page);
118 unlock_page(page); 119 unlock_page(page);
119} 120}
121#endif
120 122
121/* 123/*
122 * AFS read page from file, directory or symlink 124 * AFS read page from file, directory or symlink
diff --git a/fs/afs/netdevices.c b/fs/afs/netdevices.c
index 49f189423063..7ad36506c256 100644
--- a/fs/afs/netdevices.c
+++ b/fs/afs/netdevices.c
@@ -20,8 +20,7 @@ int afs_get_MAC_address(u8 *mac, size_t maclen)
20 struct net_device *dev; 20 struct net_device *dev;
21 int ret = -ENODEV; 21 int ret = -ENODEV;
22 22
23 if (maclen != ETH_ALEN) 23 BUG_ON(maclen != ETH_ALEN);
24 BUG();
25 24
26 rtnl_lock(); 25 rtnl_lock();
27 dev = __dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER); 26 dev = __dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER);
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index bf8c8af98004..4eb4d8dfb2f1 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -39,10 +39,12 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
39{ 39{
40 struct autofs_dirhash *dh = &sbi->dirhash; 40 struct autofs_dirhash *dh = &sbi->dirhash;
41 struct autofs_dir_ent *ent; 41 struct autofs_dir_ent *ent;
42 struct dentry *dentry;
43 unsigned long timeout = sbi->exp_timeout; 42 unsigned long timeout = sbi->exp_timeout;
44 43
45 while (1) { 44 while (1) {
45 struct path path;
46 int umount_ok;
47
46 if ( list_empty(&dh->expiry_head) || sbi->catatonic ) 48 if ( list_empty(&dh->expiry_head) || sbi->catatonic )
47 return NULL; /* No entries */ 49 return NULL; /* No entries */
48 /* We keep the list sorted by last_usage and want old stuff */ 50 /* We keep the list sorted by last_usage and want old stuff */
@@ -57,17 +59,17 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
57 return ent; /* Symlinks are always expirable */ 59 return ent; /* Symlinks are always expirable */
58 60
59 /* Get the dentry for the autofs subdirectory */ 61 /* Get the dentry for the autofs subdirectory */
60 dentry = ent->dentry; 62 path.dentry = ent->dentry;
61 63
62 if ( !dentry ) { 64 if (!path.dentry) {
63 /* Should only happen in catatonic mode */ 65 /* Should only happen in catatonic mode */
64 printk("autofs: dentry == NULL but inode range is directory, entry %s\n", ent->name); 66 printk("autofs: dentry == NULL but inode range is directory, entry %s\n", ent->name);
65 autofs_delete_usage(ent); 67 autofs_delete_usage(ent);
66 continue; 68 continue;
67 } 69 }
68 70
69 if ( !dentry->d_inode ) { 71 if (!path.dentry->d_inode) {
70 dput(dentry); 72 dput(path.dentry);
71 printk("autofs: negative dentry on expiry queue: %s\n", 73 printk("autofs: negative dentry on expiry queue: %s\n",
72 ent->name); 74 ent->name);
73 autofs_delete_usage(ent); 75 autofs_delete_usage(ent);
@@ -76,29 +78,29 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
76 78
77 /* Make sure entry is mounted and unused; note that dentry will 79 /* Make sure entry is mounted and unused; note that dentry will
78 point to the mounted-on-top root. */ 80 point to the mounted-on-top root. */
79 if (!S_ISDIR(dentry->d_inode->i_mode)||!d_mountpoint(dentry)) { 81 if (!S_ISDIR(path.dentry->d_inode->i_mode) ||
82 !d_mountpoint(path.dentry)) {
80 DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); 83 DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
81 continue; 84 continue;
82 } 85 }
83 mntget(mnt); 86 path.mnt = mnt;
84 dget(dentry); 87 path_get(&path);
85 if (!follow_down(&mnt, &dentry)) { 88 if (!follow_down(&path.mnt, &path.dentry)) {
86 dput(dentry); 89 path_put(&path);
87 mntput(mnt);
88 DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); 90 DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
89 continue; 91 continue;
90 } 92 }
91 while (d_mountpoint(dentry) && follow_down(&mnt, &dentry)) 93 while (d_mountpoint(path.dentry) &&
94 follow_down(&path.mnt, &path.dentry))
92 ; 95 ;
93 dput(dentry); 96 umount_ok = may_umount(path.mnt);
97 path_put(&path);
94 98
95 if ( may_umount(mnt) ) { 99 if (umount_ok) {
96 mntput(mnt);
97 DPRINTK(("autofs: signaling expire on %s\n", ent->name)); 100 DPRINTK(("autofs: signaling expire on %s\n", ent->name));
98 return ent; /* Expirable! */ 101 return ent; /* Expirable! */
99 } 102 }
100 DPRINTK(("autofs: didn't expire due to may_umount: %s\n", ent->name)); 103 DPRINTK(("autofs: didn't expire due to may_umount: %s\n", ent->name));
101 mntput(mnt);
102 } 104 }
103 return NULL; /* No expirable entries */ 105 return NULL; /* No expirable entries */
104} 106}
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 9e5ae8a4f5c8..84168c0dcc2d 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -54,11 +54,10 @@ static int check_name(const char *name)
54 * Check a string doesn't overrun the chunk of 54 * Check a string doesn't overrun the chunk of
55 * memory we copied from user land. 55 * memory we copied from user land.
56 */ 56 */
57static int invalid_str(char *str, void *end) 57static int invalid_str(char *str, size_t size)
58{ 58{
59 while ((void *) str <= end) 59 if (memchr(str, 0, size))
60 if (!*str++) 60 return 0;
61 return 0;
62 return -EINVAL; 61 return -EINVAL;
63} 62}
64 63
@@ -138,8 +137,7 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
138 } 137 }
139 138
140 if (param->size > sizeof(*param)) { 139 if (param->size > sizeof(*param)) {
141 err = invalid_str(param->path, 140 err = invalid_str(param->path, param->size - sizeof(*param));
142 (void *) ((size_t) param + param->size));
143 if (err) { 141 if (err) {
144 AUTOFS_WARN( 142 AUTOFS_WARN(
145 "path string terminator missing for cmd(0x%08x)", 143 "path string terminator missing for cmd(0x%08x)",
@@ -488,7 +486,7 @@ static int autofs_dev_ioctl_requester(struct file *fp,
488 } 486 }
489 487
490 path = param->path; 488 path = param->path;
491 devid = sbi->sb->s_dev; 489 devid = new_encode_dev(sbi->sb->s_dev);
492 490
493 param->requester.uid = param->requester.gid = -1; 491 param->requester.uid = param->requester.gid = -1;
494 492
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 75f7ddacf7d6..3077d8f16523 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -70,8 +70,10 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
70 * Otherwise it's an offset mount and we need to check 70 * Otherwise it's an offset mount and we need to check
71 * if we can umount its mount, if there is one. 71 * if we can umount its mount, if there is one.
72 */ 72 */
73 if (!d_mountpoint(dentry)) 73 if (!d_mountpoint(dentry)) {
74 status = 0;
74 goto done; 75 goto done;
76 }
75 } 77 }
76 78
77 /* Update the expiry counter if fs is busy */ 79 /* Update the expiry counter if fs is busy */
diff --git a/fs/befs/super.c b/fs/befs/super.c
index 41f2b4d0093e..ca40f828f64d 100644
--- a/fs/befs/super.c
+++ b/fs/befs/super.c
@@ -8,6 +8,7 @@
8 */ 8 */
9 9
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <asm/page.h> /* for PAGE_SIZE */
11 12
12#include "befs.h" 13#include "befs.h"
13#include "super.h" 14#include "super.h"
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 70cfc4b84ae0..fdb66faa24f1 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1388,7 +1388,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus,
1388 prstatus->pr_sigpend = p->pending.signal.sig[0]; 1388 prstatus->pr_sigpend = p->pending.signal.sig[0];
1389 prstatus->pr_sighold = p->blocked.sig[0]; 1389 prstatus->pr_sighold = p->blocked.sig[0];
1390 prstatus->pr_pid = task_pid_vnr(p); 1390 prstatus->pr_pid = task_pid_vnr(p);
1391 prstatus->pr_ppid = task_pid_vnr(p->parent); 1391 prstatus->pr_ppid = task_pid_vnr(p->real_parent);
1392 prstatus->pr_pgrp = task_pgrp_vnr(p); 1392 prstatus->pr_pgrp = task_pgrp_vnr(p);
1393 prstatus->pr_sid = task_session_vnr(p); 1393 prstatus->pr_sid = task_session_vnr(p);
1394 if (thread_group_leader(p)) { 1394 if (thread_group_leader(p)) {
@@ -1433,7 +1433,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1433 psinfo->pr_psargs[len] = 0; 1433 psinfo->pr_psargs[len] = 0;
1434 1434
1435 psinfo->pr_pid = task_pid_vnr(p); 1435 psinfo->pr_pid = task_pid_vnr(p);
1436 psinfo->pr_ppid = task_pid_vnr(p->parent); 1436 psinfo->pr_ppid = task_pid_vnr(p->real_parent);
1437 psinfo->pr_pgrp = task_pgrp_vnr(p); 1437 psinfo->pr_pgrp = task_pgrp_vnr(p);
1438 psinfo->pr_sid = task_session_vnr(p); 1438 psinfo->pr_sid = task_session_vnr(p);
1439 1439
diff --git a/fs/bio.c b/fs/bio.c
index e0c9e545bbfa..98711647ece4 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -175,14 +175,6 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
175 struct bio_vec *bvl; 175 struct bio_vec *bvl;
176 176
177 /* 177 /*
178 * If 'bs' is given, lookup the pool and do the mempool alloc.
179 * If not, this is a bio_kmalloc() allocation and just do a
180 * kzalloc() for the exact number of vecs right away.
181 */
182 if (!bs)
183 bvl = kmalloc(nr * sizeof(struct bio_vec), gfp_mask);
184
185 /*
186 * see comment near bvec_array define! 178 * see comment near bvec_array define!
187 */ 179 */
188 switch (nr) { 180 switch (nr) {
@@ -260,21 +252,6 @@ void bio_free(struct bio *bio, struct bio_set *bs)
260 mempool_free(p, bs->bio_pool); 252 mempool_free(p, bs->bio_pool);
261} 253}
262 254
263/*
264 * default destructor for a bio allocated with bio_alloc_bioset()
265 */
266static void bio_fs_destructor(struct bio *bio)
267{
268 bio_free(bio, fs_bio_set);
269}
270
271static void bio_kmalloc_destructor(struct bio *bio)
272{
273 if (bio_has_allocated_vec(bio))
274 kfree(bio->bi_io_vec);
275 kfree(bio);
276}
277
278void bio_init(struct bio *bio) 255void bio_init(struct bio *bio)
279{ 256{
280 memset(bio, 0, sizeof(*bio)); 257 memset(bio, 0, sizeof(*bio));
@@ -301,21 +278,15 @@ void bio_init(struct bio *bio)
301 **/ 278 **/
302struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) 279struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
303{ 280{
281 unsigned long idx = BIO_POOL_NONE;
304 struct bio_vec *bvl = NULL; 282 struct bio_vec *bvl = NULL;
305 struct bio *bio = NULL; 283 struct bio *bio;
306 unsigned long idx = 0; 284 void *p;
307 void *p = NULL; 285
308 286 p = mempool_alloc(bs->bio_pool, gfp_mask);
309 if (bs) { 287 if (unlikely(!p))
310 p = mempool_alloc(bs->bio_pool, gfp_mask); 288 return NULL;
311 if (!p) 289 bio = p + bs->front_pad;
312 goto err;
313 bio = p + bs->front_pad;
314 } else {
315 bio = kmalloc(sizeof(*bio), gfp_mask);
316 if (!bio)
317 goto err;
318 }
319 290
320 bio_init(bio); 291 bio_init(bio);
321 292
@@ -332,22 +303,33 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
332 303
333 nr_iovecs = bvec_nr_vecs(idx); 304 nr_iovecs = bvec_nr_vecs(idx);
334 } 305 }
306out_set:
335 bio->bi_flags |= idx << BIO_POOL_OFFSET; 307 bio->bi_flags |= idx << BIO_POOL_OFFSET;
336 bio->bi_max_vecs = nr_iovecs; 308 bio->bi_max_vecs = nr_iovecs;
337out_set:
338 bio->bi_io_vec = bvl; 309 bio->bi_io_vec = bvl;
339
340 return bio; 310 return bio;
341 311
342err_free: 312err_free:
343 if (bs) 313 mempool_free(p, bs->bio_pool);
344 mempool_free(p, bs->bio_pool);
345 else
346 kfree(bio);
347err:
348 return NULL; 314 return NULL;
349} 315}
350 316
317static void bio_fs_destructor(struct bio *bio)
318{
319 bio_free(bio, fs_bio_set);
320}
321
322/**
323 * bio_alloc - allocate a new bio, memory pool backed
324 * @gfp_mask: allocation mask to use
325 * @nr_iovecs: number of iovecs
326 *
327 * Allocate a new bio with @nr_iovecs bvecs. If @gfp_mask
328 * contains __GFP_WAIT, the allocation is guaranteed to succeed.
329 *
330 * RETURNS:
331 * Pointer to new bio on success, NULL on failure.
332 */
351struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) 333struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
352{ 334{
353 struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); 335 struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
@@ -358,19 +340,45 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
358 return bio; 340 return bio;
359} 341}
360 342
361/* 343static void bio_kmalloc_destructor(struct bio *bio)
362 * Like bio_alloc(), but doesn't use a mempool backing. This means that 344{
363 * it CAN fail, but while bio_alloc() can only be used for allocations 345 if (bio_integrity(bio))
364 * that have a short (finite) life span, bio_kmalloc() should be used 346 bio_integrity_free(bio);
365 * for more permanent bio allocations (like allocating some bio's for 347 kfree(bio);
366 * initalization or setup purposes). 348}
367 */ 349
350/**
351 * bio_alloc - allocate a bio for I/O
352 * @gfp_mask: the GFP_ mask given to the slab allocator
353 * @nr_iovecs: number of iovecs to pre-allocate
354 *
355 * Description:
356 * bio_alloc will allocate a bio and associated bio_vec array that can hold
357 * at least @nr_iovecs entries. Allocations will be done from the
358 * fs_bio_set. Also see @bio_alloc_bioset.
359 *
360 * If %__GFP_WAIT is set, then bio_alloc will always be able to allocate
361 * a bio. This is due to the mempool guarantees. To make this work, callers
362 * must never allocate more than 1 bio at the time from this pool. Callers
363 * that need to allocate more than 1 bio must always submit the previously
364 * allocate bio for IO before attempting to allocate a new one. Failure to
365 * do so can cause livelocks under memory pressure.
366 *
367 **/
368struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) 368struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
369{ 369{
370 struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); 370 struct bio *bio;
371 371
372 if (bio) 372 bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec),
373 bio->bi_destructor = bio_kmalloc_destructor; 373 gfp_mask);
374 if (unlikely(!bio))
375 return NULL;
376
377 bio_init(bio);
378 bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET;
379 bio->bi_max_vecs = nr_iovecs;
380 bio->bi_io_vec = bio->bi_inline_vecs;
381 bio->bi_destructor = bio_kmalloc_destructor;
374 382
375 return bio; 383 return bio;
376} 384}
@@ -809,12 +817,15 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
809 len += iov[i].iov_len; 817 len += iov[i].iov_len;
810 } 818 }
811 819
820 if (offset)
821 nr_pages++;
822
812 bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask); 823 bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
813 if (!bmd) 824 if (!bmd)
814 return ERR_PTR(-ENOMEM); 825 return ERR_PTR(-ENOMEM);
815 826
816 ret = -ENOMEM; 827 ret = -ENOMEM;
817 bio = bio_alloc(gfp_mask, nr_pages); 828 bio = bio_kmalloc(gfp_mask, nr_pages);
818 if (!bio) 829 if (!bio)
819 goto out_bmd; 830 goto out_bmd;
820 831
@@ -938,7 +949,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
938 if (!nr_pages) 949 if (!nr_pages)
939 return ERR_PTR(-EINVAL); 950 return ERR_PTR(-EINVAL);
940 951
941 bio = bio_alloc(gfp_mask, nr_pages); 952 bio = bio_kmalloc(gfp_mask, nr_pages);
942 if (!bio) 953 if (!bio)
943 return ERR_PTR(-ENOMEM); 954 return ERR_PTR(-ENOMEM);
944 955
@@ -1122,7 +1133,7 @@ static struct bio *__bio_map_kern(struct request_queue *q, void *data,
1122 int offset, i; 1133 int offset, i;
1123 struct bio *bio; 1134 struct bio *bio;
1124 1135
1125 bio = bio_alloc(gfp_mask, nr_pages); 1136 bio = bio_kmalloc(gfp_mask, nr_pages);
1126 if (!bio) 1137 if (!bio)
1127 return ERR_PTR(-ENOMEM); 1138 return ERR_PTR(-ENOMEM);
1128 1139
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9adf5e4f7e96..94212844a9bc 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -1,25 +1,10 @@
1ifneq ($(KERNELRELEASE),)
2# kbuild part of makefile
3 1
4obj-$(CONFIG_BTRFS_FS) := btrfs.o 2obj-$(CONFIG_BTRFS_FS) := btrfs.o
5btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ 3
4btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
6 file-item.o inode-item.o inode-map.o disk-io.o \ 5 file-item.o inode-item.o inode-map.o disk-io.o \
7 transaction.o inode.o file.o tree-defrag.o \ 6 transaction.o inode.o file.o tree-defrag.o \
8 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ 7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
9 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ 8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
10 ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \ 9 ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \
11 compression.o delayed-ref.o 10 compression.o delayed-ref.o
12else
13
14# Normal Makefile
15
16KERNELDIR := /lib/modules/`uname -r`/build
17all:
18 $(MAKE) -C $(KERNELDIR) M=`pwd` CONFIG_BTRFS_FS=m modules
19
20modules_install:
21 $(MAKE) -C $(KERNELDIR) M=`pwd` modules_install
22clean:
23 $(MAKE) -C $(KERNELDIR) M=`pwd` clean
24
25endif
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 7fdd184a528d..cbba000dccbe 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -60,15 +60,20 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
60 return ERR_PTR(-EINVAL); 60 return ERR_PTR(-EINVAL);
61 } 61 }
62 62
63 /* Handle the cached NULL acl case without locking */
64 acl = ACCESS_ONCE(*p_acl);
65 if (!acl)
66 return acl;
67
63 spin_lock(&inode->i_lock); 68 spin_lock(&inode->i_lock);
64 if (*p_acl != BTRFS_ACL_NOT_CACHED) 69 acl = *p_acl;
65 acl = posix_acl_dup(*p_acl); 70 if (acl != BTRFS_ACL_NOT_CACHED)
71 acl = posix_acl_dup(acl);
66 spin_unlock(&inode->i_lock); 72 spin_unlock(&inode->i_lock);
67 73
68 if (acl) 74 if (acl != BTRFS_ACL_NOT_CACHED)
69 return acl; 75 return acl;
70 76
71
72 size = __btrfs_getxattr(inode, name, "", 0); 77 size = __btrfs_getxattr(inode, name, "", 0);
73 if (size > 0) { 78 if (size > 0) {
74 value = kzalloc(size, GFP_NOFS); 79 value = kzalloc(size, GFP_NOFS);
@@ -80,9 +85,12 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
80 btrfs_update_cached_acl(inode, p_acl, acl); 85 btrfs_update_cached_acl(inode, p_acl, acl);
81 } 86 }
82 kfree(value); 87 kfree(value);
83 } else if (size == -ENOENT) { 88 } else if (size == -ENOENT || size == -ENODATA || size == 0) {
89 /* FIXME, who returns -ENOENT? I think nobody */
84 acl = NULL; 90 acl = NULL;
85 btrfs_update_cached_acl(inode, p_acl, acl); 91 btrfs_update_cached_acl(inode, p_acl, acl);
92 } else {
93 acl = ERR_PTR(-EIO);
86 } 94 }
87 95
88 return acl; 96 return acl;
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 51bfdfc8fcda..502c3d61de62 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -25,6 +25,7 @@
25#define WORK_QUEUED_BIT 0 25#define WORK_QUEUED_BIT 0
26#define WORK_DONE_BIT 1 26#define WORK_DONE_BIT 1
27#define WORK_ORDER_DONE_BIT 2 27#define WORK_ORDER_DONE_BIT 2
28#define WORK_HIGH_PRIO_BIT 3
28 29
29/* 30/*
30 * container for the kthread task pointer and the list of pending work 31 * container for the kthread task pointer and the list of pending work
@@ -36,6 +37,7 @@ struct btrfs_worker_thread {
36 37
37 /* list of struct btrfs_work that are waiting for service */ 38 /* list of struct btrfs_work that are waiting for service */
38 struct list_head pending; 39 struct list_head pending;
40 struct list_head prio_pending;
39 41
40 /* list of worker threads from struct btrfs_workers */ 42 /* list of worker threads from struct btrfs_workers */
41 struct list_head worker_list; 43 struct list_head worker_list;
@@ -103,10 +105,16 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers,
103 105
104 spin_lock_irqsave(&workers->lock, flags); 106 spin_lock_irqsave(&workers->lock, flags);
105 107
106 while (!list_empty(&workers->order_list)) { 108 while (1) {
107 work = list_entry(workers->order_list.next, 109 if (!list_empty(&workers->prio_order_list)) {
108 struct btrfs_work, order_list); 110 work = list_entry(workers->prio_order_list.next,
109 111 struct btrfs_work, order_list);
112 } else if (!list_empty(&workers->order_list)) {
113 work = list_entry(workers->order_list.next,
114 struct btrfs_work, order_list);
115 } else {
116 break;
117 }
110 if (!test_bit(WORK_DONE_BIT, &work->flags)) 118 if (!test_bit(WORK_DONE_BIT, &work->flags))
111 break; 119 break;
112 120
@@ -143,8 +151,14 @@ static int worker_loop(void *arg)
143 do { 151 do {
144 spin_lock_irq(&worker->lock); 152 spin_lock_irq(&worker->lock);
145again_locked: 153again_locked:
146 while (!list_empty(&worker->pending)) { 154 while (1) {
147 cur = worker->pending.next; 155 if (!list_empty(&worker->prio_pending))
156 cur = worker->prio_pending.next;
157 else if (!list_empty(&worker->pending))
158 cur = worker->pending.next;
159 else
160 break;
161
148 work = list_entry(cur, struct btrfs_work, list); 162 work = list_entry(cur, struct btrfs_work, list);
149 list_del(&work->list); 163 list_del(&work->list);
150 clear_bit(WORK_QUEUED_BIT, &work->flags); 164 clear_bit(WORK_QUEUED_BIT, &work->flags);
@@ -163,7 +177,6 @@ again_locked:
163 177
164 spin_lock_irq(&worker->lock); 178 spin_lock_irq(&worker->lock);
165 check_idle_worker(worker); 179 check_idle_worker(worker);
166
167 } 180 }
168 if (freezing(current)) { 181 if (freezing(current)) {
169 worker->working = 0; 182 worker->working = 0;
@@ -178,7 +191,8 @@ again_locked:
178 * jump_in? 191 * jump_in?
179 */ 192 */
180 smp_mb(); 193 smp_mb();
181 if (!list_empty(&worker->pending)) 194 if (!list_empty(&worker->pending) ||
195 !list_empty(&worker->prio_pending))
182 continue; 196 continue;
183 197
184 /* 198 /*
@@ -191,7 +205,8 @@ again_locked:
191 */ 205 */
192 schedule_timeout(1); 206 schedule_timeout(1);
193 smp_mb(); 207 smp_mb();
194 if (!list_empty(&worker->pending)) 208 if (!list_empty(&worker->pending) ||
209 !list_empty(&worker->prio_pending))
195 continue; 210 continue;
196 211
197 if (kthread_should_stop()) 212 if (kthread_should_stop())
@@ -200,7 +215,8 @@ again_locked:
200 /* still no more work?, sleep for real */ 215 /* still no more work?, sleep for real */
201 spin_lock_irq(&worker->lock); 216 spin_lock_irq(&worker->lock);
202 set_current_state(TASK_INTERRUPTIBLE); 217 set_current_state(TASK_INTERRUPTIBLE);
203 if (!list_empty(&worker->pending)) 218 if (!list_empty(&worker->pending) ||
219 !list_empty(&worker->prio_pending))
204 goto again_locked; 220 goto again_locked;
205 221
206 /* 222 /*
@@ -248,6 +264,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
248 INIT_LIST_HEAD(&workers->worker_list); 264 INIT_LIST_HEAD(&workers->worker_list);
249 INIT_LIST_HEAD(&workers->idle_list); 265 INIT_LIST_HEAD(&workers->idle_list);
250 INIT_LIST_HEAD(&workers->order_list); 266 INIT_LIST_HEAD(&workers->order_list);
267 INIT_LIST_HEAD(&workers->prio_order_list);
251 spin_lock_init(&workers->lock); 268 spin_lock_init(&workers->lock);
252 workers->max_workers = max; 269 workers->max_workers = max;
253 workers->idle_thresh = 32; 270 workers->idle_thresh = 32;
@@ -273,6 +290,7 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
273 } 290 }
274 291
275 INIT_LIST_HEAD(&worker->pending); 292 INIT_LIST_HEAD(&worker->pending);
293 INIT_LIST_HEAD(&worker->prio_pending);
276 INIT_LIST_HEAD(&worker->worker_list); 294 INIT_LIST_HEAD(&worker->worker_list);
277 spin_lock_init(&worker->lock); 295 spin_lock_init(&worker->lock);
278 atomic_set(&worker->num_pending, 0); 296 atomic_set(&worker->num_pending, 0);
@@ -396,7 +414,10 @@ int btrfs_requeue_work(struct btrfs_work *work)
396 goto out; 414 goto out;
397 415
398 spin_lock_irqsave(&worker->lock, flags); 416 spin_lock_irqsave(&worker->lock, flags);
399 list_add_tail(&work->list, &worker->pending); 417 if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
418 list_add_tail(&work->list, &worker->prio_pending);
419 else
420 list_add_tail(&work->list, &worker->pending);
400 atomic_inc(&worker->num_pending); 421 atomic_inc(&worker->num_pending);
401 422
402 /* by definition we're busy, take ourselves off the idle 423 /* by definition we're busy, take ourselves off the idle
@@ -422,6 +443,11 @@ out:
422 return 0; 443 return 0;
423} 444}
424 445
446void btrfs_set_work_high_prio(struct btrfs_work *work)
447{
448 set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
449}
450
425/* 451/*
426 * places a struct btrfs_work into the pending queue of one of the kthreads 452 * places a struct btrfs_work into the pending queue of one of the kthreads
427 */ 453 */
@@ -438,7 +464,12 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
438 worker = find_worker(workers); 464 worker = find_worker(workers);
439 if (workers->ordered) { 465 if (workers->ordered) {
440 spin_lock_irqsave(&workers->lock, flags); 466 spin_lock_irqsave(&workers->lock, flags);
441 list_add_tail(&work->order_list, &workers->order_list); 467 if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
468 list_add_tail(&work->order_list,
469 &workers->prio_order_list);
470 } else {
471 list_add_tail(&work->order_list, &workers->order_list);
472 }
442 spin_unlock_irqrestore(&workers->lock, flags); 473 spin_unlock_irqrestore(&workers->lock, flags);
443 } else { 474 } else {
444 INIT_LIST_HEAD(&work->order_list); 475 INIT_LIST_HEAD(&work->order_list);
@@ -446,7 +477,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
446 477
447 spin_lock_irqsave(&worker->lock, flags); 478 spin_lock_irqsave(&worker->lock, flags);
448 479
449 list_add_tail(&work->list, &worker->pending); 480 if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
481 list_add_tail(&work->list, &worker->prio_pending);
482 else
483 list_add_tail(&work->list, &worker->pending);
450 atomic_inc(&worker->num_pending); 484 atomic_inc(&worker->num_pending);
451 check_busy_worker(worker); 485 check_busy_worker(worker);
452 486
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 31be4ed8b63e..1b511c109db6 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -85,6 +85,7 @@ struct btrfs_workers {
85 * of work items waiting for completion 85 * of work items waiting for completion
86 */ 86 */
87 struct list_head order_list; 87 struct list_head order_list;
88 struct list_head prio_order_list;
88 89
89 /* lock for finding the next worker thread to queue on */ 90 /* lock for finding the next worker thread to queue on */
90 spinlock_t lock; 91 spinlock_t lock;
@@ -98,4 +99,5 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers);
98int btrfs_stop_workers(struct btrfs_workers *workers); 99int btrfs_stop_workers(struct btrfs_workers *workers);
99void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max); 100void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max);
100int btrfs_requeue_work(struct btrfs_work *work); 101int btrfs_requeue_work(struct btrfs_work *work);
102void btrfs_set_work_high_prio(struct btrfs_work *work);
101#endif 103#endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index e5b2533b691a..a99f1c2a710d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1325,12 +1325,12 @@ static noinline int reada_for_balance(struct btrfs_root *root,
1325 int ret = 0; 1325 int ret = 0;
1326 int blocksize; 1326 int blocksize;
1327 1327
1328 parent = path->nodes[level - 1]; 1328 parent = path->nodes[level + 1];
1329 if (!parent) 1329 if (!parent)
1330 return 0; 1330 return 0;
1331 1331
1332 nritems = btrfs_header_nritems(parent); 1332 nritems = btrfs_header_nritems(parent);
1333 slot = path->slots[level]; 1333 slot = path->slots[level + 1];
1334 blocksize = btrfs_level_size(root, level); 1334 blocksize = btrfs_level_size(root, level);
1335 1335
1336 if (slot > 0) { 1336 if (slot > 0) {
@@ -1341,7 +1341,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
1341 block1 = 0; 1341 block1 = 0;
1342 free_extent_buffer(eb); 1342 free_extent_buffer(eb);
1343 } 1343 }
1344 if (slot < nritems) { 1344 if (slot + 1 < nritems) {
1345 block2 = btrfs_node_blockptr(parent, slot + 1); 1345 block2 = btrfs_node_blockptr(parent, slot + 1);
1346 gen = btrfs_node_ptr_generation(parent, slot + 1); 1346 gen = btrfs_node_ptr_generation(parent, slot + 1);
1347 eb = btrfs_find_tree_block(root, block2, blocksize); 1347 eb = btrfs_find_tree_block(root, block2, blocksize);
@@ -1351,7 +1351,11 @@ static noinline int reada_for_balance(struct btrfs_root *root,
1351 } 1351 }
1352 if (block1 || block2) { 1352 if (block1 || block2) {
1353 ret = -EAGAIN; 1353 ret = -EAGAIN;
1354
1355 /* release the whole path */
1354 btrfs_release_path(root, path); 1356 btrfs_release_path(root, path);
1357
1358 /* read the blocks */
1355 if (block1) 1359 if (block1)
1356 readahead_tree_block(root, block1, blocksize, 0); 1360 readahead_tree_block(root, block1, blocksize, 0);
1357 if (block2) 1361 if (block2)
@@ -1361,7 +1365,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
1361 eb = read_tree_block(root, block1, blocksize, 0); 1365 eb = read_tree_block(root, block1, blocksize, 0);
1362 free_extent_buffer(eb); 1366 free_extent_buffer(eb);
1363 } 1367 }
1364 if (block1) { 1368 if (block2) {
1365 eb = read_tree_block(root, block2, blocksize, 0); 1369 eb = read_tree_block(root, block2, blocksize, 0);
1366 free_extent_buffer(eb); 1370 free_extent_buffer(eb);
1367 } 1371 }
@@ -1481,12 +1485,15 @@ read_block_for_search(struct btrfs_trans_handle *trans,
1481 * of the btree by dropping locks before 1485 * of the btree by dropping locks before
1482 * we read. 1486 * we read.
1483 */ 1487 */
1484 btrfs_release_path(NULL, p); 1488 btrfs_unlock_up_safe(p, level + 1);
1489 btrfs_set_path_blocking(p);
1490
1485 if (tmp) 1491 if (tmp)
1486 free_extent_buffer(tmp); 1492 free_extent_buffer(tmp);
1487 if (p->reada) 1493 if (p->reada)
1488 reada_for_search(root, p, level, slot, key->objectid); 1494 reada_for_search(root, p, level, slot, key->objectid);
1489 1495
1496 btrfs_release_path(NULL, p);
1490 tmp = read_tree_block(root, blocknr, blocksize, gen); 1497 tmp = read_tree_block(root, blocknr, blocksize, gen);
1491 if (tmp) 1498 if (tmp)
1492 free_extent_buffer(tmp); 1499 free_extent_buffer(tmp);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ad96495dedc5..4414a5d9983a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -881,6 +881,9 @@ struct btrfs_fs_info {
881 u64 metadata_alloc_profile; 881 u64 metadata_alloc_profile;
882 u64 system_alloc_profile; 882 u64 system_alloc_profile;
883 883
884 unsigned data_chunk_allocations;
885 unsigned metadata_ratio;
886
884 void *bdev_holder; 887 void *bdev_holder;
885}; 888};
886 889
@@ -2174,7 +2177,8 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
2174extern struct file_operations btrfs_file_operations; 2177extern struct file_operations btrfs_file_operations;
2175int btrfs_drop_extents(struct btrfs_trans_handle *trans, 2178int btrfs_drop_extents(struct btrfs_trans_handle *trans,
2176 struct btrfs_root *root, struct inode *inode, 2179 struct btrfs_root *root, struct inode *inode,
2177 u64 start, u64 end, u64 inline_limit, u64 *hint_block); 2180 u64 start, u64 end, u64 locked_end,
2181 u64 inline_limit, u64 *hint_block);
2178int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 2182int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
2179 struct btrfs_root *root, 2183 struct btrfs_root *root,
2180 struct inode *inode, u64 start, u64 end); 2184 struct inode *inode, u64 start, u64 end);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 92caa8035f36..0ff16d3331da 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -232,10 +232,14 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
232 memcpy(&found, result, csum_size); 232 memcpy(&found, result, csum_size);
233 233
234 read_extent_buffer(buf, &val, 0, csum_size); 234 read_extent_buffer(buf, &val, 0, csum_size);
235 printk(KERN_INFO "btrfs: %s checksum verify failed " 235 if (printk_ratelimit()) {
236 "on %llu wanted %X found %X level %d\n", 236 printk(KERN_INFO "btrfs: %s checksum verify "
237 root->fs_info->sb->s_id, 237 "failed on %llu wanted %X found %X "
238 buf->start, val, found, btrfs_header_level(buf)); 238 "level %d\n",
239 root->fs_info->sb->s_id,
240 (unsigned long long)buf->start, val, found,
241 btrfs_header_level(buf));
242 }
239 if (result != (char *)&inline_result) 243 if (result != (char *)&inline_result)
240 kfree(result); 244 kfree(result);
241 return 1; 245 return 1;
@@ -268,10 +272,13 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
268 ret = 0; 272 ret = 0;
269 goto out; 273 goto out;
270 } 274 }
271 printk("parent transid verify failed on %llu wanted %llu found %llu\n", 275 if (printk_ratelimit()) {
272 (unsigned long long)eb->start, 276 printk("parent transid verify failed on %llu wanted %llu "
273 (unsigned long long)parent_transid, 277 "found %llu\n",
274 (unsigned long long)btrfs_header_generation(eb)); 278 (unsigned long long)eb->start,
279 (unsigned long long)parent_transid,
280 (unsigned long long)btrfs_header_generation(eb));
281 }
275 ret = 1; 282 ret = 1;
276 clear_extent_buffer_uptodate(io_tree, eb); 283 clear_extent_buffer_uptodate(io_tree, eb);
277out: 284out:
@@ -415,9 +422,12 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
415 422
416 found_start = btrfs_header_bytenr(eb); 423 found_start = btrfs_header_bytenr(eb);
417 if (found_start != start) { 424 if (found_start != start) {
418 printk(KERN_INFO "btrfs bad tree block start %llu %llu\n", 425 if (printk_ratelimit()) {
419 (unsigned long long)found_start, 426 printk(KERN_INFO "btrfs bad tree block start "
420 (unsigned long long)eb->start); 427 "%llu %llu\n",
428 (unsigned long long)found_start,
429 (unsigned long long)eb->start);
430 }
421 ret = -EIO; 431 ret = -EIO;
422 goto err; 432 goto err;
423 } 433 }
@@ -429,8 +439,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
429 goto err; 439 goto err;
430 } 440 }
431 if (check_tree_block_fsid(root, eb)) { 441 if (check_tree_block_fsid(root, eb)) {
432 printk(KERN_INFO "btrfs bad fsid on block %llu\n", 442 if (printk_ratelimit()) {
433 (unsigned long long)eb->start); 443 printk(KERN_INFO "btrfs bad fsid on block %llu\n",
444 (unsigned long long)eb->start);
445 }
434 ret = -EIO; 446 ret = -EIO;
435 goto err; 447 goto err;
436 } 448 }
@@ -579,19 +591,12 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
579 async->bio_flags = bio_flags; 591 async->bio_flags = bio_flags;
580 592
581 atomic_inc(&fs_info->nr_async_submits); 593 atomic_inc(&fs_info->nr_async_submits);
594
595 if (rw & (1 << BIO_RW_SYNCIO))
596 btrfs_set_work_high_prio(&async->work);
597
582 btrfs_queue_worker(&fs_info->workers, &async->work); 598 btrfs_queue_worker(&fs_info->workers, &async->work);
583#if 0
584 int limit = btrfs_async_submit_limit(fs_info);
585 if (atomic_read(&fs_info->nr_async_submits) > limit) {
586 wait_event_timeout(fs_info->async_submit_wait,
587 (atomic_read(&fs_info->nr_async_submits) < limit),
588 HZ/10);
589 599
590 wait_event_timeout(fs_info->async_submit_wait,
591 (atomic_read(&fs_info->nr_async_bios) < limit),
592 HZ/10);
593 }
594#endif
595 while (atomic_read(&fs_info->async_submit_draining) && 600 while (atomic_read(&fs_info->async_submit_draining) &&
596 atomic_read(&fs_info->nr_async_submits)) { 601 atomic_read(&fs_info->nr_async_submits)) {
597 wait_event(fs_info->async_submit_wait, 602 wait_event(fs_info->async_submit_wait,
@@ -656,6 +661,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
656 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, 661 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
657 mirror_num, 0); 662 mirror_num, 0);
658 } 663 }
664
659 /* 665 /*
660 * kthread helpers are used to submit writes so that checksumming 666 * kthread helpers are used to submit writes so that checksumming
661 * can happen in parallel across all CPUs 667 * can happen in parallel across all CPUs
@@ -765,27 +771,6 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
765 } 771 }
766} 772}
767 773
768#if 0
769static int btree_writepage(struct page *page, struct writeback_control *wbc)
770{
771 struct buffer_head *bh;
772 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
773 struct buffer_head *head;
774 if (!page_has_buffers(page)) {
775 create_empty_buffers(page, root->fs_info->sb->s_blocksize,
776 (1 << BH_Dirty)|(1 << BH_Uptodate));
777 }
778 head = page_buffers(page);
779 bh = head;
780 do {
781 if (buffer_dirty(bh))
782 csum_tree_block(root, bh, 0);
783 bh = bh->b_this_page;
784 } while (bh != head);
785 return block_write_full_page(page, btree_get_block, wbc);
786}
787#endif
788
789static struct address_space_operations btree_aops = { 774static struct address_space_operations btree_aops = {
790 .readpage = btree_readpage, 775 .readpage = btree_readpage,
791 .writepage = btree_writepage, 776 .writepage = btree_writepage,
@@ -1273,11 +1258,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1273 int ret = 0; 1258 int ret = 0;
1274 struct btrfs_device *device; 1259 struct btrfs_device *device;
1275 struct backing_dev_info *bdi; 1260 struct backing_dev_info *bdi;
1276#if 0 1261
1277 if ((bdi_bits & (1 << BDI_write_congested)) &&
1278 btrfs_congested_async(info, 0))
1279 return 1;
1280#endif
1281 list_for_each_entry(device, &info->fs_devices->devices, dev_list) { 1262 list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
1282 if (!device->bdev) 1263 if (!device->bdev)
1283 continue; 1264 continue;
@@ -1599,6 +1580,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1599 fs_info->btree_inode = new_inode(sb); 1580 fs_info->btree_inode = new_inode(sb);
1600 fs_info->btree_inode->i_ino = 1; 1581 fs_info->btree_inode->i_ino = 1;
1601 fs_info->btree_inode->i_nlink = 1; 1582 fs_info->btree_inode->i_nlink = 1;
1583 fs_info->metadata_ratio = 8;
1602 1584
1603 fs_info->thread_pool_size = min_t(unsigned long, 1585 fs_info->thread_pool_size = min_t(unsigned long,
1604 num_online_cpus() + 2, 8); 1586 num_online_cpus() + 2, 8);
@@ -1689,7 +1671,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1689 if (features) { 1671 if (features) {
1690 printk(KERN_ERR "BTRFS: couldn't mount because of " 1672 printk(KERN_ERR "BTRFS: couldn't mount because of "
1691 "unsupported optional features (%Lx).\n", 1673 "unsupported optional features (%Lx).\n",
1692 features); 1674 (unsigned long long)features);
1693 err = -EINVAL; 1675 err = -EINVAL;
1694 goto fail_iput; 1676 goto fail_iput;
1695 } 1677 }
@@ -1699,7 +1681,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1699 if (!(sb->s_flags & MS_RDONLY) && features) { 1681 if (!(sb->s_flags & MS_RDONLY) && features) {
1700 printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " 1682 printk(KERN_ERR "BTRFS: couldn't mount RDWR because of "
1701 "unsupported option features (%Lx).\n", 1683 "unsupported option features (%Lx).\n",
1702 features); 1684 (unsigned long long)features);
1703 err = -EINVAL; 1685 err = -EINVAL;
1704 goto fail_iput; 1686 goto fail_iput;
1705 } 1687 }
@@ -2095,10 +2077,10 @@ static int write_dev_supers(struct btrfs_device *device,
2095 device->barriers = 0; 2077 device->barriers = 0;
2096 get_bh(bh); 2078 get_bh(bh);
2097 lock_buffer(bh); 2079 lock_buffer(bh);
2098 ret = submit_bh(WRITE, bh); 2080 ret = submit_bh(WRITE_SYNC, bh);
2099 } 2081 }
2100 } else { 2082 } else {
2101 ret = submit_bh(WRITE, bh); 2083 ret = submit_bh(WRITE_SYNC, bh);
2102 } 2084 }
2103 2085
2104 if (!ret && wait) { 2086 if (!ret && wait) {
@@ -2291,7 +2273,7 @@ int close_ctree(struct btrfs_root *root)
2291 2273
2292 if (fs_info->delalloc_bytes) { 2274 if (fs_info->delalloc_bytes) {
2293 printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", 2275 printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
2294 fs_info->delalloc_bytes); 2276 (unsigned long long)fs_info->delalloc_bytes);
2295 } 2277 }
2296 if (fs_info->total_ref_cache_size) { 2278 if (fs_info->total_ref_cache_size) {
2297 printk(KERN_INFO "btrfs: at umount reference cache size %llu\n", 2279 printk(KERN_INFO "btrfs: at umount reference cache size %llu\n",
@@ -2328,16 +2310,6 @@ int close_ctree(struct btrfs_root *root)
2328 btrfs_stop_workers(&fs_info->endio_write_workers); 2310 btrfs_stop_workers(&fs_info->endio_write_workers);
2329 btrfs_stop_workers(&fs_info->submit_workers); 2311 btrfs_stop_workers(&fs_info->submit_workers);
2330 2312
2331#if 0
2332 while (!list_empty(&fs_info->hashers)) {
2333 struct btrfs_hasher *hasher;
2334 hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher,
2335 hashers);
2336 list_del(&hasher->hashers);
2337 crypto_free_hash(&fs_info->hash_tfm);
2338 kfree(hasher);
2339 }
2340#endif
2341 btrfs_close_devices(fs_info->fs_devices); 2313 btrfs_close_devices(fs_info->fs_devices);
2342 btrfs_mapping_tree_free(&fs_info->mapping_tree); 2314 btrfs_mapping_tree_free(&fs_info->mapping_tree);
2343 2315
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 178df4c67de4..e4966444811b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1844,10 +1844,14 @@ again:
1844 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" 1844 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
1845 ", %llu bytes_used, %llu bytes_reserved, " 1845 ", %llu bytes_used, %llu bytes_reserved, "
1846 "%llu bytes_pinned, %llu bytes_readonly, %llu may use" 1846 "%llu bytes_pinned, %llu bytes_readonly, %llu may use"
1847 "%llu total\n", bytes, data_sinfo->bytes_delalloc, 1847 "%llu total\n", (unsigned long long)bytes,
1848 data_sinfo->bytes_used, data_sinfo->bytes_reserved, 1848 (unsigned long long)data_sinfo->bytes_delalloc,
1849 data_sinfo->bytes_pinned, data_sinfo->bytes_readonly, 1849 (unsigned long long)data_sinfo->bytes_used,
1850 data_sinfo->bytes_may_use, data_sinfo->total_bytes); 1850 (unsigned long long)data_sinfo->bytes_reserved,
1851 (unsigned long long)data_sinfo->bytes_pinned,
1852 (unsigned long long)data_sinfo->bytes_readonly,
1853 (unsigned long long)data_sinfo->bytes_may_use,
1854 (unsigned long long)data_sinfo->total_bytes);
1851 return -ENOSPC; 1855 return -ENOSPC;
1852 } 1856 }
1853 data_sinfo->bytes_may_use += bytes; 1857 data_sinfo->bytes_may_use += bytes;
@@ -1918,15 +1922,29 @@ void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
1918 spin_unlock(&info->lock); 1922 spin_unlock(&info->lock);
1919} 1923}
1920 1924
1925static void force_metadata_allocation(struct btrfs_fs_info *info)
1926{
1927 struct list_head *head = &info->space_info;
1928 struct btrfs_space_info *found;
1929
1930 rcu_read_lock();
1931 list_for_each_entry_rcu(found, head, list) {
1932 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
1933 found->force_alloc = 1;
1934 }
1935 rcu_read_unlock();
1936}
1937
1921static int do_chunk_alloc(struct btrfs_trans_handle *trans, 1938static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1922 struct btrfs_root *extent_root, u64 alloc_bytes, 1939 struct btrfs_root *extent_root, u64 alloc_bytes,
1923 u64 flags, int force) 1940 u64 flags, int force)
1924{ 1941{
1925 struct btrfs_space_info *space_info; 1942 struct btrfs_space_info *space_info;
1943 struct btrfs_fs_info *fs_info = extent_root->fs_info;
1926 u64 thresh; 1944 u64 thresh;
1927 int ret = 0; 1945 int ret = 0;
1928 1946
1929 mutex_lock(&extent_root->fs_info->chunk_mutex); 1947 mutex_lock(&fs_info->chunk_mutex);
1930 1948
1931 flags = btrfs_reduce_alloc_profile(extent_root, flags); 1949 flags = btrfs_reduce_alloc_profile(extent_root, flags);
1932 1950
@@ -1958,6 +1976,18 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1958 } 1976 }
1959 spin_unlock(&space_info->lock); 1977 spin_unlock(&space_info->lock);
1960 1978
1979 /*
1980 * if we're doing a data chunk, go ahead and make sure that
1981 * we keep a reasonable number of metadata chunks allocated in the
1982 * FS as well.
1983 */
1984 if (flags & BTRFS_BLOCK_GROUP_DATA) {
1985 fs_info->data_chunk_allocations++;
1986 if (!(fs_info->data_chunk_allocations %
1987 fs_info->metadata_ratio))
1988 force_metadata_allocation(fs_info);
1989 }
1990
1961 ret = btrfs_alloc_chunk(trans, extent_root, flags); 1991 ret = btrfs_alloc_chunk(trans, extent_root, flags);
1962 if (ret) 1992 if (ret)
1963 space_info->full = 1; 1993 space_info->full = 1;
@@ -2798,9 +2828,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
2798 info->bytes_pinned - info->bytes_reserved), 2828 info->bytes_pinned - info->bytes_reserved),
2799 (info->full) ? "" : "not "); 2829 (info->full) ? "" : "not ");
2800 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," 2830 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
2801 " may_use=%llu, used=%llu\n", info->total_bytes, 2831 " may_use=%llu, used=%llu\n",
2802 info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use, 2832 (unsigned long long)info->total_bytes,
2803 info->bytes_used); 2833 (unsigned long long)info->bytes_pinned,
2834 (unsigned long long)info->bytes_delalloc,
2835 (unsigned long long)info->bytes_may_use,
2836 (unsigned long long)info->bytes_used);
2804 2837
2805 down_read(&info->groups_sem); 2838 down_read(&info->groups_sem);
2806 list_for_each_entry(cache, &info->block_groups, list) { 2839 list_for_each_entry(cache, &info->block_groups, list) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index eb2bee8b7fbf..fe9eb990e443 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -17,12 +17,6 @@
17#include "ctree.h" 17#include "ctree.h"
18#include "btrfs_inode.h" 18#include "btrfs_inode.h"
19 19
20/* temporary define until extent_map moves out of btrfs */
21struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
22 unsigned long extra_flags,
23 void (*ctor)(void *, struct kmem_cache *,
24 unsigned long));
25
26static struct kmem_cache *extent_state_cache; 20static struct kmem_cache *extent_state_cache;
27static struct kmem_cache *extent_buffer_cache; 21static struct kmem_cache *extent_buffer_cache;
28 22
@@ -50,20 +44,23 @@ struct extent_page_data {
50 /* tells writepage not to lock the state bits for this range 44 /* tells writepage not to lock the state bits for this range
51 * it still does the unlocking 45 * it still does the unlocking
52 */ 46 */
53 int extent_locked; 47 unsigned int extent_locked:1;
48
49 /* tells the submit_bio code to use a WRITE_SYNC */
50 unsigned int sync_io:1;
54}; 51};
55 52
56int __init extent_io_init(void) 53int __init extent_io_init(void)
57{ 54{
58 extent_state_cache = btrfs_cache_create("extent_state", 55 extent_state_cache = kmem_cache_create("extent_state",
59 sizeof(struct extent_state), 0, 56 sizeof(struct extent_state), 0,
60 NULL); 57 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
61 if (!extent_state_cache) 58 if (!extent_state_cache)
62 return -ENOMEM; 59 return -ENOMEM;
63 60
64 extent_buffer_cache = btrfs_cache_create("extent_buffers", 61 extent_buffer_cache = kmem_cache_create("extent_buffers",
65 sizeof(struct extent_buffer), 0, 62 sizeof(struct extent_buffer), 0,
66 NULL); 63 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
67 if (!extent_buffer_cache) 64 if (!extent_buffer_cache)
68 goto free_state_cache; 65 goto free_state_cache;
69 return 0; 66 return 0;
@@ -1404,69 +1401,6 @@ out:
1404 return total_bytes; 1401 return total_bytes;
1405} 1402}
1406 1403
1407#if 0
1408/*
1409 * helper function to lock both pages and extents in the tree.
1410 * pages must be locked first.
1411 */
1412static int lock_range(struct extent_io_tree *tree, u64 start, u64 end)
1413{
1414 unsigned long index = start >> PAGE_CACHE_SHIFT;
1415 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1416 struct page *page;
1417 int err;
1418
1419 while (index <= end_index) {
1420 page = grab_cache_page(tree->mapping, index);
1421 if (!page) {
1422 err = -ENOMEM;
1423 goto failed;
1424 }
1425 if (IS_ERR(page)) {
1426 err = PTR_ERR(page);
1427 goto failed;
1428 }
1429 index++;
1430 }
1431 lock_extent(tree, start, end, GFP_NOFS);
1432 return 0;
1433
1434failed:
1435 /*
1436 * we failed above in getting the page at 'index', so we undo here
1437 * up to but not including the page at 'index'
1438 */
1439 end_index = index;
1440 index = start >> PAGE_CACHE_SHIFT;
1441 while (index < end_index) {
1442 page = find_get_page(tree->mapping, index);
1443 unlock_page(page);
1444 page_cache_release(page);
1445 index++;
1446 }
1447 return err;
1448}
1449
1450/*
1451 * helper function to unlock both pages and extents in the tree.
1452 */
1453static int unlock_range(struct extent_io_tree *tree, u64 start, u64 end)
1454{
1455 unsigned long index = start >> PAGE_CACHE_SHIFT;
1456 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1457 struct page *page;
1458
1459 while (index <= end_index) {
1460 page = find_get_page(tree->mapping, index);
1461 unlock_page(page);
1462 page_cache_release(page);
1463 index++;
1464 }
1465 unlock_extent(tree, start, end, GFP_NOFS);
1466 return 0;
1467}
1468#endif
1469
1470/* 1404/*
1471 * set the private field for a given byte offset in the tree. If there isn't 1405 * set the private field for a given byte offset in the tree. If there isn't
1472 * an extent_state there already, this does nothing. 1406 * an extent_state there already, this does nothing.
@@ -2101,6 +2035,16 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2101 return ret; 2035 return ret;
2102} 2036}
2103 2037
2038static noinline void update_nr_written(struct page *page,
2039 struct writeback_control *wbc,
2040 unsigned long nr_written)
2041{
2042 wbc->nr_to_write -= nr_written;
2043 if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
2044 wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
2045 page->mapping->writeback_index = page->index + nr_written;
2046}
2047
2104/* 2048/*
2105 * the writepage semantics are similar to regular writepage. extent 2049 * the writepage semantics are similar to regular writepage. extent
2106 * records are inserted to lock ranges in the tree, and as dirty areas 2050 * records are inserted to lock ranges in the tree, and as dirty areas
@@ -2136,8 +2080,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2136 u64 delalloc_end; 2080 u64 delalloc_end;
2137 int page_started; 2081 int page_started;
2138 int compressed; 2082 int compressed;
2083 int write_flags;
2139 unsigned long nr_written = 0; 2084 unsigned long nr_written = 0;
2140 2085
2086 if (wbc->sync_mode == WB_SYNC_ALL)
2087 write_flags = WRITE_SYNC_PLUG;
2088 else
2089 write_flags = WRITE;
2090
2141 WARN_ON(!PageLocked(page)); 2091 WARN_ON(!PageLocked(page));
2142 pg_offset = i_size & (PAGE_CACHE_SIZE - 1); 2092 pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
2143 if (page->index > end_index || 2093 if (page->index > end_index ||
@@ -2164,6 +2114,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2164 delalloc_end = 0; 2114 delalloc_end = 0;
2165 page_started = 0; 2115 page_started = 0;
2166 if (!epd->extent_locked) { 2116 if (!epd->extent_locked) {
2117 /*
2118 * make sure the wbc mapping index is at least updated
2119 * to this page.
2120 */
2121 update_nr_written(page, wbc, 0);
2122
2167 while (delalloc_end < page_end) { 2123 while (delalloc_end < page_end) {
2168 nr_delalloc = find_lock_delalloc_range(inode, tree, 2124 nr_delalloc = find_lock_delalloc_range(inode, tree,
2169 page, 2125 page,
@@ -2185,7 +2141,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2185 */ 2141 */
2186 if (page_started) { 2142 if (page_started) {
2187 ret = 0; 2143 ret = 0;
2188 goto update_nr_written; 2144 /*
2145 * we've unlocked the page, so we can't update
2146 * the mapping's writeback index, just update
2147 * nr_to_write.
2148 */
2149 wbc->nr_to_write -= nr_written;
2150 goto done_unlocked;
2189 } 2151 }
2190 } 2152 }
2191 lock_extent(tree, start, page_end, GFP_NOFS); 2153 lock_extent(tree, start, page_end, GFP_NOFS);
@@ -2198,13 +2160,18 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2198 if (ret == -EAGAIN) { 2160 if (ret == -EAGAIN) {
2199 unlock_extent(tree, start, page_end, GFP_NOFS); 2161 unlock_extent(tree, start, page_end, GFP_NOFS);
2200 redirty_page_for_writepage(wbc, page); 2162 redirty_page_for_writepage(wbc, page);
2163 update_nr_written(page, wbc, nr_written);
2201 unlock_page(page); 2164 unlock_page(page);
2202 ret = 0; 2165 ret = 0;
2203 goto update_nr_written; 2166 goto done_unlocked;
2204 } 2167 }
2205 } 2168 }
2206 2169
2207 nr_written++; 2170 /*
2171 * we don't want to touch the inode after unlocking the page,
2172 * so we update the mapping writeback index now
2173 */
2174 update_nr_written(page, wbc, nr_written + 1);
2208 2175
2209 end = page_end; 2176 end = page_end;
2210 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) 2177 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0))
@@ -2314,9 +2281,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2314 (unsigned long long)end); 2281 (unsigned long long)end);
2315 } 2282 }
2316 2283
2317 ret = submit_extent_page(WRITE, tree, page, sector, 2284 ret = submit_extent_page(write_flags, tree, page,
2318 iosize, pg_offset, bdev, 2285 sector, iosize, pg_offset,
2319 &epd->bio, max_nr, 2286 bdev, &epd->bio, max_nr,
2320 end_bio_extent_writepage, 2287 end_bio_extent_writepage,
2321 0, 0, 0); 2288 0, 0, 0);
2322 if (ret) 2289 if (ret)
@@ -2336,11 +2303,8 @@ done:
2336 unlock_extent(tree, unlock_start, page_end, GFP_NOFS); 2303 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
2337 unlock_page(page); 2304 unlock_page(page);
2338 2305
2339update_nr_written: 2306done_unlocked:
2340 wbc->nr_to_write -= nr_written; 2307
2341 if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
2342 wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
2343 page->mapping->writeback_index = page->index + nr_written;
2344 return 0; 2308 return 0;
2345} 2309}
2346 2310
@@ -2460,15 +2424,23 @@ retry:
2460 return ret; 2424 return ret;
2461} 2425}
2462 2426
2463static noinline void flush_write_bio(void *data) 2427static void flush_epd_write_bio(struct extent_page_data *epd)
2464{ 2428{
2465 struct extent_page_data *epd = data;
2466 if (epd->bio) { 2429 if (epd->bio) {
2467 submit_one_bio(WRITE, epd->bio, 0, 0); 2430 if (epd->sync_io)
2431 submit_one_bio(WRITE_SYNC, epd->bio, 0, 0);
2432 else
2433 submit_one_bio(WRITE, epd->bio, 0, 0);
2468 epd->bio = NULL; 2434 epd->bio = NULL;
2469 } 2435 }
2470} 2436}
2471 2437
2438static noinline void flush_write_bio(void *data)
2439{
2440 struct extent_page_data *epd = data;
2441 flush_epd_write_bio(epd);
2442}
2443
2472int extent_write_full_page(struct extent_io_tree *tree, struct page *page, 2444int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2473 get_extent_t *get_extent, 2445 get_extent_t *get_extent,
2474 struct writeback_control *wbc) 2446 struct writeback_control *wbc)
@@ -2480,23 +2452,22 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2480 .tree = tree, 2452 .tree = tree,
2481 .get_extent = get_extent, 2453 .get_extent = get_extent,
2482 .extent_locked = 0, 2454 .extent_locked = 0,
2455 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
2483 }; 2456 };
2484 struct writeback_control wbc_writepages = { 2457 struct writeback_control wbc_writepages = {
2485 .bdi = wbc->bdi, 2458 .bdi = wbc->bdi,
2486 .sync_mode = WB_SYNC_NONE, 2459 .sync_mode = wbc->sync_mode,
2487 .older_than_this = NULL, 2460 .older_than_this = NULL,
2488 .nr_to_write = 64, 2461 .nr_to_write = 64,
2489 .range_start = page_offset(page) + PAGE_CACHE_SIZE, 2462 .range_start = page_offset(page) + PAGE_CACHE_SIZE,
2490 .range_end = (loff_t)-1, 2463 .range_end = (loff_t)-1,
2491 }; 2464 };
2492 2465
2493
2494 ret = __extent_writepage(page, wbc, &epd); 2466 ret = __extent_writepage(page, wbc, &epd);
2495 2467
2496 extent_write_cache_pages(tree, mapping, &wbc_writepages, 2468 extent_write_cache_pages(tree, mapping, &wbc_writepages,
2497 __extent_writepage, &epd, flush_write_bio); 2469 __extent_writepage, &epd, flush_write_bio);
2498 if (epd.bio) 2470 flush_epd_write_bio(&epd);
2499 submit_one_bio(WRITE, epd.bio, 0, 0);
2500 return ret; 2471 return ret;
2501} 2472}
2502 2473
@@ -2515,6 +2486,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
2515 .tree = tree, 2486 .tree = tree,
2516 .get_extent = get_extent, 2487 .get_extent = get_extent,
2517 .extent_locked = 1, 2488 .extent_locked = 1,
2489 .sync_io = mode == WB_SYNC_ALL,
2518 }; 2490 };
2519 struct writeback_control wbc_writepages = { 2491 struct writeback_control wbc_writepages = {
2520 .bdi = inode->i_mapping->backing_dev_info, 2492 .bdi = inode->i_mapping->backing_dev_info,
@@ -2540,8 +2512,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
2540 start += PAGE_CACHE_SIZE; 2512 start += PAGE_CACHE_SIZE;
2541 } 2513 }
2542 2514
2543 if (epd.bio) 2515 flush_epd_write_bio(&epd);
2544 submit_one_bio(WRITE, epd.bio, 0, 0);
2545 return ret; 2516 return ret;
2546} 2517}
2547 2518
@@ -2556,13 +2527,13 @@ int extent_writepages(struct extent_io_tree *tree,
2556 .tree = tree, 2527 .tree = tree,
2557 .get_extent = get_extent, 2528 .get_extent = get_extent,
2558 .extent_locked = 0, 2529 .extent_locked = 0,
2530 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
2559 }; 2531 };
2560 2532
2561 ret = extent_write_cache_pages(tree, mapping, wbc, 2533 ret = extent_write_cache_pages(tree, mapping, wbc,
2562 __extent_writepage, &epd, 2534 __extent_writepage, &epd,
2563 flush_write_bio); 2535 flush_write_bio);
2564 if (epd.bio) 2536 flush_epd_write_bio(&epd);
2565 submit_one_bio(WRITE, epd.bio, 0, 0);
2566 return ret; 2537 return ret;
2567} 2538}
2568 2539
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index b187917b36fa..30c9365861e6 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -6,19 +6,14 @@
6#include <linux/hardirq.h> 6#include <linux/hardirq.h>
7#include "extent_map.h" 7#include "extent_map.h"
8 8
9/* temporary define until extent_map moves out of btrfs */
10struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
11 unsigned long extra_flags,
12 void (*ctor)(void *, struct kmem_cache *,
13 unsigned long));
14 9
15static struct kmem_cache *extent_map_cache; 10static struct kmem_cache *extent_map_cache;
16 11
17int __init extent_map_init(void) 12int __init extent_map_init(void)
18{ 13{
19 extent_map_cache = btrfs_cache_create("extent_map", 14 extent_map_cache = kmem_cache_create("extent_map",
20 sizeof(struct extent_map), 0, 15 sizeof(struct extent_map), 0,
21 NULL); 16 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
22 if (!extent_map_cache) 17 if (!extent_map_cache)
23 return -ENOMEM; 18 return -ENOMEM;
24 return 0; 19 return 0;
@@ -43,7 +38,6 @@ void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
43 tree->map.rb_node = NULL; 38 tree->map.rb_node = NULL;
44 spin_lock_init(&tree->lock); 39 spin_lock_init(&tree->lock);
45} 40}
46EXPORT_SYMBOL(extent_map_tree_init);
47 41
48/** 42/**
49 * alloc_extent_map - allocate new extent map structure 43 * alloc_extent_map - allocate new extent map structure
@@ -64,7 +58,6 @@ struct extent_map *alloc_extent_map(gfp_t mask)
64 atomic_set(&em->refs, 1); 58 atomic_set(&em->refs, 1);
65 return em; 59 return em;
66} 60}
67EXPORT_SYMBOL(alloc_extent_map);
68 61
69/** 62/**
70 * free_extent_map - drop reference count of an extent_map 63 * free_extent_map - drop reference count of an extent_map
@@ -83,7 +76,6 @@ void free_extent_map(struct extent_map *em)
83 kmem_cache_free(extent_map_cache, em); 76 kmem_cache_free(extent_map_cache, em);
84 } 77 }
85} 78}
86EXPORT_SYMBOL(free_extent_map);
87 79
88static struct rb_node *tree_insert(struct rb_root *root, u64 offset, 80static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
89 struct rb_node *node) 81 struct rb_node *node)
@@ -264,7 +256,6 @@ int add_extent_mapping(struct extent_map_tree *tree,
264out: 256out:
265 return ret; 257 return ret;
266} 258}
267EXPORT_SYMBOL(add_extent_mapping);
268 259
269/* simple helper to do math around the end of an extent, handling wrap */ 260/* simple helper to do math around the end of an extent, handling wrap */
270static u64 range_end(u64 start, u64 len) 261static u64 range_end(u64 start, u64 len)
@@ -326,7 +317,6 @@ found:
326out: 317out:
327 return em; 318 return em;
328} 319}
329EXPORT_SYMBOL(lookup_extent_mapping);
330 320
331/** 321/**
332 * remove_extent_mapping - removes an extent_map from the extent tree 322 * remove_extent_mapping - removes an extent_map from the extent tree
@@ -346,4 +336,3 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
346 em->in_tree = 0; 336 em->in_tree = 0;
347 return ret; 337 return ret;
348} 338}
349EXPORT_SYMBOL(remove_extent_mapping);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9c9fb46ccd08..1d51dc38bb49 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -272,83 +272,6 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
272 return 0; 272 return 0;
273} 273}
274 274
275int btrfs_check_file(struct btrfs_root *root, struct inode *inode)
276{
277 return 0;
278#if 0
279 struct btrfs_path *path;
280 struct btrfs_key found_key;
281 struct extent_buffer *leaf;
282 struct btrfs_file_extent_item *extent;
283 u64 last_offset = 0;
284 int nritems;
285 int slot;
286 int found_type;
287 int ret;
288 int err = 0;
289 u64 extent_end = 0;
290
291 path = btrfs_alloc_path();
292 ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino,
293 last_offset, 0);
294 while (1) {
295 nritems = btrfs_header_nritems(path->nodes[0]);
296 if (path->slots[0] >= nritems) {
297 ret = btrfs_next_leaf(root, path);
298 if (ret)
299 goto out;
300 nritems = btrfs_header_nritems(path->nodes[0]);
301 }
302 slot = path->slots[0];
303 leaf = path->nodes[0];
304 btrfs_item_key_to_cpu(leaf, &found_key, slot);
305 if (found_key.objectid != inode->i_ino)
306 break;
307 if (found_key.type != BTRFS_EXTENT_DATA_KEY)
308 goto out;
309
310 if (found_key.offset < last_offset) {
311 WARN_ON(1);
312 btrfs_print_leaf(root, leaf);
313 printk(KERN_ERR "inode %lu found offset %llu "
314 "expected %llu\n", inode->i_ino,
315 (unsigned long long)found_key.offset,
316 (unsigned long long)last_offset);
317 err = 1;
318 goto out;
319 }
320 extent = btrfs_item_ptr(leaf, slot,
321 struct btrfs_file_extent_item);
322 found_type = btrfs_file_extent_type(leaf, extent);
323 if (found_type == BTRFS_FILE_EXTENT_REG) {
324 extent_end = found_key.offset +
325 btrfs_file_extent_num_bytes(leaf, extent);
326 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
327 struct btrfs_item *item;
328 item = btrfs_item_nr(leaf, slot);
329 extent_end = found_key.offset +
330 btrfs_file_extent_inline_len(leaf, extent);
331 extent_end = (extent_end + root->sectorsize - 1) &
332 ~((u64)root->sectorsize - 1);
333 }
334 last_offset = extent_end;
335 path->slots[0]++;
336 }
337 if (0 && last_offset < inode->i_size) {
338 WARN_ON(1);
339 btrfs_print_leaf(root, leaf);
340 printk(KERN_ERR "inode %lu found offset %llu size %llu\n",
341 inode->i_ino, (unsigned long long)last_offset,
342 (unsigned long long)inode->i_size);
343 err = 1;
344
345 }
346out:
347 btrfs_free_path(path);
348 return err;
349#endif
350}
351
352/* 275/*
353 * this is very complex, but the basic idea is to drop all extents 276 * this is very complex, but the basic idea is to drop all extents
354 * in the range start - end. hint_block is filled in with a block number 277 * in the range start - end. hint_block is filled in with a block number
@@ -363,15 +286,16 @@ out:
363 */ 286 */
364noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, 287noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
365 struct btrfs_root *root, struct inode *inode, 288 struct btrfs_root *root, struct inode *inode,
366 u64 start, u64 end, u64 inline_limit, u64 *hint_byte) 289 u64 start, u64 end, u64 locked_end,
290 u64 inline_limit, u64 *hint_byte)
367{ 291{
368 u64 extent_end = 0; 292 u64 extent_end = 0;
369 u64 locked_end = end;
370 u64 search_start = start; 293 u64 search_start = start;
371 u64 leaf_start; 294 u64 leaf_start;
372 u64 ram_bytes = 0; 295 u64 ram_bytes = 0;
373 u64 orig_parent = 0; 296 u64 orig_parent = 0;
374 u64 disk_bytenr = 0; 297 u64 disk_bytenr = 0;
298 u64 orig_locked_end = locked_end;
375 u8 compression; 299 u8 compression;
376 u8 encryption; 300 u8 encryption;
377 u16 other_encoding = 0; 301 u16 other_encoding = 0;
@@ -684,11 +608,10 @@ next_slot:
684 } 608 }
685out: 609out:
686 btrfs_free_path(path); 610 btrfs_free_path(path);
687 if (locked_end > end) { 611 if (locked_end > orig_locked_end) {
688 unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, 612 unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end,
689 GFP_NOFS); 613 locked_end - 1, GFP_NOFS);
690 } 614 }
691 btrfs_check_file(root, inode);
692 return ret; 615 return ret;
693} 616}
694 617
@@ -830,7 +753,7 @@ again:
830 753
831 ret = btrfs_del_items(trans, root, path, del_slot, del_nr); 754 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
832 BUG_ON(ret); 755 BUG_ON(ret);
833 goto done; 756 goto release;
834 } else if (split == start) { 757 } else if (split == start) {
835 if (locked_end < extent_end) { 758 if (locked_end < extent_end) {
836 ret = try_lock_extent(&BTRFS_I(inode)->io_tree, 759 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
@@ -926,6 +849,8 @@ again:
926 } 849 }
927done: 850done:
928 btrfs_mark_buffer_dirty(leaf); 851 btrfs_mark_buffer_dirty(leaf);
852
853release:
929 btrfs_release_path(root, path); 854 btrfs_release_path(root, path);
930 if (split_end && split == start) { 855 if (split_end && split == start) {
931 split = end; 856 split = end;
@@ -1131,7 +1056,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1131 if (will_write) { 1056 if (will_write) {
1132 btrfs_fdatawrite_range(inode->i_mapping, pos, 1057 btrfs_fdatawrite_range(inode->i_mapping, pos,
1133 pos + write_bytes - 1, 1058 pos + write_bytes - 1,
1134 WB_SYNC_NONE); 1059 WB_SYNC_ALL);
1135 } else { 1060 } else {
1136 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1061 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
1137 num_pages); 1062 num_pages);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 768b9523662d..0bc93657b460 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -332,13 +332,17 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
332 printk(KERN_ERR "couldn't find space %llu to free\n", 332 printk(KERN_ERR "couldn't find space %llu to free\n",
333 (unsigned long long)offset); 333 (unsigned long long)offset);
334 printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n", 334 printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n",
335 block_group->cached, block_group->key.objectid, 335 block_group->cached,
336 block_group->key.offset); 336 (unsigned long long)block_group->key.objectid,
337 (unsigned long long)block_group->key.offset);
337 btrfs_dump_free_space(block_group, bytes); 338 btrfs_dump_free_space(block_group, bytes);
338 } else if (info) { 339 } else if (info) {
339 printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, " 340 printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, "
340 "but wanted offset=%llu bytes=%llu\n", 341 "but wanted offset=%llu bytes=%llu\n",
341 info->offset, info->bytes, offset, bytes); 342 (unsigned long long)info->offset,
343 (unsigned long long)info->bytes,
344 (unsigned long long)offset,
345 (unsigned long long)bytes);
342 } 346 }
343 WARN_ON(1); 347 WARN_ON(1);
344 } 348 }
@@ -357,8 +361,9 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
357 info = rb_entry(n, struct btrfs_free_space, offset_index); 361 info = rb_entry(n, struct btrfs_free_space, offset_index);
358 if (info->bytes >= bytes) 362 if (info->bytes >= bytes)
359 count++; 363 count++;
360 printk(KERN_ERR "entry offset %llu, bytes %llu\n", info->offset, 364 printk(KERN_ERR "entry offset %llu, bytes %llu\n",
361 info->bytes); 365 (unsigned long long)info->offset,
366 (unsigned long long)info->bytes);
362 } 367 }
363 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" 368 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
364 "\n", count); 369 "\n", count);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index cc7334d833c9..9abbced1123d 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -79,7 +79,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
79 } 79 }
80 path = btrfs_alloc_path(); 80 path = btrfs_alloc_path();
81 BUG_ON(!path); 81 BUG_ON(!path);
82 search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); 82 search_start = max(search_start, (u64)BTRFS_FIRST_FREE_OBJECTID);
83 search_key.objectid = search_start; 83 search_key.objectid = search_start;
84 search_key.type = 0; 84 search_key.type = 0;
85 search_key.offset = 0; 85 search_key.offset = 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a0d1dd492a58..90c23eb28829 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -70,7 +70,6 @@ static struct extent_io_ops btrfs_extent_io_ops;
70static struct kmem_cache *btrfs_inode_cachep; 70static struct kmem_cache *btrfs_inode_cachep;
71struct kmem_cache *btrfs_trans_handle_cachep; 71struct kmem_cache *btrfs_trans_handle_cachep;
72struct kmem_cache *btrfs_transaction_cachep; 72struct kmem_cache *btrfs_transaction_cachep;
73struct kmem_cache *btrfs_bit_radix_cachep;
74struct kmem_cache *btrfs_path_cachep; 73struct kmem_cache *btrfs_path_cachep;
75 74
76#define S_SHIFT 12 75#define S_SHIFT 12
@@ -234,7 +233,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
234 } 233 }
235 234
236 ret = btrfs_drop_extents(trans, root, inode, start, 235 ret = btrfs_drop_extents(trans, root, inode, start,
237 aligned_end, start, &hint_byte); 236 aligned_end, aligned_end, start, &hint_byte);
238 BUG_ON(ret); 237 BUG_ON(ret);
239 238
240 if (isize > actual_end) 239 if (isize > actual_end)
@@ -1439,6 +1438,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1439 struct inode *inode, u64 file_pos, 1438 struct inode *inode, u64 file_pos,
1440 u64 disk_bytenr, u64 disk_num_bytes, 1439 u64 disk_bytenr, u64 disk_num_bytes,
1441 u64 num_bytes, u64 ram_bytes, 1440 u64 num_bytes, u64 ram_bytes,
1441 u64 locked_end,
1442 u8 compression, u8 encryption, 1442 u8 compression, u8 encryption,
1443 u16 other_encoding, int extent_type) 1443 u16 other_encoding, int extent_type)
1444{ 1444{
@@ -1455,7 +1455,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1455 1455
1456 path->leave_spinning = 1; 1456 path->leave_spinning = 1;
1457 ret = btrfs_drop_extents(trans, root, inode, file_pos, 1457 ret = btrfs_drop_extents(trans, root, inode, file_pos,
1458 file_pos + num_bytes, file_pos, &hint); 1458 file_pos + num_bytes, locked_end,
1459 file_pos, &hint);
1459 BUG_ON(ret); 1460 BUG_ON(ret);
1460 1461
1461 ins.objectid = inode->i_ino; 1462 ins.objectid = inode->i_ino;
@@ -1590,6 +1591,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1590 ordered_extent->disk_len, 1591 ordered_extent->disk_len,
1591 ordered_extent->len, 1592 ordered_extent->len,
1592 ordered_extent->len, 1593 ordered_extent->len,
1594 ordered_extent->file_offset +
1595 ordered_extent->len,
1593 compressed, 0, 0, 1596 compressed, 0, 0,
1594 BTRFS_FILE_EXTENT_REG); 1597 BTRFS_FILE_EXTENT_REG);
1595 BUG_ON(ret); 1598 BUG_ON(ret);
@@ -1819,10 +1822,12 @@ good:
1819 return 0; 1822 return 0;
1820 1823
1821zeroit: 1824zeroit:
1822 printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " 1825 if (printk_ratelimit()) {
1823 "private %llu\n", page->mapping->host->i_ino, 1826 printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u "
1824 (unsigned long long)start, csum, 1827 "private %llu\n", page->mapping->host->i_ino,
1825 (unsigned long long)private); 1828 (unsigned long long)start, csum,
1829 (unsigned long long)private);
1830 }
1826 memset(kaddr + offset, 1, end - start + 1); 1831 memset(kaddr + offset, 1, end - start + 1);
1827 flush_dcache_page(page); 1832 flush_dcache_page(page);
1828 kunmap_atomic(kaddr, KM_USER0); 1833 kunmap_atomic(kaddr, KM_USER0);
@@ -2011,6 +2016,57 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2011} 2016}
2012 2017
2013/* 2018/*
2019 * very simple check to peek ahead in the leaf looking for xattrs. If we
2020 * don't find any xattrs, we know there can't be any acls.
2021 *
2022 * slot is the slot the inode is in, objectid is the objectid of the inode
2023 */
2024static noinline int acls_after_inode_item(struct extent_buffer *leaf,
2025 int slot, u64 objectid)
2026{
2027 u32 nritems = btrfs_header_nritems(leaf);
2028 struct btrfs_key found_key;
2029 int scanned = 0;
2030
2031 slot++;
2032 while (slot < nritems) {
2033 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2034
2035 /* we found a different objectid, there must not be acls */
2036 if (found_key.objectid != objectid)
2037 return 0;
2038
2039 /* we found an xattr, assume we've got an acl */
2040 if (found_key.type == BTRFS_XATTR_ITEM_KEY)
2041 return 1;
2042
2043 /*
2044 * we found a key greater than an xattr key, there can't
2045 * be any acls later on
2046 */
2047 if (found_key.type > BTRFS_XATTR_ITEM_KEY)
2048 return 0;
2049
2050 slot++;
2051 scanned++;
2052
2053 /*
2054 * it goes inode, inode backrefs, xattrs, extents,
2055 * so if there are a ton of hard links to an inode there can
2056 * be a lot of backrefs. Don't waste time searching too hard,
2057 * this is just an optimization
2058 */
2059 if (scanned >= 8)
2060 break;
2061 }
2062 /* we hit the end of the leaf before we found an xattr or
2063 * something larger than an xattr. We have to assume the inode
2064 * has acls
2065 */
2066 return 1;
2067}
2068
2069/*
2014 * read an inode from the btree into the in-memory inode 2070 * read an inode from the btree into the in-memory inode
2015 */ 2071 */
2016void btrfs_read_locked_inode(struct inode *inode) 2072void btrfs_read_locked_inode(struct inode *inode)
@@ -2021,6 +2077,7 @@ void btrfs_read_locked_inode(struct inode *inode)
2021 struct btrfs_timespec *tspec; 2077 struct btrfs_timespec *tspec;
2022 struct btrfs_root *root = BTRFS_I(inode)->root; 2078 struct btrfs_root *root = BTRFS_I(inode)->root;
2023 struct btrfs_key location; 2079 struct btrfs_key location;
2080 int maybe_acls;
2024 u64 alloc_group_block; 2081 u64 alloc_group_block;
2025 u32 rdev; 2082 u32 rdev;
2026 int ret; 2083 int ret;
@@ -2067,6 +2124,16 @@ void btrfs_read_locked_inode(struct inode *inode)
2067 2124
2068 alloc_group_block = btrfs_inode_block_group(leaf, inode_item); 2125 alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
2069 2126
2127 /*
2128 * try to precache a NULL acl entry for files that don't have
2129 * any xattrs or acls
2130 */
2131 maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino);
2132 if (!maybe_acls) {
2133 BTRFS_I(inode)->i_acl = NULL;
2134 BTRFS_I(inode)->i_default_acl = NULL;
2135 }
2136
2070 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, 2137 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0,
2071 alloc_group_block, 0); 2138 alloc_group_block, 0);
2072 btrfs_free_path(path); 2139 btrfs_free_path(path);
@@ -2877,6 +2944,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
2877 err = btrfs_drop_extents(trans, root, inode, 2944 err = btrfs_drop_extents(trans, root, inode,
2878 cur_offset, 2945 cur_offset,
2879 cur_offset + hole_size, 2946 cur_offset + hole_size,
2947 block_end,
2880 cur_offset, &hint_byte); 2948 cur_offset, &hint_byte);
2881 if (err) 2949 if (err)
2882 break; 2950 break;
@@ -3041,8 +3109,8 @@ static noinline void init_btrfs_i(struct inode *inode)
3041{ 3109{
3042 struct btrfs_inode *bi = BTRFS_I(inode); 3110 struct btrfs_inode *bi = BTRFS_I(inode);
3043 3111
3044 bi->i_acl = NULL; 3112 bi->i_acl = BTRFS_ACL_NOT_CACHED;
3045 bi->i_default_acl = NULL; 3113 bi->i_default_acl = BTRFS_ACL_NOT_CACHED;
3046 3114
3047 bi->generation = 0; 3115 bi->generation = 0;
3048 bi->sequence = 0; 3116 bi->sequence = 0;
@@ -4634,47 +4702,36 @@ void btrfs_destroy_cachep(void)
4634 kmem_cache_destroy(btrfs_trans_handle_cachep); 4702 kmem_cache_destroy(btrfs_trans_handle_cachep);
4635 if (btrfs_transaction_cachep) 4703 if (btrfs_transaction_cachep)
4636 kmem_cache_destroy(btrfs_transaction_cachep); 4704 kmem_cache_destroy(btrfs_transaction_cachep);
4637 if (btrfs_bit_radix_cachep)
4638 kmem_cache_destroy(btrfs_bit_radix_cachep);
4639 if (btrfs_path_cachep) 4705 if (btrfs_path_cachep)
4640 kmem_cache_destroy(btrfs_path_cachep); 4706 kmem_cache_destroy(btrfs_path_cachep);
4641} 4707}
4642 4708
4643struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
4644 unsigned long extra_flags,
4645 void (*ctor)(void *))
4646{
4647 return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
4648 SLAB_MEM_SPREAD | extra_flags), ctor);
4649}
4650
4651int btrfs_init_cachep(void) 4709int btrfs_init_cachep(void)
4652{ 4710{
4653 btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache", 4711 btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
4654 sizeof(struct btrfs_inode), 4712 sizeof(struct btrfs_inode), 0,
4655 0, init_once); 4713 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once);
4656 if (!btrfs_inode_cachep) 4714 if (!btrfs_inode_cachep)
4657 goto fail; 4715 goto fail;
4658 btrfs_trans_handle_cachep = 4716
4659 btrfs_cache_create("btrfs_trans_handle_cache", 4717 btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
4660 sizeof(struct btrfs_trans_handle), 4718 sizeof(struct btrfs_trans_handle), 0,
4661 0, NULL); 4719 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
4662 if (!btrfs_trans_handle_cachep) 4720 if (!btrfs_trans_handle_cachep)
4663 goto fail; 4721 goto fail;
4664 btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache", 4722
4665 sizeof(struct btrfs_transaction), 4723 btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
4666 0, NULL); 4724 sizeof(struct btrfs_transaction), 0,
4725 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
4667 if (!btrfs_transaction_cachep) 4726 if (!btrfs_transaction_cachep)
4668 goto fail; 4727 goto fail;
4669 btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache", 4728
4670 sizeof(struct btrfs_path), 4729 btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
4671 0, NULL); 4730 sizeof(struct btrfs_path), 0,
4731 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
4672 if (!btrfs_path_cachep) 4732 if (!btrfs_path_cachep)
4673 goto fail; 4733 goto fail;
4674 btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256, 4734
4675 SLAB_DESTROY_BY_RCU, NULL);
4676 if (!btrfs_bit_radix_cachep)
4677 goto fail;
4678 return 0; 4735 return 0;
4679fail: 4736fail:
4680 btrfs_destroy_cachep(); 4737 btrfs_destroy_cachep();
@@ -4970,10 +5027,10 @@ out_fail:
4970 return err; 5027 return err;
4971} 5028}
4972 5029
4973static int prealloc_file_range(struct inode *inode, u64 start, u64 end, 5030static int prealloc_file_range(struct btrfs_trans_handle *trans,
4974 u64 alloc_hint, int mode) 5031 struct inode *inode, u64 start, u64 end,
5032 u64 locked_end, u64 alloc_hint, int mode)
4975{ 5033{
4976 struct btrfs_trans_handle *trans;
4977 struct btrfs_root *root = BTRFS_I(inode)->root; 5034 struct btrfs_root *root = BTRFS_I(inode)->root;
4978 struct btrfs_key ins; 5035 struct btrfs_key ins;
4979 u64 alloc_size; 5036 u64 alloc_size;
@@ -4981,10 +5038,6 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
4981 u64 num_bytes = end - start; 5038 u64 num_bytes = end - start;
4982 int ret = 0; 5039 int ret = 0;
4983 5040
4984 trans = btrfs_join_transaction(root, 1);
4985 BUG_ON(!trans);
4986 btrfs_set_trans_block_group(trans, inode);
4987
4988 while (num_bytes > 0) { 5041 while (num_bytes > 0) {
4989 alloc_size = min(num_bytes, root->fs_info->max_extent); 5042 alloc_size = min(num_bytes, root->fs_info->max_extent);
4990 ret = btrfs_reserve_extent(trans, root, alloc_size, 5043 ret = btrfs_reserve_extent(trans, root, alloc_size,
@@ -4997,7 +5050,8 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
4997 ret = insert_reserved_file_extent(trans, inode, 5050 ret = insert_reserved_file_extent(trans, inode,
4998 cur_offset, ins.objectid, 5051 cur_offset, ins.objectid,
4999 ins.offset, ins.offset, 5052 ins.offset, ins.offset,
5000 ins.offset, 0, 0, 0, 5053 ins.offset, locked_end,
5054 0, 0, 0,
5001 BTRFS_FILE_EXTENT_PREALLOC); 5055 BTRFS_FILE_EXTENT_PREALLOC);
5002 BUG_ON(ret); 5056 BUG_ON(ret);
5003 num_bytes -= ins.offset; 5057 num_bytes -= ins.offset;
@@ -5015,7 +5069,6 @@ out:
5015 BUG_ON(ret); 5069 BUG_ON(ret);
5016 } 5070 }
5017 5071
5018 btrfs_end_transaction(trans, root);
5019 return ret; 5072 return ret;
5020} 5073}
5021 5074
@@ -5027,13 +5080,21 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5027 u64 alloc_start; 5080 u64 alloc_start;
5028 u64 alloc_end; 5081 u64 alloc_end;
5029 u64 alloc_hint = 0; 5082 u64 alloc_hint = 0;
5083 u64 locked_end;
5030 u64 mask = BTRFS_I(inode)->root->sectorsize - 1; 5084 u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
5031 struct extent_map *em; 5085 struct extent_map *em;
5086 struct btrfs_trans_handle *trans;
5032 int ret; 5087 int ret;
5033 5088
5034 alloc_start = offset & ~mask; 5089 alloc_start = offset & ~mask;
5035 alloc_end = (offset + len + mask) & ~mask; 5090 alloc_end = (offset + len + mask) & ~mask;
5036 5091
5092 /*
5093 * wait for ordered IO before we have any locks. We'll loop again
5094 * below with the locks held.
5095 */
5096 btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
5097
5037 mutex_lock(&inode->i_mutex); 5098 mutex_lock(&inode->i_mutex);
5038 if (alloc_start > inode->i_size) { 5099 if (alloc_start > inode->i_size) {
5039 ret = btrfs_cont_expand(inode, alloc_start); 5100 ret = btrfs_cont_expand(inode, alloc_start);
@@ -5041,10 +5102,21 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5041 goto out; 5102 goto out;
5042 } 5103 }
5043 5104
5105 locked_end = alloc_end - 1;
5044 while (1) { 5106 while (1) {
5045 struct btrfs_ordered_extent *ordered; 5107 struct btrfs_ordered_extent *ordered;
5046 lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, 5108
5047 alloc_end - 1, GFP_NOFS); 5109 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
5110 if (!trans) {
5111 ret = -EIO;
5112 goto out;
5113 }
5114
5115 /* the extent lock is ordered inside the running
5116 * transaction
5117 */
5118 lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5119 GFP_NOFS);
5048 ordered = btrfs_lookup_first_ordered_extent(inode, 5120 ordered = btrfs_lookup_first_ordered_extent(inode,
5049 alloc_end - 1); 5121 alloc_end - 1);
5050 if (ordered && 5122 if (ordered &&
@@ -5052,7 +5124,13 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5052 ordered->file_offset < alloc_end) { 5124 ordered->file_offset < alloc_end) {
5053 btrfs_put_ordered_extent(ordered); 5125 btrfs_put_ordered_extent(ordered);
5054 unlock_extent(&BTRFS_I(inode)->io_tree, 5126 unlock_extent(&BTRFS_I(inode)->io_tree,
5055 alloc_start, alloc_end - 1, GFP_NOFS); 5127 alloc_start, locked_end, GFP_NOFS);
5128 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
5129
5130 /*
5131 * we can't wait on the range with the transaction
5132 * running or with the extent lock held
5133 */
5056 btrfs_wait_ordered_range(inode, alloc_start, 5134 btrfs_wait_ordered_range(inode, alloc_start,
5057 alloc_end - alloc_start); 5135 alloc_end - alloc_start);
5058 } else { 5136 } else {
@@ -5070,8 +5148,9 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5070 last_byte = min(extent_map_end(em), alloc_end); 5148 last_byte = min(extent_map_end(em), alloc_end);
5071 last_byte = (last_byte + mask) & ~mask; 5149 last_byte = (last_byte + mask) & ~mask;
5072 if (em->block_start == EXTENT_MAP_HOLE) { 5150 if (em->block_start == EXTENT_MAP_HOLE) {
5073 ret = prealloc_file_range(inode, cur_offset, 5151 ret = prealloc_file_range(trans, inode, cur_offset,
5074 last_byte, alloc_hint, mode); 5152 last_byte, locked_end + 1,
5153 alloc_hint, mode);
5075 if (ret < 0) { 5154 if (ret < 0) {
5076 free_extent_map(em); 5155 free_extent_map(em);
5077 break; 5156 break;
@@ -5087,8 +5166,10 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5087 break; 5166 break;
5088 } 5167 }
5089 } 5168 }
5090 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, 5169 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5091 GFP_NOFS); 5170 GFP_NOFS);
5171
5172 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
5092out: 5173out:
5093 mutex_unlock(&inode->i_mutex); 5174 mutex_unlock(&inode->i_mutex);
5094 return ret; 5175 return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7594bec1be10..5e94ea6e1cbe 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -461,15 +461,9 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
461 if (!capable(CAP_SYS_ADMIN)) 461 if (!capable(CAP_SYS_ADMIN))
462 return -EPERM; 462 return -EPERM;
463 463
464 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); 464 vol_args = memdup_user(arg, sizeof(*vol_args));
465 465 if (IS_ERR(vol_args))
466 if (!vol_args) 466 return PTR_ERR(vol_args);
467 return -ENOMEM;
468
469 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
470 ret = -EFAULT;
471 goto out;
472 }
473 467
474 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 468 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
475 namelen = strlen(vol_args->name); 469 namelen = strlen(vol_args->name);
@@ -483,11 +477,13 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
483 *devstr = '\0'; 477 *devstr = '\0';
484 devstr = vol_args->name; 478 devstr = vol_args->name;
485 devid = simple_strtoull(devstr, &end, 10); 479 devid = simple_strtoull(devstr, &end, 10);
486 printk(KERN_INFO "resizing devid %llu\n", devid); 480 printk(KERN_INFO "resizing devid %llu\n",
481 (unsigned long long)devid);
487 } 482 }
488 device = btrfs_find_device(root, devid, NULL, NULL); 483 device = btrfs_find_device(root, devid, NULL, NULL);
489 if (!device) { 484 if (!device) {
490 printk(KERN_INFO "resizer unable to find device %llu\n", devid); 485 printk(KERN_INFO "resizer unable to find device %llu\n",
486 (unsigned long long)devid);
491 ret = -EINVAL; 487 ret = -EINVAL;
492 goto out_unlock; 488 goto out_unlock;
493 } 489 }
@@ -545,7 +541,6 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
545 541
546out_unlock: 542out_unlock:
547 mutex_unlock(&root->fs_info->volume_mutex); 543 mutex_unlock(&root->fs_info->volume_mutex);
548out:
549 kfree(vol_args); 544 kfree(vol_args);
550 return ret; 545 return ret;
551} 546}
@@ -565,15 +560,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
565 if (root->fs_info->sb->s_flags & MS_RDONLY) 560 if (root->fs_info->sb->s_flags & MS_RDONLY)
566 return -EROFS; 561 return -EROFS;
567 562
568 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); 563 vol_args = memdup_user(arg, sizeof(*vol_args));
569 564 if (IS_ERR(vol_args))
570 if (!vol_args) 565 return PTR_ERR(vol_args);
571 return -ENOMEM;
572
573 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
574 ret = -EFAULT;
575 goto out;
576 }
577 566
578 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 567 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
579 namelen = strlen(vol_args->name); 568 namelen = strlen(vol_args->name);
@@ -675,19 +664,13 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
675 if (!capable(CAP_SYS_ADMIN)) 664 if (!capable(CAP_SYS_ADMIN))
676 return -EPERM; 665 return -EPERM;
677 666
678 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); 667 vol_args = memdup_user(arg, sizeof(*vol_args));
668 if (IS_ERR(vol_args))
669 return PTR_ERR(vol_args);
679 670
680 if (!vol_args)
681 return -ENOMEM;
682
683 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
684 ret = -EFAULT;
685 goto out;
686 }
687 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 671 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
688 ret = btrfs_init_new_device(root, vol_args->name); 672 ret = btrfs_init_new_device(root, vol_args->name);
689 673
690out:
691 kfree(vol_args); 674 kfree(vol_args);
692 return ret; 675 return ret;
693} 676}
@@ -703,19 +686,13 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
703 if (root->fs_info->sb->s_flags & MS_RDONLY) 686 if (root->fs_info->sb->s_flags & MS_RDONLY)
704 return -EROFS; 687 return -EROFS;
705 688
706 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); 689 vol_args = memdup_user(arg, sizeof(*vol_args));
690 if (IS_ERR(vol_args))
691 return PTR_ERR(vol_args);
707 692
708 if (!vol_args)
709 return -ENOMEM;
710
711 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
712 ret = -EFAULT;
713 goto out;
714 }
715 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 693 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
716 ret = btrfs_rm_device(root, vol_args->name); 694 ret = btrfs_rm_device(root, vol_args->name);
717 695
718out:
719 kfree(vol_args); 696 kfree(vol_args);
720 return ret; 697 return ret;
721} 698}
@@ -830,7 +807,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
830 BUG_ON(!trans); 807 BUG_ON(!trans);
831 808
832 /* punch hole in destination first */ 809 /* punch hole in destination first */
833 btrfs_drop_extents(trans, root, inode, off, off+len, 0, &hint_byte); 810 btrfs_drop_extents(trans, root, inode, off, off + len,
811 off + len, 0, &hint_byte);
834 812
835 /* clone data */ 813 /* clone data */
836 key.objectid = src->i_ino; 814 key.objectid = src->i_ino;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 53c87b197d70..d6f0806c682f 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -489,7 +489,7 @@ again:
489 /* start IO across the range first to instantiate any delalloc 489 /* start IO across the range first to instantiate any delalloc
490 * extents 490 * extents
491 */ 491 */
492 btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE); 492 btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL);
493 493
494 /* The compression code will leave pages locked but return from 494 /* The compression code will leave pages locked but return from
495 * writepage without setting the page writeback. Starting again 495 * writepage without setting the page writeback. Starting again
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9744af9d71e9..3536bdb2d7cb 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -68,7 +68,7 @@ enum {
68 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, 68 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
69 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, 69 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
70 Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog, 70 Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog,
71 Opt_flushoncommit, Opt_err, 71 Opt_ratio, Opt_flushoncommit, Opt_err,
72}; 72};
73 73
74static match_table_t tokens = { 74static match_table_t tokens = {
@@ -87,6 +87,7 @@ static match_table_t tokens = {
87 {Opt_noacl, "noacl"}, 87 {Opt_noacl, "noacl"},
88 {Opt_notreelog, "notreelog"}, 88 {Opt_notreelog, "notreelog"},
89 {Opt_flushoncommit, "flushoncommit"}, 89 {Opt_flushoncommit, "flushoncommit"},
90 {Opt_ratio, "metadata_ratio=%d"},
90 {Opt_err, NULL}, 91 {Opt_err, NULL},
91}; 92};
92 93
@@ -195,7 +196,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
195 info->max_extent = max_t(u64, 196 info->max_extent = max_t(u64,
196 info->max_extent, root->sectorsize); 197 info->max_extent, root->sectorsize);
197 printk(KERN_INFO "btrfs: max_extent at %llu\n", 198 printk(KERN_INFO "btrfs: max_extent at %llu\n",
198 info->max_extent); 199 (unsigned long long)info->max_extent);
199 } 200 }
200 break; 201 break;
201 case Opt_max_inline: 202 case Opt_max_inline:
@@ -210,7 +211,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
210 root->sectorsize); 211 root->sectorsize);
211 } 212 }
212 printk(KERN_INFO "btrfs: max_inline at %llu\n", 213 printk(KERN_INFO "btrfs: max_inline at %llu\n",
213 info->max_inline); 214 (unsigned long long)info->max_inline);
214 } 215 }
215 break; 216 break;
216 case Opt_alloc_start: 217 case Opt_alloc_start:
@@ -220,7 +221,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
220 kfree(num); 221 kfree(num);
221 printk(KERN_INFO 222 printk(KERN_INFO
222 "btrfs: allocations start at %llu\n", 223 "btrfs: allocations start at %llu\n",
223 info->alloc_start); 224 (unsigned long long)info->alloc_start);
224 } 225 }
225 break; 226 break;
226 case Opt_noacl: 227 case Opt_noacl:
@@ -234,6 +235,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
234 printk(KERN_INFO "btrfs: turning on flush-on-commit\n"); 235 printk(KERN_INFO "btrfs: turning on flush-on-commit\n");
235 btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT); 236 btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT);
236 break; 237 break;
238 case Opt_ratio:
239 intarg = 0;
240 match_int(&args[0], &intarg);
241 if (intarg) {
242 info->metadata_ratio = intarg;
243 printk(KERN_INFO "btrfs: metadata ratio %d\n",
244 info->metadata_ratio);
245 }
246 break;
237 default: 247 default:
238 break; 248 break;
239 } 249 }
@@ -410,11 +420,14 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
410 if (btrfs_test_opt(root, NOBARRIER)) 420 if (btrfs_test_opt(root, NOBARRIER))
411 seq_puts(seq, ",nobarrier"); 421 seq_puts(seq, ",nobarrier");
412 if (info->max_extent != (u64)-1) 422 if (info->max_extent != (u64)-1)
413 seq_printf(seq, ",max_extent=%llu", info->max_extent); 423 seq_printf(seq, ",max_extent=%llu",
424 (unsigned long long)info->max_extent);
414 if (info->max_inline != 8192 * 1024) 425 if (info->max_inline != 8192 * 1024)
415 seq_printf(seq, ",max_inline=%llu", info->max_inline); 426 seq_printf(seq, ",max_inline=%llu",
427 (unsigned long long)info->max_inline);
416 if (info->alloc_start != 0) 428 if (info->alloc_start != 0)
417 seq_printf(seq, ",alloc_start=%llu", info->alloc_start); 429 seq_printf(seq, ",alloc_start=%llu",
430 (unsigned long long)info->alloc_start);
418 if (info->thread_pool_size != min_t(unsigned long, 431 if (info->thread_pool_size != min_t(unsigned long,
419 num_online_cpus() + 2, 8)) 432 num_online_cpus() + 2, 8))
420 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); 433 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
@@ -635,14 +648,9 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
635 if (!capable(CAP_SYS_ADMIN)) 648 if (!capable(CAP_SYS_ADMIN))
636 return -EPERM; 649 return -EPERM;
637 650
638 vol = kmalloc(sizeof(*vol), GFP_KERNEL); 651 vol = memdup_user((void __user *)arg, sizeof(*vol));
639 if (!vol) 652 if (IS_ERR(vol))
640 return -ENOMEM; 653 return PTR_ERR(vol);
641
642 if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) {
643 ret = -EFAULT;
644 goto out;
645 }
646 654
647 switch (cmd) { 655 switch (cmd) {
648 case BTRFS_IOC_SCAN_DEV: 656 case BTRFS_IOC_SCAN_DEV:
@@ -650,7 +658,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
650 &btrfs_fs_type, &fs_devices); 658 &btrfs_fs_type, &fs_devices);
651 break; 659 break;
652 } 660 }
653out: 661
654 kfree(vol); 662 kfree(vol);
655 return ret; 663 return ret;
656} 664}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2869b3361eb6..01b143605ec1 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -687,7 +687,13 @@ static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
687 prepare_to_wait(&info->transaction_wait, &wait, 687 prepare_to_wait(&info->transaction_wait, &wait,
688 TASK_UNINTERRUPTIBLE); 688 TASK_UNINTERRUPTIBLE);
689 mutex_unlock(&info->trans_mutex); 689 mutex_unlock(&info->trans_mutex);
690
691 atomic_dec(&info->throttles);
692 wake_up(&info->transaction_throttle);
693
690 schedule(); 694 schedule();
695
696 atomic_inc(&info->throttles);
691 mutex_lock(&info->trans_mutex); 697 mutex_lock(&info->trans_mutex);
692 finish_wait(&info->transaction_wait, &wait); 698 finish_wait(&info->transaction_wait, &wait);
693 } 699 }
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 25f20ea11f27..db5e212e8445 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -536,7 +536,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
536 saved_nbytes = inode_get_bytes(inode); 536 saved_nbytes = inode_get_bytes(inode);
537 /* drop any overlapping extents */ 537 /* drop any overlapping extents */
538 ret = btrfs_drop_extents(trans, root, inode, 538 ret = btrfs_drop_extents(trans, root, inode,
539 start, extent_end, start, &alloc_hint); 539 start, extent_end, extent_end, start, &alloc_hint);
540 BUG_ON(ret); 540 BUG_ON(ret);
541 541
542 if (found_type == BTRFS_FILE_EXTENT_REG || 542 if (found_type == BTRFS_FILE_EXTENT_REG ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e0913e469728..5f01dad4b696 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -125,6 +125,20 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
125 return NULL; 125 return NULL;
126} 126}
127 127
128static void requeue_list(struct btrfs_pending_bios *pending_bios,
129 struct bio *head, struct bio *tail)
130{
131
132 struct bio *old_head;
133
134 old_head = pending_bios->head;
135 pending_bios->head = head;
136 if (pending_bios->tail)
137 tail->bi_next = old_head;
138 else
139 pending_bios->tail = tail;
140}
141
128/* 142/*
129 * we try to collect pending bios for a device so we don't get a large 143 * we try to collect pending bios for a device so we don't get a large
130 * number of procs sending bios down to the same device. This greatly 144 * number of procs sending bios down to the same device. This greatly
@@ -141,10 +155,12 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
141 struct bio *pending; 155 struct bio *pending;
142 struct backing_dev_info *bdi; 156 struct backing_dev_info *bdi;
143 struct btrfs_fs_info *fs_info; 157 struct btrfs_fs_info *fs_info;
158 struct btrfs_pending_bios *pending_bios;
144 struct bio *tail; 159 struct bio *tail;
145 struct bio *cur; 160 struct bio *cur;
146 int again = 0; 161 int again = 0;
147 unsigned long num_run = 0; 162 unsigned long num_run;
163 unsigned long num_sync_run;
148 unsigned long limit; 164 unsigned long limit;
149 unsigned long last_waited = 0; 165 unsigned long last_waited = 0;
150 166
@@ -153,20 +169,30 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
153 limit = btrfs_async_submit_limit(fs_info); 169 limit = btrfs_async_submit_limit(fs_info);
154 limit = limit * 2 / 3; 170 limit = limit * 2 / 3;
155 171
172 /* we want to make sure that every time we switch from the sync
173 * list to the normal list, we unplug
174 */
175 num_sync_run = 0;
176
156loop: 177loop:
157 spin_lock(&device->io_lock); 178 spin_lock(&device->io_lock);
179 num_run = 0;
158 180
159loop_lock: 181loop_lock:
182
160 /* take all the bios off the list at once and process them 183 /* take all the bios off the list at once and process them
161 * later on (without the lock held). But, remember the 184 * later on (without the lock held). But, remember the
162 * tail and other pointers so the bios can be properly reinserted 185 * tail and other pointers so the bios can be properly reinserted
163 * into the list if we hit congestion 186 * into the list if we hit congestion
164 */ 187 */
165 pending = device->pending_bios; 188 if (device->pending_sync_bios.head)
166 tail = device->pending_bio_tail; 189 pending_bios = &device->pending_sync_bios;
190 else
191 pending_bios = &device->pending_bios;
192
193 pending = pending_bios->head;
194 tail = pending_bios->tail;
167 WARN_ON(pending && !tail); 195 WARN_ON(pending && !tail);
168 device->pending_bios = NULL;
169 device->pending_bio_tail = NULL;
170 196
171 /* 197 /*
172 * if pending was null this time around, no bios need processing 198 * if pending was null this time around, no bios need processing
@@ -176,16 +202,41 @@ loop_lock:
176 * device->running_pending is used to synchronize with the 202 * device->running_pending is used to synchronize with the
177 * schedule_bio code. 203 * schedule_bio code.
178 */ 204 */
179 if (pending) { 205 if (device->pending_sync_bios.head == NULL &&
180 again = 1; 206 device->pending_bios.head == NULL) {
181 device->running_pending = 1;
182 } else {
183 again = 0; 207 again = 0;
184 device->running_pending = 0; 208 device->running_pending = 0;
209 } else {
210 again = 1;
211 device->running_pending = 1;
185 } 212 }
213
214 pending_bios->head = NULL;
215 pending_bios->tail = NULL;
216
186 spin_unlock(&device->io_lock); 217 spin_unlock(&device->io_lock);
187 218
219 /*
220 * if we're doing the regular priority list, make sure we unplug
221 * for any high prio bios we've sent down
222 */
223 if (pending_bios == &device->pending_bios && num_sync_run > 0) {
224 num_sync_run = 0;
225 blk_run_backing_dev(bdi, NULL);
226 }
227
188 while (pending) { 228 while (pending) {
229
230 rmb();
231 if (pending_bios != &device->pending_sync_bios &&
232 device->pending_sync_bios.head &&
233 num_run > 16) {
234 cond_resched();
235 spin_lock(&device->io_lock);
236 requeue_list(pending_bios, pending, tail);
237 goto loop_lock;
238 }
239
189 cur = pending; 240 cur = pending;
190 pending = pending->bi_next; 241 pending = pending->bi_next;
191 cur->bi_next = NULL; 242 cur->bi_next = NULL;
@@ -196,10 +247,18 @@ loop_lock:
196 wake_up(&fs_info->async_submit_wait); 247 wake_up(&fs_info->async_submit_wait);
197 248
198 BUG_ON(atomic_read(&cur->bi_cnt) == 0); 249 BUG_ON(atomic_read(&cur->bi_cnt) == 0);
199 bio_get(cur);
200 submit_bio(cur->bi_rw, cur); 250 submit_bio(cur->bi_rw, cur);
201 bio_put(cur);
202 num_run++; 251 num_run++;
252 if (bio_sync(cur))
253 num_sync_run++;
254
255 if (need_resched()) {
256 if (num_sync_run) {
257 blk_run_backing_dev(bdi, NULL);
258 num_sync_run = 0;
259 }
260 cond_resched();
261 }
203 262
204 /* 263 /*
205 * we made progress, there is more work to do and the bdi 264 * we made progress, there is more work to do and the bdi
@@ -208,7 +267,6 @@ loop_lock:
208 */ 267 */
209 if (pending && bdi_write_congested(bdi) && num_run > 16 && 268 if (pending && bdi_write_congested(bdi) && num_run > 16 &&
210 fs_info->fs_devices->open_devices > 1) { 269 fs_info->fs_devices->open_devices > 1) {
211 struct bio *old_head;
212 struct io_context *ioc; 270 struct io_context *ioc;
213 271
214 ioc = current->io_context; 272 ioc = current->io_context;
@@ -233,17 +291,17 @@ loop_lock:
233 * against it before looping 291 * against it before looping
234 */ 292 */
235 last_waited = ioc->last_waited; 293 last_waited = ioc->last_waited;
294 if (need_resched()) {
295 if (num_sync_run) {
296 blk_run_backing_dev(bdi, NULL);
297 num_sync_run = 0;
298 }
299 cond_resched();
300 }
236 continue; 301 continue;
237 } 302 }
238 spin_lock(&device->io_lock); 303 spin_lock(&device->io_lock);
239 304 requeue_list(pending_bios, pending, tail);
240 old_head = device->pending_bios;
241 device->pending_bios = pending;
242 if (device->pending_bio_tail)
243 tail->bi_next = old_head;
244 else
245 device->pending_bio_tail = tail;
246
247 device->running_pending = 1; 305 device->running_pending = 1;
248 306
249 spin_unlock(&device->io_lock); 307 spin_unlock(&device->io_lock);
@@ -251,11 +309,18 @@ loop_lock:
251 goto done; 309 goto done;
252 } 310 }
253 } 311 }
312
313 if (num_sync_run) {
314 num_sync_run = 0;
315 blk_run_backing_dev(bdi, NULL);
316 }
317
318 cond_resched();
254 if (again) 319 if (again)
255 goto loop; 320 goto loop;
256 321
257 spin_lock(&device->io_lock); 322 spin_lock(&device->io_lock);
258 if (device->pending_bios) 323 if (device->pending_bios.head || device->pending_sync_bios.head)
259 goto loop_lock; 324 goto loop_lock;
260 spin_unlock(&device->io_lock); 325 spin_unlock(&device->io_lock);
261 326
@@ -1478,7 +1543,7 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
1478 btrfs_set_device_io_align(leaf, dev_item, device->io_align); 1543 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
1479 btrfs_set_device_io_width(leaf, dev_item, device->io_width); 1544 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
1480 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); 1545 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
1481 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); 1546 btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes);
1482 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); 1547 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
1483 btrfs_mark_buffer_dirty(leaf); 1548 btrfs_mark_buffer_dirty(leaf);
1484 1549
@@ -1875,14 +1940,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1875 device->total_bytes = new_size; 1940 device->total_bytes = new_size;
1876 if (device->writeable) 1941 if (device->writeable)
1877 device->fs_devices->total_rw_bytes -= diff; 1942 device->fs_devices->total_rw_bytes -= diff;
1878 ret = btrfs_update_device(trans, device);
1879 if (ret) {
1880 unlock_chunks(root);
1881 btrfs_end_transaction(trans, root);
1882 goto done;
1883 }
1884 WARN_ON(diff > old_total);
1885 btrfs_set_super_total_bytes(super_copy, old_total - diff);
1886 unlock_chunks(root); 1943 unlock_chunks(root);
1887 btrfs_end_transaction(trans, root); 1944 btrfs_end_transaction(trans, root);
1888 1945
@@ -1914,7 +1971,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1914 length = btrfs_dev_extent_length(l, dev_extent); 1971 length = btrfs_dev_extent_length(l, dev_extent);
1915 1972
1916 if (key.offset + length <= new_size) 1973 if (key.offset + length <= new_size)
1917 goto done; 1974 break;
1918 1975
1919 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); 1976 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
1920 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); 1977 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
@@ -1927,6 +1984,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1927 goto done; 1984 goto done;
1928 } 1985 }
1929 1986
1987 /* Shrinking succeeded, else we would be at "done". */
1988 trans = btrfs_start_transaction(root, 1);
1989 if (!trans) {
1990 ret = -ENOMEM;
1991 goto done;
1992 }
1993 lock_chunks(root);
1994
1995 device->disk_total_bytes = new_size;
1996 /* Now btrfs_update_device() will change the on-disk size. */
1997 ret = btrfs_update_device(trans, device);
1998 if (ret) {
1999 unlock_chunks(root);
2000 btrfs_end_transaction(trans, root);
2001 goto done;
2002 }
2003 WARN_ON(diff > old_total);
2004 btrfs_set_super_total_bytes(super_copy, old_total - diff);
2005 unlock_chunks(root);
2006 btrfs_end_transaction(trans, root);
1930done: 2007done:
1931 btrfs_free_path(path); 2008 btrfs_free_path(path);
1932 return ret; 2009 return ret;
@@ -2497,7 +2574,7 @@ again:
2497 max_errors = 1; 2574 max_errors = 1;
2498 } 2575 }
2499 } 2576 }
2500 if (multi_ret && rw == WRITE && 2577 if (multi_ret && (rw & (1 << BIO_RW)) &&
2501 stripes_allocated < stripes_required) { 2578 stripes_allocated < stripes_required) {
2502 stripes_allocated = map->num_stripes; 2579 stripes_allocated = map->num_stripes;
2503 free_extent_map(em); 2580 free_extent_map(em);
@@ -2762,6 +2839,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
2762 int rw, struct bio *bio) 2839 int rw, struct bio *bio)
2763{ 2840{
2764 int should_queue = 1; 2841 int should_queue = 1;
2842 struct btrfs_pending_bios *pending_bios;
2765 2843
2766 /* don't bother with additional async steps for reads, right now */ 2844 /* don't bother with additional async steps for reads, right now */
2767 if (!(rw & (1 << BIO_RW))) { 2845 if (!(rw & (1 << BIO_RW))) {
@@ -2783,13 +2861,17 @@ static noinline int schedule_bio(struct btrfs_root *root,
2783 bio->bi_rw |= rw; 2861 bio->bi_rw |= rw;
2784 2862
2785 spin_lock(&device->io_lock); 2863 spin_lock(&device->io_lock);
2864 if (bio_sync(bio))
2865 pending_bios = &device->pending_sync_bios;
2866 else
2867 pending_bios = &device->pending_bios;
2786 2868
2787 if (device->pending_bio_tail) 2869 if (pending_bios->tail)
2788 device->pending_bio_tail->bi_next = bio; 2870 pending_bios->tail->bi_next = bio;
2789 2871
2790 device->pending_bio_tail = bio; 2872 pending_bios->tail = bio;
2791 if (!device->pending_bios) 2873 if (!pending_bios->head)
2792 device->pending_bios = bio; 2874 pending_bios->head = bio;
2793 if (device->running_pending) 2875 if (device->running_pending)
2794 should_queue = 0; 2876 should_queue = 0;
2795 2877
@@ -3006,7 +3088,8 @@ static int fill_device_from_item(struct extent_buffer *leaf,
3006 unsigned long ptr; 3088 unsigned long ptr;
3007 3089
3008 device->devid = btrfs_device_id(leaf, dev_item); 3090 device->devid = btrfs_device_id(leaf, dev_item);
3009 device->total_bytes = btrfs_device_total_bytes(leaf, dev_item); 3091 device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
3092 device->total_bytes = device->disk_total_bytes;
3010 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); 3093 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
3011 device->type = btrfs_device_type(leaf, dev_item); 3094 device->type = btrfs_device_type(leaf, dev_item);
3012 device->io_align = btrfs_device_io_align(leaf, dev_item); 3095 device->io_align = btrfs_device_io_align(leaf, dev_item);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2185de72ff7d..5c3ff6d02fd7 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -23,13 +23,22 @@
23#include "async-thread.h" 23#include "async-thread.h"
24 24
25struct buffer_head; 25struct buffer_head;
26struct btrfs_pending_bios {
27 struct bio *head;
28 struct bio *tail;
29};
30
26struct btrfs_device { 31struct btrfs_device {
27 struct list_head dev_list; 32 struct list_head dev_list;
28 struct list_head dev_alloc_list; 33 struct list_head dev_alloc_list;
29 struct btrfs_fs_devices *fs_devices; 34 struct btrfs_fs_devices *fs_devices;
30 struct btrfs_root *dev_root; 35 struct btrfs_root *dev_root;
31 struct bio *pending_bios; 36
32 struct bio *pending_bio_tail; 37 /* regular prio bios */
38 struct btrfs_pending_bios pending_bios;
39 /* WRITE_SYNC bios */
40 struct btrfs_pending_bios pending_sync_bios;
41
33 int running_pending; 42 int running_pending;
34 u64 generation; 43 u64 generation;
35 44
@@ -52,6 +61,9 @@ struct btrfs_device {
52 /* size of the device */ 61 /* size of the device */
53 u64 total_bytes; 62 u64 total_bytes;
54 63
64 /* size of the disk */
65 u64 disk_total_bytes;
66
55 /* bytes used */ 67 /* bytes used */
56 u64 bytes_used; 68 u64 bytes_used;
57 69
diff --git a/fs/buffer.c b/fs/buffer.c
index 6e35762b6169..aed297739eb0 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -360,7 +360,7 @@ still_busy:
360 * Completion handler for block_write_full_page() - pages which are unlocked 360 * Completion handler for block_write_full_page() - pages which are unlocked
361 * during I/O, and which have PageWriteback cleared upon I/O completion. 361 * during I/O, and which have PageWriteback cleared upon I/O completion.
362 */ 362 */
363static void end_buffer_async_write(struct buffer_head *bh, int uptodate) 363void end_buffer_async_write(struct buffer_head *bh, int uptodate)
364{ 364{
365 char b[BDEVNAME_SIZE]; 365 char b[BDEVNAME_SIZE];
366 unsigned long flags; 366 unsigned long flags;
@@ -438,11 +438,17 @@ static void mark_buffer_async_read(struct buffer_head *bh)
438 set_buffer_async_read(bh); 438 set_buffer_async_read(bh);
439} 439}
440 440
441void mark_buffer_async_write(struct buffer_head *bh) 441void mark_buffer_async_write_endio(struct buffer_head *bh,
442 bh_end_io_t *handler)
442{ 443{
443 bh->b_end_io = end_buffer_async_write; 444 bh->b_end_io = handler;
444 set_buffer_async_write(bh); 445 set_buffer_async_write(bh);
445} 446}
447
448void mark_buffer_async_write(struct buffer_head *bh)
449{
450 mark_buffer_async_write_endio(bh, end_buffer_async_write);
451}
446EXPORT_SYMBOL(mark_buffer_async_write); 452EXPORT_SYMBOL(mark_buffer_async_write);
447 453
448 454
@@ -547,7 +553,7 @@ repeat:
547 return err; 553 return err;
548} 554}
549 555
550void do_thaw_all(unsigned long unused) 556void do_thaw_all(struct work_struct *work)
551{ 557{
552 struct super_block *sb; 558 struct super_block *sb;
553 char b[BDEVNAME_SIZE]; 559 char b[BDEVNAME_SIZE];
@@ -567,6 +573,7 @@ restart:
567 goto restart; 573 goto restart;
568 } 574 }
569 spin_unlock(&sb_lock); 575 spin_unlock(&sb_lock);
576 kfree(work);
570 printk(KERN_WARNING "Emergency Thaw complete\n"); 577 printk(KERN_WARNING "Emergency Thaw complete\n");
571} 578}
572 579
@@ -577,7 +584,13 @@ restart:
577 */ 584 */
578void emergency_thaw_all(void) 585void emergency_thaw_all(void)
579{ 586{
580 pdflush_operation(do_thaw_all, 0); 587 struct work_struct *work;
588
589 work = kmalloc(sizeof(*work), GFP_ATOMIC);
590 if (work) {
591 INIT_WORK(work, do_thaw_all);
592 schedule_work(work);
593 }
581} 594}
582 595
583/** 596/**
@@ -1596,9 +1609,20 @@ EXPORT_SYMBOL(unmap_underlying_metadata);
1596 * locked buffer. This only can happen if someone has written the buffer 1609 * locked buffer. This only can happen if someone has written the buffer
1597 * directly, with submit_bh(). At the address_space level PageWriteback 1610 * directly, with submit_bh(). At the address_space level PageWriteback
1598 * prevents this contention from occurring. 1611 * prevents this contention from occurring.
1612 *
1613 * If block_write_full_page() is called with wbc->sync_mode ==
1614 * WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this
1615 * causes the writes to be flagged as synchronous writes, but the
1616 * block device queue will NOT be unplugged, since usually many pages
1617 * will be pushed to the out before the higher-level caller actually
1618 * waits for the writes to be completed. The various wait functions,
1619 * such as wait_on_writeback_range() will ultimately call sync_page()
1620 * which will ultimately call blk_run_backing_dev(), which will end up
1621 * unplugging the device queue.
1599 */ 1622 */
1600static int __block_write_full_page(struct inode *inode, struct page *page, 1623static int __block_write_full_page(struct inode *inode, struct page *page,
1601 get_block_t *get_block, struct writeback_control *wbc) 1624 get_block_t *get_block, struct writeback_control *wbc,
1625 bh_end_io_t *handler)
1602{ 1626{
1603 int err; 1627 int err;
1604 sector_t block; 1628 sector_t block;
@@ -1606,7 +1630,8 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
1606 struct buffer_head *bh, *head; 1630 struct buffer_head *bh, *head;
1607 const unsigned blocksize = 1 << inode->i_blkbits; 1631 const unsigned blocksize = 1 << inode->i_blkbits;
1608 int nr_underway = 0; 1632 int nr_underway = 0;
1609 int write_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); 1633 int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
1634 WRITE_SYNC_PLUG : WRITE);
1610 1635
1611 BUG_ON(!PageLocked(page)); 1636 BUG_ON(!PageLocked(page));
1612 1637
@@ -1682,7 +1707,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
1682 continue; 1707 continue;
1683 } 1708 }
1684 if (test_clear_buffer_dirty(bh)) { 1709 if (test_clear_buffer_dirty(bh)) {
1685 mark_buffer_async_write(bh); 1710 mark_buffer_async_write_endio(bh, handler);
1686 } else { 1711 } else {
1687 unlock_buffer(bh); 1712 unlock_buffer(bh);
1688 } 1713 }
@@ -1735,7 +1760,7 @@ recover:
1735 if (buffer_mapped(bh) && buffer_dirty(bh) && 1760 if (buffer_mapped(bh) && buffer_dirty(bh) &&
1736 !buffer_delay(bh)) { 1761 !buffer_delay(bh)) {
1737 lock_buffer(bh); 1762 lock_buffer(bh);
1738 mark_buffer_async_write(bh); 1763 mark_buffer_async_write_endio(bh, handler);
1739 } else { 1764 } else {
1740 /* 1765 /*
1741 * The buffer may have been set dirty during 1766 * The buffer may have been set dirty during
@@ -2372,7 +2397,8 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2372 if ((page->mapping != inode->i_mapping) || 2397 if ((page->mapping != inode->i_mapping) ||
2373 (page_offset(page) > size)) { 2398 (page_offset(page) > size)) {
2374 /* page got truncated out from underneath us */ 2399 /* page got truncated out from underneath us */
2375 goto out_unlock; 2400 unlock_page(page);
2401 goto out;
2376 } 2402 }
2377 2403
2378 /* page is wholly or partially inside EOF */ 2404 /* page is wholly or partially inside EOF */
@@ -2386,14 +2412,15 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
2386 ret = block_commit_write(page, 0, end); 2412 ret = block_commit_write(page, 0, end);
2387 2413
2388 if (unlikely(ret)) { 2414 if (unlikely(ret)) {
2415 unlock_page(page);
2389 if (ret == -ENOMEM) 2416 if (ret == -ENOMEM)
2390 ret = VM_FAULT_OOM; 2417 ret = VM_FAULT_OOM;
2391 else /* -ENOSPC, -EIO, etc */ 2418 else /* -ENOSPC, -EIO, etc */
2392 ret = VM_FAULT_SIGBUS; 2419 ret = VM_FAULT_SIGBUS;
2393 } 2420 } else
2421 ret = VM_FAULT_LOCKED;
2394 2422
2395out_unlock: 2423out:
2396 unlock_page(page);
2397 return ret; 2424 return ret;
2398} 2425}
2399 2426
@@ -2661,7 +2688,8 @@ int nobh_writepage(struct page *page, get_block_t *get_block,
2661out: 2688out:
2662 ret = mpage_writepage(page, get_block, wbc); 2689 ret = mpage_writepage(page, get_block, wbc);
2663 if (ret == -EAGAIN) 2690 if (ret == -EAGAIN)
2664 ret = __block_write_full_page(inode, page, get_block, wbc); 2691 ret = __block_write_full_page(inode, page, get_block, wbc,
2692 end_buffer_async_write);
2665 return ret; 2693 return ret;
2666} 2694}
2667EXPORT_SYMBOL(nobh_writepage); 2695EXPORT_SYMBOL(nobh_writepage);
@@ -2819,9 +2847,10 @@ out:
2819 2847
2820/* 2848/*
2821 * The generic ->writepage function for buffer-backed address_spaces 2849 * The generic ->writepage function for buffer-backed address_spaces
2850 * this form passes in the end_io handler used to finish the IO.
2822 */ 2851 */
2823int block_write_full_page(struct page *page, get_block_t *get_block, 2852int block_write_full_page_endio(struct page *page, get_block_t *get_block,
2824 struct writeback_control *wbc) 2853 struct writeback_control *wbc, bh_end_io_t *handler)
2825{ 2854{
2826 struct inode * const inode = page->mapping->host; 2855 struct inode * const inode = page->mapping->host;
2827 loff_t i_size = i_size_read(inode); 2856 loff_t i_size = i_size_read(inode);
@@ -2830,7 +2859,8 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
2830 2859
2831 /* Is the page fully inside i_size? */ 2860 /* Is the page fully inside i_size? */
2832 if (page->index < end_index) 2861 if (page->index < end_index)
2833 return __block_write_full_page(inode, page, get_block, wbc); 2862 return __block_write_full_page(inode, page, get_block, wbc,
2863 handler);
2834 2864
2835 /* Is the page fully outside i_size? (truncate in progress) */ 2865 /* Is the page fully outside i_size? (truncate in progress) */
2836 offset = i_size & (PAGE_CACHE_SIZE-1); 2866 offset = i_size & (PAGE_CACHE_SIZE-1);
@@ -2853,9 +2883,20 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
2853 * writes to that region are not written out to the file." 2883 * writes to that region are not written out to the file."
2854 */ 2884 */
2855 zero_user_segment(page, offset, PAGE_CACHE_SIZE); 2885 zero_user_segment(page, offset, PAGE_CACHE_SIZE);
2856 return __block_write_full_page(inode, page, get_block, wbc); 2886 return __block_write_full_page(inode, page, get_block, wbc, handler);
2887}
2888
2889/*
2890 * The generic ->writepage function for buffer-backed address_spaces
2891 */
2892int block_write_full_page(struct page *page, get_block_t *get_block,
2893 struct writeback_control *wbc)
2894{
2895 return block_write_full_page_endio(page, get_block, wbc,
2896 end_buffer_async_write);
2857} 2897}
2858 2898
2899
2859sector_t generic_block_bmap(struct address_space *mapping, sector_t block, 2900sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
2860 get_block_t *get_block) 2901 get_block_t *get_block)
2861{ 2902{
@@ -3324,9 +3365,11 @@ EXPORT_SYMBOL(block_read_full_page);
3324EXPORT_SYMBOL(block_sync_page); 3365EXPORT_SYMBOL(block_sync_page);
3325EXPORT_SYMBOL(block_truncate_page); 3366EXPORT_SYMBOL(block_truncate_page);
3326EXPORT_SYMBOL(block_write_full_page); 3367EXPORT_SYMBOL(block_write_full_page);
3368EXPORT_SYMBOL(block_write_full_page_endio);
3327EXPORT_SYMBOL(cont_write_begin); 3369EXPORT_SYMBOL(cont_write_begin);
3328EXPORT_SYMBOL(end_buffer_read_sync); 3370EXPORT_SYMBOL(end_buffer_read_sync);
3329EXPORT_SYMBOL(end_buffer_write_sync); 3371EXPORT_SYMBOL(end_buffer_write_sync);
3372EXPORT_SYMBOL(end_buffer_async_write);
3330EXPORT_SYMBOL(file_fsync); 3373EXPORT_SYMBOL(file_fsync);
3331EXPORT_SYMBOL(generic_block_bmap); 3374EXPORT_SYMBOL(generic_block_bmap);
3332EXPORT_SYMBOL(generic_cont_expand_simple); 3375EXPORT_SYMBOL(generic_cont_expand_simple);
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 65984006192c..f20c4069c220 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,16 @@
1Version 1.58
2------------
3Guard against buffer overruns in various UCS-2 to UTF-8 string conversions
4when the UTF-8 string is composed of unusually long (more than 4 byte) converted
5characters. Add support for mounting root of a share which redirects immediately
6to DFS target. Convert string conversion functions from Unicode to more
7accurately mark string length before allocating memory (which may help the
8rare cases where a UTF-8 string is much larger than the UCS2 string that
9we converted from). Fix endianness of the vcnum field used during
10session setup to distinguish multiple mounts to same server from different
11userids. Raw NTLMSSP fixed (it requires /proc/fs/cifs/experimental
12flag to be set to 2, and mount must enable krb5 to turn on extended security).
13
1Version 1.57 14Version 1.57
2------------ 15------------
3Improve support for multiple security contexts to the same server. We 16Improve support for multiple security contexts to the same server. We
@@ -15,7 +28,8 @@ Posix file open support added (turned off after one attempt if server
15fails to support it properly, as with Samba server versions prior to 3.3.2) 28fails to support it properly, as with Samba server versions prior to 3.3.2)
16Fix "redzone overwritten" bug in cifs_put_tcon (CIFSTcon may allocate too 29Fix "redzone overwritten" bug in cifs_put_tcon (CIFSTcon may allocate too
17little memory for the "nativeFileSystem" field returned by the server 30little memory for the "nativeFileSystem" field returned by the server
18during mount). 31during mount). Endian convert inode numbers if necessary (makes it easier
32to compare inode numbers on network files from big endian systems).
19 33
20Version 1.56 34Version 1.56
21------------ 35------------
diff --git a/fs/cifs/README b/fs/cifs/README
index 07434181623b..db208ddb9899 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -651,7 +651,15 @@ Experimental When set to 1 used to enable certain experimental
651 signing turned on in case buffer was modified 651 signing turned on in case buffer was modified
652 just before it was sent, also this flag will 652 just before it was sent, also this flag will
653 be used to use the new experimental directory change 653 be used to use the new experimental directory change
654 notification code). 654 notification code). When set to 2 enables
655 an additional experimental feature, "raw ntlmssp"
656 session establishment support (which allows
657 specifying "sec=ntlmssp" on mount). The Linux cifs
658 module will use ntlmv2 authentication encapsulated
659 in "raw ntlmssp" (not using SPNEGO) when
660 "sec=ntlmssp" is specified on mount.
661 This support also requires building cifs with
662 the CONFIG_CIFS_EXPERIMENTAL configuration flag.
655 663
656These experimental features and tracing can be enabled by changing flags in 664These experimental features and tracing can be enabled by changing flags in
657/proc/fs/cifs (after the cifs module has been installed or built into the 665/proc/fs/cifs (after the cifs module has been installed or built into the
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 5fdbf8a14472..83d62759c7c7 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -340,28 +340,24 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
340 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 340 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
341 341
342 for (i = 0; i < num_referrals; i++) { 342 for (i = 0; i < num_referrals; i++) {
343 int len;
343 dump_referral(referrals+i); 344 dump_referral(referrals+i);
344 /* connect to a storage node */ 345 /* connect to a node */
345 if (referrals[i].flags & DFSREF_STORAGE_SERVER) { 346 len = strlen(referrals[i].node_name);
346 int len; 347 if (len < 2) {
347 len = strlen(referrals[i].node_name); 348 cERROR(1, ("%s: Net Address path too short: %s",
348 if (len < 2) {
349 cERROR(1, ("%s: Net Address path too short: %s",
350 __func__, referrals[i].node_name)); 349 __func__, referrals[i].node_name));
351 rc = -EINVAL; 350 rc = -EINVAL;
352 goto out_err; 351 goto out_err;
353 } 352 }
354 mnt = cifs_dfs_do_refmount(nd->path.mnt, 353 mnt = cifs_dfs_do_refmount(nd->path.mnt,
355 nd->path.dentry, 354 nd->path.dentry, referrals + i);
356 referrals + i); 355 cFYI(1, ("%s: cifs_dfs_do_refmount:%s , mnt:%p", __func__,
357 cFYI(1, ("%s: cifs_dfs_do_refmount:%s , mnt:%p",
358 __func__,
359 referrals[i].node_name, mnt)); 356 referrals[i].node_name, mnt));
360 357
361 /* complete mount procedure if we accured submount */ 358 /* complete mount procedure if we accured submount */
362 if (!IS_ERR(mnt)) 359 if (!IS_ERR(mnt))
363 break; 360 break;
364 }
365 } 361 }
366 362
367 /* we need it cause for() above could exit without valid submount */ 363 /* we need it cause for() above could exit without valid submount */
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 3fd3a9df043a..67bf93a40d2e 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -41,7 +41,7 @@ cifs_spnego_key_instantiate(struct key *key, const void *data, size_t datalen)
41 41
42 /* attach the data */ 42 /* attach the data */
43 memcpy(payload, data, datalen); 43 memcpy(payload, data, datalen);
44 rcu_assign_pointer(key->payload.data, payload); 44 key->payload.data = payload;
45 ret = 0; 45 ret = 0;
46 46
47error: 47error:
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 7d75272a6b3f..60e3c4253de0 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/cifs_unicode.c 2 * fs/cifs/cifs_unicode.c
3 * 3 *
4 * Copyright (c) International Business Machines Corp., 2000,2005 4 * Copyright (c) International Business Machines Corp., 2000,2009
5 * Modified by Steve French (sfrench@us.ibm.com) 5 * Modified by Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
@@ -26,31 +26,157 @@
26#include "cifs_debug.h" 26#include "cifs_debug.h"
27 27
28/* 28/*
29 * NAME: cifs_strfromUCS() 29 * cifs_ucs2_bytes - how long will a string be after conversion?
30 * 30 * @ucs - pointer to input string
31 * FUNCTION: Convert little-endian unicode string to character string 31 * @maxbytes - don't go past this many bytes of input string
32 * @codepage - destination codepage
32 * 33 *
34 * Walk a ucs2le string and return the number of bytes that the string will
35 * be after being converted to the given charset, not including any null
36 * termination required. Don't walk past maxbytes in the source buffer.
33 */ 37 */
34int 38int
35cifs_strfromUCS_le(char *to, const __le16 *from, 39cifs_ucs2_bytes(const __le16 *from, int maxbytes,
36 int len, const struct nls_table *codepage) 40 const struct nls_table *codepage)
37{ 41{
38 int i; 42 int i;
39 int outlen = 0; 43 int charlen, outlen = 0;
44 int maxwords = maxbytes / 2;
45 char tmp[NLS_MAX_CHARSET_SIZE];
40 46
41 for (i = 0; (i < len) && from[i]; i++) { 47 for (i = 0; from[i] && i < maxwords; i++) {
42 int charlen; 48 charlen = codepage->uni2char(le16_to_cpu(from[i]), tmp,
43 /* 2.4.0 kernel or greater */ 49 NLS_MAX_CHARSET_SIZE);
44 charlen = 50 if (charlen > 0)
45 codepage->uni2char(le16_to_cpu(from[i]), &to[outlen],
46 NLS_MAX_CHARSET_SIZE);
47 if (charlen > 0) {
48 outlen += charlen; 51 outlen += charlen;
49 } else { 52 else
50 to[outlen++] = '?'; 53 outlen++;
54 }
55
56 return outlen;
57}
58
59/*
60 * cifs_mapchar - convert a little-endian char to proper char in codepage
61 * @target - where converted character should be copied
62 * @src_char - 2 byte little-endian source character
63 * @cp - codepage to which character should be converted
64 * @mapchar - should character be mapped according to mapchars mount option?
65 *
66 * This function handles the conversion of a single character. It is the
67 * responsibility of the caller to ensure that the target buffer is large
68 * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
69 */
70static int
71cifs_mapchar(char *target, const __le16 src_char, const struct nls_table *cp,
72 bool mapchar)
73{
74 int len = 1;
75
76 if (!mapchar)
77 goto cp_convert;
78
79 /*
80 * BB: Cannot handle remapping UNI_SLASH until all the calls to
81 * build_path_from_dentry are modified, as they use slash as
82 * separator.
83 */
84 switch (le16_to_cpu(src_char)) {
85 case UNI_COLON:
86 *target = ':';
87 break;
88 case UNI_ASTERIK:
89 *target = '*';
90 break;
91 case UNI_QUESTION:
92 *target = '?';
93 break;
94 case UNI_PIPE:
95 *target = '|';
96 break;
97 case UNI_GRTRTHAN:
98 *target = '>';
99 break;
100 case UNI_LESSTHAN:
101 *target = '<';
102 break;
103 default:
104 goto cp_convert;
105 }
106
107out:
108 return len;
109
110cp_convert:
111 len = cp->uni2char(le16_to_cpu(src_char), target,
112 NLS_MAX_CHARSET_SIZE);
113 if (len <= 0) {
114 *target = '?';
115 len = 1;
116 }
117 goto out;
118}
119
120/*
121 * cifs_from_ucs2 - convert utf16le string to local charset
122 * @to - destination buffer
123 * @from - source buffer
124 * @tolen - destination buffer size (in bytes)
125 * @fromlen - source buffer size (in bytes)
126 * @codepage - codepage to which characters should be converted
127 * @mapchar - should characters be remapped according to the mapchars option?
128 *
129 * Convert a little-endian ucs2le string (as sent by the server) to a string
130 * in the provided codepage. The tolen and fromlen parameters are to ensure
131 * that the code doesn't walk off of the end of the buffer (which is always
132 * a danger if the alignment of the source buffer is off). The destination
133 * string is always properly null terminated and fits in the destination
134 * buffer. Returns the length of the destination string in bytes (including
135 * null terminator).
136 *
137 * Note that some windows versions actually send multiword UTF-16 characters
138 * instead of straight UCS-2. The linux nls routines however aren't able to
139 * deal with those characters properly. In the event that we get some of
140 * those characters, they won't be translated properly.
141 */
142int
143cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen,
144 const struct nls_table *codepage, bool mapchar)
145{
146 int i, charlen, safelen;
147 int outlen = 0;
148 int nullsize = nls_nullsize(codepage);
149 int fromwords = fromlen / 2;
150 char tmp[NLS_MAX_CHARSET_SIZE];
151
152 /*
153 * because the chars can be of varying widths, we need to take care
154 * not to overflow the destination buffer when we get close to the
155 * end of it. Until we get to this offset, we don't need to check
156 * for overflow however.
157 */
158 safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
159
160 for (i = 0; i < fromwords && from[i]; i++) {
161 /*
162 * check to see if converting this character might make the
163 * conversion bleed into the null terminator
164 */
165 if (outlen >= safelen) {
166 charlen = cifs_mapchar(tmp, from[i], codepage, mapchar);
167 if ((outlen + charlen) > (tolen - nullsize))
168 break;
51 } 169 }
170
171 /* put converted char into 'to' buffer */
172 charlen = cifs_mapchar(&to[outlen], from[i], codepage, mapchar);
173 outlen += charlen;
52 } 174 }
53 to[outlen] = 0; 175
176 /* properly null-terminate string */
177 for (i = 0; i < nullsize; i++)
178 to[outlen++] = 0;
179
54 return outlen; 180 return outlen;
55} 181}
56 182
@@ -88,3 +214,41 @@ cifs_strtoUCS(__le16 *to, const char *from, int len,
88 return i; 214 return i;
89} 215}
90 216
217/*
218 * cifs_strndup_from_ucs - copy a string from wire format to the local codepage
219 * @src - source string
220 * @maxlen - don't walk past this many bytes in the source string
221 * @is_unicode - is this a unicode string?
222 * @codepage - destination codepage
223 *
224 * Take a string given by the server, convert it to the local codepage and
225 * put it in a new buffer. Returns a pointer to the new string or NULL on
226 * error.
227 */
228char *
229cifs_strndup_from_ucs(const char *src, const int maxlen, const bool is_unicode,
230 const struct nls_table *codepage)
231{
232 int len;
233 char *dst;
234
235 if (is_unicode) {
236 len = cifs_ucs2_bytes((__le16 *) src, maxlen, codepage);
237 len += nls_nullsize(codepage);
238 dst = kmalloc(len, GFP_KERNEL);
239 if (!dst)
240 return NULL;
241 cifs_from_ucs2(dst, (__le16 *) src, len, maxlen, codepage,
242 false);
243 } else {
244 len = strnlen(src, maxlen);
245 len++;
246 dst = kmalloc(len, GFP_KERNEL);
247 if (!dst)
248 return NULL;
249 strlcpy(dst, src, len);
250 }
251
252 return dst;
253}
254
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 14eb9a2395d3..650638275a6f 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -5,7 +5,7 @@
5 * Convert a unicode character to upper or lower case using 5 * Convert a unicode character to upper or lower case using
6 * compressed tables. 6 * compressed tables.
7 * 7 *
8 * Copyright (c) International Business Machines Corp., 2000,2007 8 * Copyright (c) International Business Machines Corp., 2000,2009
9 * 9 *
10 * This program is free software; you can redistribute it and/or modify 10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by 11 * it under the terms of the GNU General Public License as published by
@@ -37,6 +37,19 @@
37 37
38#define UNIUPR_NOLOWER /* Example to not expand lower case tables */ 38#define UNIUPR_NOLOWER /* Example to not expand lower case tables */
39 39
40/*
41 * Windows maps these to the user defined 16 bit Unicode range since they are
42 * reserved symbols (along with \ and /), otherwise illegal to store
43 * in filenames in NTFS
44 */
45#define UNI_ASTERIK (__u16) ('*' + 0xF000)
46#define UNI_QUESTION (__u16) ('?' + 0xF000)
47#define UNI_COLON (__u16) (':' + 0xF000)
48#define UNI_GRTRTHAN (__u16) ('>' + 0xF000)
49#define UNI_LESSTHAN (__u16) ('<' + 0xF000)
50#define UNI_PIPE (__u16) ('|' + 0xF000)
51#define UNI_SLASH (__u16) ('\\' + 0xF000)
52
40/* Just define what we want from uniupr.h. We don't want to define the tables 53/* Just define what we want from uniupr.h. We don't want to define the tables
41 * in each source file. 54 * in each source file.
42 */ 55 */
@@ -59,8 +72,14 @@ extern struct UniCaseRange UniLowerRange[];
59#endif /* UNIUPR_NOLOWER */ 72#endif /* UNIUPR_NOLOWER */
60 73
61#ifdef __KERNEL__ 74#ifdef __KERNEL__
62int cifs_strfromUCS_le(char *, const __le16 *, int, const struct nls_table *); 75int cifs_from_ucs2(char *to, const __le16 *from, int tolen, int fromlen,
76 const struct nls_table *codepage, bool mapchar);
77int cifs_ucs2_bytes(const __le16 *from, int maxbytes,
78 const struct nls_table *codepage);
63int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *); 79int cifs_strtoUCS(__le16 *, const char *, int, const struct nls_table *);
80char *cifs_strndup_from_ucs(const char *src, const int maxlen,
81 const bool is_unicode,
82 const struct nls_table *codepage);
64#endif 83#endif
65 84
66/* 85/*
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 38491fd3871d..0d6d8b573652 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -66,9 +66,6 @@ unsigned int sign_CIFS_PDUs = 1;
66extern struct task_struct *oplockThread; /* remove sparse warning */ 66extern struct task_struct *oplockThread; /* remove sparse warning */
67struct task_struct *oplockThread = NULL; 67struct task_struct *oplockThread = NULL;
68/* extern struct task_struct * dnotifyThread; remove sparse warning */ 68/* extern struct task_struct * dnotifyThread; remove sparse warning */
69#ifdef CONFIG_CIFS_EXPERIMENTAL
70static struct task_struct *dnotifyThread = NULL;
71#endif
72static const struct super_operations cifs_super_ops; 69static const struct super_operations cifs_super_ops;
73unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE; 70unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE;
74module_param(CIFSMaxBufSize, int, 0); 71module_param(CIFSMaxBufSize, int, 0);
@@ -316,6 +313,7 @@ cifs_alloc_inode(struct super_block *sb)
316 cifs_inode->clientCanCacheAll = false; 313 cifs_inode->clientCanCacheAll = false;
317 cifs_inode->delete_pending = false; 314 cifs_inode->delete_pending = false;
318 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ 315 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
316 cifs_inode->server_eof = 0;
319 317
320 /* Can not set i_flags here - they get immediately overwritten 318 /* Can not set i_flags here - they get immediately overwritten
321 to zero by the VFS */ 319 to zero by the VFS */
@@ -1040,34 +1038,6 @@ static int cifs_oplock_thread(void *dummyarg)
1040 return 0; 1038 return 0;
1041} 1039}
1042 1040
1043#ifdef CONFIG_CIFS_EXPERIMENTAL
1044static int cifs_dnotify_thread(void *dummyarg)
1045{
1046 struct list_head *tmp;
1047 struct TCP_Server_Info *server;
1048
1049 do {
1050 if (try_to_freeze())
1051 continue;
1052 set_current_state(TASK_INTERRUPTIBLE);
1053 schedule_timeout(15*HZ);
1054 /* check if any stuck requests that need
1055 to be woken up and wakeq so the
1056 thread can wake up and error out */
1057 read_lock(&cifs_tcp_ses_lock);
1058 list_for_each(tmp, &cifs_tcp_ses_list) {
1059 server = list_entry(tmp, struct TCP_Server_Info,
1060 tcp_ses_list);
1061 if (atomic_read(&server->inFlight))
1062 wake_up_all(&server->response_q);
1063 }
1064 read_unlock(&cifs_tcp_ses_lock);
1065 } while (!kthread_should_stop());
1066
1067 return 0;
1068}
1069#endif
1070
1071static int __init 1041static int __init
1072init_cifs(void) 1042init_cifs(void)
1073{ 1043{
@@ -1144,21 +1114,8 @@ init_cifs(void)
1144 goto out_unregister_dfs_key_type; 1114 goto out_unregister_dfs_key_type;
1145 } 1115 }
1146 1116
1147#ifdef CONFIG_CIFS_EXPERIMENTAL
1148 dnotifyThread = kthread_run(cifs_dnotify_thread, NULL, "cifsdnotifyd");
1149 if (IS_ERR(dnotifyThread)) {
1150 rc = PTR_ERR(dnotifyThread);
1151 cERROR(1, ("error %d create dnotify thread", rc));
1152 goto out_stop_oplock_thread;
1153 }
1154#endif
1155
1156 return 0; 1117 return 0;
1157 1118
1158#ifdef CONFIG_CIFS_EXPERIMENTAL
1159 out_stop_oplock_thread:
1160#endif
1161 kthread_stop(oplockThread);
1162 out_unregister_dfs_key_type: 1119 out_unregister_dfs_key_type:
1163#ifdef CONFIG_CIFS_DFS_UPCALL 1120#ifdef CONFIG_CIFS_DFS_UPCALL
1164 unregister_key_type(&key_type_dns_resolver); 1121 unregister_key_type(&key_type_dns_resolver);
@@ -1196,9 +1153,6 @@ exit_cifs(void)
1196 cifs_destroy_inodecache(); 1153 cifs_destroy_inodecache();
1197 cifs_destroy_mids(); 1154 cifs_destroy_mids();
1198 cifs_destroy_request_bufs(); 1155 cifs_destroy_request_bufs();
1199#ifdef CONFIG_CIFS_EXPERIMENTAL
1200 kthread_stop(dnotifyThread);
1201#endif
1202 kthread_stop(oplockThread); 1156 kthread_stop(oplockThread);
1203} 1157}
1204 1158
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 77e190dc2883..051b71cfdea9 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -100,5 +100,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
100extern const struct export_operations cifs_export_ops; 100extern const struct export_operations cifs_export_ops;
101#endif /* EXPERIMENTAL */ 101#endif /* EXPERIMENTAL */
102 102
103#define CIFS_VERSION "1.57" 103#define CIFS_VERSION "1.58"
104#endif /* _CIFSFS_H */ 104#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 9fbf4dff5da6..a61ab772c6f6 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -82,8 +82,8 @@ enum securityEnum {
82 LANMAN, /* Legacy LANMAN auth */ 82 LANMAN, /* Legacy LANMAN auth */
83 NTLM, /* Legacy NTLM012 auth with NTLM hash */ 83 NTLM, /* Legacy NTLM012 auth with NTLM hash */
84 NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ 84 NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */
85 RawNTLMSSP, /* NTLMSSP without SPNEGO */ 85 RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */
86 NTLMSSP, /* NTLMSSP via SPNEGO */ 86 NTLMSSP, /* NTLMSSP via SPNEGO, NTLMv2 hash */
87 Kerberos, /* Kerberos via SPNEGO */ 87 Kerberos, /* Kerberos via SPNEGO */
88 MSKerberos, /* MS Kerberos via SPNEGO */ 88 MSKerberos, /* MS Kerberos via SPNEGO */
89}; 89};
@@ -350,7 +350,7 @@ struct cifsFileInfo {
350 bool invalidHandle:1; /* file closed via session abend */ 350 bool invalidHandle:1; /* file closed via session abend */
351 bool messageMode:1; /* for pipes: message vs byte mode */ 351 bool messageMode:1; /* for pipes: message vs byte mode */
352 atomic_t wrtPending; /* handle in use - defer close */ 352 atomic_t wrtPending; /* handle in use - defer close */
353 struct semaphore fh_sem; /* prevents reopen race after dead ses*/ 353 struct mutex fh_mutex; /* prevents reopen race after dead ses*/
354 struct cifs_search_info srch_inf; 354 struct cifs_search_info srch_inf;
355}; 355};
356 356
@@ -370,6 +370,7 @@ struct cifsInodeInfo {
370 bool clientCanCacheAll:1; /* read and writebehind oplock */ 370 bool clientCanCacheAll:1; /* read and writebehind oplock */
371 bool oplockPending:1; 371 bool oplockPending:1;
372 bool delete_pending:1; /* DELETE_ON_CLOSE is set */ 372 bool delete_pending:1; /* DELETE_ON_CLOSE is set */
373 u64 server_eof; /* current file size on server */
373 struct inode vfs_inode; 374 struct inode vfs_inode;
374}; 375};
375 376
@@ -530,6 +531,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
530#define CIFSSEC_MAY_PLNTXT 0 531#define CIFSSEC_MAY_PLNTXT 0
531#endif /* weak passwords */ 532#endif /* weak passwords */
532#define CIFSSEC_MAY_SEAL 0x00040 /* not supported yet */ 533#define CIFSSEC_MAY_SEAL 0x00040 /* not supported yet */
534#define CIFSSEC_MAY_NTLMSSP 0x00080 /* raw ntlmssp with ntlmv2 */
533 535
534#define CIFSSEC_MUST_SIGN 0x01001 536#define CIFSSEC_MUST_SIGN 0x01001
535/* note that only one of the following can be set so the 537/* note that only one of the following can be set so the
@@ -542,22 +544,23 @@ require use of the stronger protocol */
542#define CIFSSEC_MUST_LANMAN 0x10010 544#define CIFSSEC_MUST_LANMAN 0x10010
543#define CIFSSEC_MUST_PLNTXT 0x20020 545#define CIFSSEC_MUST_PLNTXT 0x20020
544#ifdef CONFIG_CIFS_UPCALL 546#ifdef CONFIG_CIFS_UPCALL
545#define CIFSSEC_MASK 0x3F03F /* allows weak security but also krb5 */ 547#define CIFSSEC_MASK 0xAF0AF /* allows weak security but also krb5 */
546#else 548#else
547#define CIFSSEC_MASK 0x37037 /* current flags supported if weak */ 549#define CIFSSEC_MASK 0xA70A7 /* current flags supported if weak */
548#endif /* UPCALL */ 550#endif /* UPCALL */
549#else /* do not allow weak pw hash */ 551#else /* do not allow weak pw hash */
550#ifdef CONFIG_CIFS_UPCALL 552#ifdef CONFIG_CIFS_UPCALL
551#define CIFSSEC_MASK 0x0F00F /* flags supported if no weak allowed */ 553#define CIFSSEC_MASK 0x8F08F /* flags supported if no weak allowed */
552#else 554#else
553#define CIFSSEC_MASK 0x07007 /* flags supported if no weak allowed */ 555#define CIFSSEC_MASK 0x87087 /* flags supported if no weak allowed */
554#endif /* UPCALL */ 556#endif /* UPCALL */
555#endif /* WEAK_PW_HASH */ 557#endif /* WEAK_PW_HASH */
556#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ 558#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */
559#define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */
557 560
558#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2) 561#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2)
559#define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2) 562#define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2)
560#define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5) 563#define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP)
561/* 564/*
562 ***************************************************************** 565 *****************************************************************
563 * All constants go here 566 * All constants go here
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b370489c8da5..a785f69dbc9f 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -2163,7 +2163,7 @@ typedef struct {
2163 __le32 Type; 2163 __le32 Type;
2164 __le64 DevMajor; 2164 __le64 DevMajor;
2165 __le64 DevMinor; 2165 __le64 DevMinor;
2166 __u64 UniqueId; 2166 __le64 UniqueId;
2167 __le64 Permissions; 2167 __le64 Permissions;
2168 __le64 Nlinks; 2168 __le64 Nlinks;
2169} __attribute__((packed)) FILE_UNIX_BASIC_INFO; /* level 0x200 QPathInfo */ 2169} __attribute__((packed)) FILE_UNIX_BASIC_INFO; /* level 0x200 QPathInfo */
@@ -2308,7 +2308,7 @@ struct unlink_psx_rq { /* level 0x20a SetPathInfo */
2308} __attribute__((packed)); 2308} __attribute__((packed));
2309 2309
2310struct file_internal_info { 2310struct file_internal_info {
2311 __u64 UniqueId; /* inode number */ 2311 __le64 UniqueId; /* inode number */
2312} __attribute__((packed)); /* level 0x3ee */ 2312} __attribute__((packed)); /* level 0x3ee */
2313 2313
2314struct file_mode_info { 2314struct file_mode_info {
@@ -2338,7 +2338,7 @@ typedef struct {
2338 __le32 Type; 2338 __le32 Type;
2339 __le64 DevMajor; 2339 __le64 DevMajor;
2340 __le64 DevMinor; 2340 __le64 DevMinor;
2341 __u64 UniqueId; 2341 __le64 UniqueId;
2342 __le64 Permissions; 2342 __le64 Permissions;
2343 __le64 Nlinks; 2343 __le64 Nlinks;
2344 char FileName[1]; 2344 char FileName[1];
@@ -2386,7 +2386,7 @@ typedef struct {
2386 __le32 FileNameLength; 2386 __le32 FileNameLength;
2387 __le32 EaSize; /* EA size */ 2387 __le32 EaSize; /* EA size */
2388 __le32 Reserved; 2388 __le32 Reserved;
2389 __u64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit*/ 2389 __le64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit*/
2390 char FileName[1]; 2390 char FileName[1];
2391} __attribute__((packed)) SEARCH_ID_FULL_DIR_INFO; /* level 0x105 FF rsp data */ 2391} __attribute__((packed)) SEARCH_ID_FULL_DIR_INFO; /* level 0x105 FF rsp data */
2392 2392
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 4167716d32f2..fae083930eee 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -260,8 +260,7 @@ extern int CIFSUnixCreateSymLink(const int xid,
260 const struct nls_table *nls_codepage); 260 const struct nls_table *nls_codepage);
261extern int CIFSSMBUnixQuerySymLink(const int xid, 261extern int CIFSSMBUnixQuerySymLink(const int xid,
262 struct cifsTconInfo *tcon, 262 struct cifsTconInfo *tcon,
263 const unsigned char *searchName, 263 const unsigned char *searchName, char **syminfo,
264 char *syminfo, const int buflen,
265 const struct nls_table *nls_codepage); 264 const struct nls_table *nls_codepage);
266extern int CIFSSMBQueryReparseLinkInfo(const int xid, 265extern int CIFSSMBQueryReparseLinkInfo(const int xid,
267 struct cifsTconInfo *tcon, 266 struct cifsTconInfo *tcon,
@@ -307,8 +306,6 @@ extern int CIFSGetSrvInodeNumber(const int xid, struct cifsTconInfo *tcon,
307 const unsigned char *searchName, __u64 *inode_number, 306 const unsigned char *searchName, __u64 *inode_number,
308 const struct nls_table *nls_codepage, 307 const struct nls_table *nls_codepage,
309 int remap_special_chars); 308 int remap_special_chars);
310extern int cifs_convertUCSpath(char *target, const __le16 *source, int maxlen,
311 const struct nls_table *codepage);
312extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen, 309extern int cifsConvertToUCS(__le16 *target, const char *source, int maxlen,
313 const struct nls_table *cp, int mapChars); 310 const struct nls_table *cp, int mapChars);
314 311
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index bc09c998631f..75e6623a8635 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/cifssmb.c 2 * fs/cifs/cifssmb.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2002,2008 4 * Copyright (C) International Business Machines Corp., 2002,2009
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * Contains the routines for constructing the SMB PDUs themselves 7 * Contains the routines for constructing the SMB PDUs themselves
@@ -81,41 +81,6 @@ static struct {
81#endif /* CONFIG_CIFS_WEAK_PW_HASH */ 81#endif /* CONFIG_CIFS_WEAK_PW_HASH */
82#endif /* CIFS_POSIX */ 82#endif /* CIFS_POSIX */
83 83
84/* Allocates buffer into dst and copies smb string from src to it.
85 * caller is responsible for freeing dst if function returned 0.
86 * returns:
87 * on success - 0
88 * on failure - errno
89 */
90static int
91cifs_strncpy_to_host(char **dst, const char *src, const int maxlen,
92 const bool is_unicode, const struct nls_table *nls_codepage)
93{
94 int plen;
95
96 if (is_unicode) {
97 plen = UniStrnlen((wchar_t *)src, maxlen);
98 *dst = kmalloc(plen + 2, GFP_KERNEL);
99 if (!*dst)
100 goto cifs_strncpy_to_host_ErrExit;
101 cifs_strfromUCS_le(*dst, (__le16 *)src, plen, nls_codepage);
102 } else {
103 plen = strnlen(src, maxlen);
104 *dst = kmalloc(plen + 2, GFP_KERNEL);
105 if (!*dst)
106 goto cifs_strncpy_to_host_ErrExit;
107 strncpy(*dst, src, plen);
108 }
109 (*dst)[plen] = 0;
110 (*dst)[plen+1] = 0; /* harmless for ASCII case, needed for Unicode */
111 return 0;
112
113cifs_strncpy_to_host_ErrExit:
114 cERROR(1, ("Failed to allocate buffer for string\n"));
115 return -ENOMEM;
116}
117
118
119/* Mark as invalid, all open files on tree connections since they 84/* Mark as invalid, all open files on tree connections since they
120 were closed when session to server was lost */ 85 were closed when session to server was lost */
121static void mark_open_files_invalid(struct cifsTconInfo *pTcon) 86static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
@@ -484,6 +449,14 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
484 cFYI(1, ("Kerberos only mechanism, enable extended security")); 449 cFYI(1, ("Kerberos only mechanism, enable extended security"));
485 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC; 450 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
486 } 451 }
452#ifdef CONFIG_CIFS_EXPERIMENTAL
453 else if ((secFlags & CIFSSEC_MUST_NTLMSSP) == CIFSSEC_MUST_NTLMSSP)
454 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
455 else if ((secFlags & CIFSSEC_AUTH_MASK) == CIFSSEC_MAY_NTLMSSP) {
456 cFYI(1, ("NTLMSSP only mechanism, enable extended security"));
457 pSMB->hdr.Flags2 |= SMBFLG2_EXT_SEC;
458 }
459#endif
487 460
488 count = 0; 461 count = 0;
489 for (i = 0; i < CIFS_NUM_PROT; i++) { 462 for (i = 0; i < CIFS_NUM_PROT; i++) {
@@ -620,6 +593,8 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
620 server->secType = NTLMv2; 593 server->secType = NTLMv2;
621 else if (secFlags & CIFSSEC_MAY_KRB5) 594 else if (secFlags & CIFSSEC_MAY_KRB5)
622 server->secType = Kerberos; 595 server->secType = Kerberos;
596 else if (secFlags & CIFSSEC_MAY_NTLMSSP)
597 server->secType = NTLMSSP;
623 else if (secFlags & CIFSSEC_MAY_LANMAN) 598 else if (secFlags & CIFSSEC_MAY_LANMAN)
624 server->secType = LANMAN; 599 server->secType = LANMAN;
625/* #ifdef CONFIG_CIFS_EXPERIMENTAL 600/* #ifdef CONFIG_CIFS_EXPERIMENTAL
@@ -1626,6 +1601,8 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1626 int smb_hdr_len; 1601 int smb_hdr_len;
1627 int resp_buf_type = 0; 1602 int resp_buf_type = 0;
1628 1603
1604 *nbytes = 0;
1605
1629 cFYI(1, ("write2 at %lld %d bytes", (long long)offset, count)); 1606 cFYI(1, ("write2 at %lld %d bytes", (long long)offset, count));
1630 1607
1631 if (tcon->ses->capabilities & CAP_LARGE_FILES) { 1608 if (tcon->ses->capabilities & CAP_LARGE_FILES) {
@@ -1682,11 +1659,9 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon,
1682 cifs_stats_inc(&tcon->num_writes); 1659 cifs_stats_inc(&tcon->num_writes);
1683 if (rc) { 1660 if (rc) {
1684 cFYI(1, ("Send error Write2 = %d", rc)); 1661 cFYI(1, ("Send error Write2 = %d", rc));
1685 *nbytes = 0;
1686 } else if (resp_buf_type == 0) { 1662 } else if (resp_buf_type == 0) {
1687 /* presumably this can not happen, but best to be safe */ 1663 /* presumably this can not happen, but best to be safe */
1688 rc = -EIO; 1664 rc = -EIO;
1689 *nbytes = 0;
1690 } else { 1665 } else {
1691 WRITE_RSP *pSMBr = (WRITE_RSP *)iov[0].iov_base; 1666 WRITE_RSP *pSMBr = (WRITE_RSP *)iov[0].iov_base;
1692 *nbytes = le16_to_cpu(pSMBr->CountHigh); 1667 *nbytes = le16_to_cpu(pSMBr->CountHigh);
@@ -2417,8 +2392,7 @@ winCreateHardLinkRetry:
2417 2392
2418int 2393int
2419CIFSSMBUnixQuerySymLink(const int xid, struct cifsTconInfo *tcon, 2394CIFSSMBUnixQuerySymLink(const int xid, struct cifsTconInfo *tcon,
2420 const unsigned char *searchName, 2395 const unsigned char *searchName, char **symlinkinfo,
2421 char *symlinkinfo, const int buflen,
2422 const struct nls_table *nls_codepage) 2396 const struct nls_table *nls_codepage)
2423{ 2397{
2424/* SMB_QUERY_FILE_UNIX_LINK */ 2398/* SMB_QUERY_FILE_UNIX_LINK */
@@ -2428,6 +2402,7 @@ CIFSSMBUnixQuerySymLink(const int xid, struct cifsTconInfo *tcon,
2428 int bytes_returned; 2402 int bytes_returned;
2429 int name_len; 2403 int name_len;
2430 __u16 params, byte_count; 2404 __u16 params, byte_count;
2405 char *data_start;
2431 2406
2432 cFYI(1, ("In QPathSymLinkInfo (Unix) for path %s", searchName)); 2407 cFYI(1, ("In QPathSymLinkInfo (Unix) for path %s", searchName));
2433 2408
@@ -2482,30 +2457,26 @@ querySymLinkRetry:
2482 /* decode response */ 2457 /* decode response */
2483 2458
2484 rc = validate_t2((struct smb_t2_rsp *)pSMBr); 2459 rc = validate_t2((struct smb_t2_rsp *)pSMBr);
2485 if (rc || (pSMBr->ByteCount < 2))
2486 /* BB also check enough total bytes returned */ 2460 /* BB also check enough total bytes returned */
2487 rc = -EIO; /* bad smb */ 2461 if (rc || (pSMBr->ByteCount < 2))
2462 rc = -EIO;
2488 else { 2463 else {
2489 __u16 data_offset = le16_to_cpu(pSMBr->t2.DataOffset); 2464 bool is_unicode;
2490 __u16 count = le16_to_cpu(pSMBr->t2.DataCount); 2465 u16 count = le16_to_cpu(pSMBr->t2.DataCount);
2466
2467 data_start = ((char *) &pSMBr->hdr.Protocol) +
2468 le16_to_cpu(pSMBr->t2.DataOffset);
2469
2470 if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE)
2471 is_unicode = true;
2472 else
2473 is_unicode = false;
2491 2474
2492 if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) {
2493 name_len = UniStrnlen((wchar_t *) ((char *)
2494 &pSMBr->hdr.Protocol + data_offset),
2495 min_t(const int, buflen, count) / 2);
2496 /* BB FIXME investigate remapping reserved chars here */ 2475 /* BB FIXME investigate remapping reserved chars here */
2497 cifs_strfromUCS_le(symlinkinfo, 2476 *symlinkinfo = cifs_strndup_from_ucs(data_start, count,
2498 (__le16 *) ((char *)&pSMBr->hdr.Protocol 2477 is_unicode, nls_codepage);
2499 + data_offset), 2478 if (!symlinkinfo)
2500 name_len, nls_codepage); 2479 rc = -ENOMEM;
2501 } else {
2502 strncpy(symlinkinfo,
2503 (char *) &pSMBr->hdr.Protocol +
2504 data_offset,
2505 min_t(const int, buflen, count));
2506 }
2507 symlinkinfo[buflen] = 0;
2508 /* just in case so calling code does not go off the end of buffer */
2509 } 2480 }
2510 } 2481 }
2511 cifs_buf_release(pSMB); 2482 cifs_buf_release(pSMB);
@@ -2603,7 +2574,6 @@ validate_ntransact(char *buf, char **ppparm, char **ppdata,
2603 *pparmlen = parm_count; 2574 *pparmlen = parm_count;
2604 return 0; 2575 return 0;
2605} 2576}
2606#endif /* CIFS_EXPERIMENTAL */
2607 2577
2608int 2578int
2609CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon, 2579CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
@@ -2613,7 +2583,6 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2613{ 2583{
2614 int rc = 0; 2584 int rc = 0;
2615 int bytes_returned; 2585 int bytes_returned;
2616 int name_len;
2617 struct smb_com_transaction_ioctl_req *pSMB; 2586 struct smb_com_transaction_ioctl_req *pSMB;
2618 struct smb_com_transaction_ioctl_rsp *pSMBr; 2587 struct smb_com_transaction_ioctl_rsp *pSMBr;
2619 2588
@@ -2650,59 +2619,55 @@ CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2650 } else { /* decode response */ 2619 } else { /* decode response */
2651 __u32 data_offset = le32_to_cpu(pSMBr->DataOffset); 2620 __u32 data_offset = le32_to_cpu(pSMBr->DataOffset);
2652 __u32 data_count = le32_to_cpu(pSMBr->DataCount); 2621 __u32 data_count = le32_to_cpu(pSMBr->DataCount);
2653 if ((pSMBr->ByteCount < 2) || (data_offset > 512)) 2622 if ((pSMBr->ByteCount < 2) || (data_offset > 512)) {
2654 /* BB also check enough total bytes returned */ 2623 /* BB also check enough total bytes returned */
2655 rc = -EIO; /* bad smb */ 2624 rc = -EIO; /* bad smb */
2656 else { 2625 goto qreparse_out;
2657 if (data_count && (data_count < 2048)) { 2626 }
2658 char *end_of_smb = 2 /* sizeof byte count */ + 2627 if (data_count && (data_count < 2048)) {
2659 pSMBr->ByteCount + 2628 char *end_of_smb = 2 /* sizeof byte count */ +
2660 (char *)&pSMBr->ByteCount; 2629 pSMBr->ByteCount + (char *)&pSMBr->ByteCount;
2661 2630
2662 struct reparse_data *reparse_buf = 2631 struct reparse_data *reparse_buf =
2663 (struct reparse_data *) 2632 (struct reparse_data *)
2664 ((char *)&pSMBr->hdr.Protocol 2633 ((char *)&pSMBr->hdr.Protocol
2665 + data_offset); 2634 + data_offset);
2666 if ((char *)reparse_buf >= end_of_smb) { 2635 if ((char *)reparse_buf >= end_of_smb) {
2667 rc = -EIO; 2636 rc = -EIO;
2668 goto qreparse_out; 2637 goto qreparse_out;
2669 } 2638 }
2670 if ((reparse_buf->LinkNamesBuf + 2639 if ((reparse_buf->LinkNamesBuf +
2671 reparse_buf->TargetNameOffset + 2640 reparse_buf->TargetNameOffset +
2672 reparse_buf->TargetNameLen) > 2641 reparse_buf->TargetNameLen) > end_of_smb) {
2673 end_of_smb) { 2642 cFYI(1, ("reparse buf beyond SMB"));
2674 cFYI(1, ("reparse buf beyond SMB")); 2643 rc = -EIO;
2675 rc = -EIO; 2644 goto qreparse_out;
2676 goto qreparse_out; 2645 }
2677 }
2678 2646
2679 if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) { 2647 if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) {
2680 name_len = UniStrnlen((wchar_t *) 2648 cifs_from_ucs2(symlinkinfo, (__le16 *)
2681 (reparse_buf->LinkNamesBuf + 2649 (reparse_buf->LinkNamesBuf +
2682 reparse_buf->TargetNameOffset), 2650 reparse_buf->TargetNameOffset),
2683 min(buflen/2, 2651 buflen,
2684 reparse_buf->TargetNameLen / 2)); 2652 reparse_buf->TargetNameLen,
2685 cifs_strfromUCS_le(symlinkinfo, 2653 nls_codepage, 0);
2686 (__le16 *) (reparse_buf->LinkNamesBuf + 2654 } else { /* ASCII names */
2687 reparse_buf->TargetNameOffset), 2655 strncpy(symlinkinfo,
2688 name_len, nls_codepage); 2656 reparse_buf->LinkNamesBuf +
2689 } else { /* ASCII names */ 2657 reparse_buf->TargetNameOffset,
2690 strncpy(symlinkinfo, 2658 min_t(const int, buflen,
2691 reparse_buf->LinkNamesBuf + 2659 reparse_buf->TargetNameLen));
2692 reparse_buf->TargetNameOffset,
2693 min_t(const int, buflen,
2694 reparse_buf->TargetNameLen));
2695 }
2696 } else {
2697 rc = -EIO;
2698 cFYI(1, ("Invalid return data count on "
2699 "get reparse info ioctl"));
2700 } 2660 }
2701 symlinkinfo[buflen] = 0; /* just in case so the caller 2661 } else {
2702 does not go off the end of the buffer */ 2662 rc = -EIO;
2703 cFYI(1, ("readlink result - %s", symlinkinfo)); 2663 cFYI(1, ("Invalid return data count on "
2664 "get reparse info ioctl"));
2704 } 2665 }
2666 symlinkinfo[buflen] = 0; /* just in case so the caller
2667 does not go off the end of the buffer */
2668 cFYI(1, ("readlink result - %s", symlinkinfo));
2705 } 2669 }
2670
2706qreparse_out: 2671qreparse_out:
2707 cifs_buf_release(pSMB); 2672 cifs_buf_release(pSMB);
2708 2673
@@ -2711,6 +2676,7 @@ qreparse_out:
2711 2676
2712 return rc; 2677 return rc;
2713} 2678}
2679#endif /* CIFS_EXPERIMENTAL */
2714 2680
2715#ifdef CONFIG_CIFS_POSIX 2681#ifdef CONFIG_CIFS_POSIX
2716 2682
@@ -3918,7 +3884,7 @@ GetInodeNumberRetry:
3918 } 3884 }
3919 pfinfo = (struct file_internal_info *) 3885 pfinfo = (struct file_internal_info *)
3920 (data_offset + (char *) &pSMBr->hdr.Protocol); 3886 (data_offset + (char *) &pSMBr->hdr.Protocol);
3921 *inode_number = pfinfo->UniqueId; 3887 *inode_number = le64_to_cpu(pfinfo->UniqueId);
3922 } 3888 }
3923 } 3889 }
3924GetInodeNumOut: 3890GetInodeNumOut:
@@ -3928,27 +3894,6 @@ GetInodeNumOut:
3928 return rc; 3894 return rc;
3929} 3895}
3930 3896
3931/* computes length of UCS string converted to host codepage
3932 * @src: UCS string
3933 * @maxlen: length of the input string in UCS characters
3934 * (not in bytes)
3935 *
3936 * return: size of input string in host codepage
3937 */
3938static int hostlen_fromUCS(const __le16 *src, const int maxlen,
3939 const struct nls_table *nls_codepage) {
3940 int i;
3941 int hostlen = 0;
3942 char to[4];
3943 int charlen;
3944 for (i = 0; (i < maxlen) && src[i]; ++i) {
3945 charlen = nls_codepage->uni2char(le16_to_cpu(src[i]),
3946 to, NLS_MAX_CHARSET_SIZE);
3947 hostlen += charlen > 0 ? charlen : 1;
3948 }
3949 return hostlen;
3950}
3951
3952/* parses DFS refferal V3 structure 3897/* parses DFS refferal V3 structure
3953 * caller is responsible for freeing target_nodes 3898 * caller is responsible for freeing target_nodes
3954 * returns: 3899 * returns:
@@ -3994,7 +3939,7 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
3994 3939
3995 cFYI(1, ("num_referrals: %d dfs flags: 0x%x ... \n", 3940 cFYI(1, ("num_referrals: %d dfs flags: 0x%x ... \n",
3996 *num_of_nodes, 3941 *num_of_nodes,
3997 le16_to_cpu(pSMBr->DFSFlags))); 3942 le32_to_cpu(pSMBr->DFSFlags)));
3998 3943
3999 *target_nodes = kzalloc(sizeof(struct dfs_info3_param) * 3944 *target_nodes = kzalloc(sizeof(struct dfs_info3_param) *
4000 *num_of_nodes, GFP_KERNEL); 3945 *num_of_nodes, GFP_KERNEL);
@@ -4010,14 +3955,14 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
4010 int max_len; 3955 int max_len;
4011 struct dfs_info3_param *node = (*target_nodes)+i; 3956 struct dfs_info3_param *node = (*target_nodes)+i;
4012 3957
4013 node->flags = le16_to_cpu(pSMBr->DFSFlags); 3958 node->flags = le32_to_cpu(pSMBr->DFSFlags);
4014 if (is_unicode) { 3959 if (is_unicode) {
4015 __le16 *tmp = kmalloc(strlen(searchName)*2 + 2, 3960 __le16 *tmp = kmalloc(strlen(searchName)*2 + 2,
4016 GFP_KERNEL); 3961 GFP_KERNEL);
4017 cifsConvertToUCS((__le16 *) tmp, searchName, 3962 cifsConvertToUCS((__le16 *) tmp, searchName,
4018 PATH_MAX, nls_codepage, remap); 3963 PATH_MAX, nls_codepage, remap);
4019 node->path_consumed = hostlen_fromUCS(tmp, 3964 node->path_consumed = cifs_ucs2_bytes(tmp,
4020 le16_to_cpu(pSMBr->PathConsumed)/2, 3965 le16_to_cpu(pSMBr->PathConsumed),
4021 nls_codepage); 3966 nls_codepage);
4022 kfree(tmp); 3967 kfree(tmp);
4023 } else 3968 } else
@@ -4029,20 +3974,24 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
4029 /* copy DfsPath */ 3974 /* copy DfsPath */
4030 temp = (char *)ref + le16_to_cpu(ref->DfsPathOffset); 3975 temp = (char *)ref + le16_to_cpu(ref->DfsPathOffset);
4031 max_len = data_end - temp; 3976 max_len = data_end - temp;
4032 rc = cifs_strncpy_to_host(&(node->path_name), temp, 3977 node->path_name = cifs_strndup_from_ucs(temp, max_len,
4033 max_len, is_unicode, nls_codepage); 3978 is_unicode, nls_codepage);
4034 if (rc) 3979 if (IS_ERR(node->path_name)) {
3980 rc = PTR_ERR(node->path_name);
3981 node->path_name = NULL;
4035 goto parse_DFS_referrals_exit; 3982 goto parse_DFS_referrals_exit;
3983 }
4036 3984
4037 /* copy link target UNC */ 3985 /* copy link target UNC */
4038 temp = (char *)ref + le16_to_cpu(ref->NetworkAddressOffset); 3986 temp = (char *)ref + le16_to_cpu(ref->NetworkAddressOffset);
4039 max_len = data_end - temp; 3987 max_len = data_end - temp;
4040 rc = cifs_strncpy_to_host(&(node->node_name), temp, 3988 node->node_name = cifs_strndup_from_ucs(temp, max_len,
4041 max_len, is_unicode, nls_codepage); 3989 is_unicode, nls_codepage);
4042 if (rc) 3990 if (IS_ERR(node->node_name)) {
3991 rc = PTR_ERR(node->node_name);
3992 node->node_name = NULL;
4043 goto parse_DFS_referrals_exit; 3993 goto parse_DFS_referrals_exit;
4044 3994 }
4045 ref += le16_to_cpu(ref->Size);
4046 } 3995 }
4047 3996
4048parse_DFS_referrals_exit: 3997parse_DFS_referrals_exit:
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0de3b5615a22..4aa81a507b74 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * fs/cifs/connect.c 2 * fs/cifs/connect.c
3 * 3 *
4 * Copyright (C) International Business Machines Corp., 2002,2008 4 * Copyright (C) International Business Machines Corp., 2002,2009
5 * Author(s): Steve French (sfrench@us.ibm.com) 5 * Author(s): Steve French (sfrench@us.ibm.com)
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
@@ -32,6 +32,7 @@
32#include <linux/kthread.h> 32#include <linux/kthread.h>
33#include <linux/pagevec.h> 33#include <linux/pagevec.h>
34#include <linux/freezer.h> 34#include <linux/freezer.h>
35#include <linux/namei.h>
35#include <asm/uaccess.h> 36#include <asm/uaccess.h>
36#include <asm/processor.h> 37#include <asm/processor.h>
37#include <net/ipv6.h> 38#include <net/ipv6.h>
@@ -978,6 +979,13 @@ cifs_parse_mount_options(char *options, const char *devname,
978 return 1; 979 return 1;
979 } else if (strnicmp(value, "krb5", 4) == 0) { 980 } else if (strnicmp(value, "krb5", 4) == 0) {
980 vol->secFlg |= CIFSSEC_MAY_KRB5; 981 vol->secFlg |= CIFSSEC_MAY_KRB5;
982#ifdef CONFIG_CIFS_EXPERIMENTAL
983 } else if (strnicmp(value, "ntlmsspi", 8) == 0) {
984 vol->secFlg |= CIFSSEC_MAY_NTLMSSP |
985 CIFSSEC_MUST_SIGN;
986 } else if (strnicmp(value, "ntlmssp", 7) == 0) {
987 vol->secFlg |= CIFSSEC_MAY_NTLMSSP;
988#endif
981 } else if (strnicmp(value, "ntlmv2i", 7) == 0) { 989 } else if (strnicmp(value, "ntlmv2i", 7) == 0) {
982 vol->secFlg |= CIFSSEC_MAY_NTLMV2 | 990 vol->secFlg |= CIFSSEC_MAY_NTLMV2 |
983 CIFSSEC_MUST_SIGN; 991 CIFSSEC_MUST_SIGN;
@@ -2214,9 +2222,58 @@ is_path_accessible(int xid, struct cifsTconInfo *tcon,
2214 return rc; 2222 return rc;
2215} 2223}
2216 2224
2225static void
2226cleanup_volume_info(struct smb_vol **pvolume_info)
2227{
2228 struct smb_vol *volume_info;
2229
2230 if (!pvolume_info && !*pvolume_info)
2231 return;
2232
2233 volume_info = *pvolume_info;
2234 kzfree(volume_info->password);
2235 kfree(volume_info->UNC);
2236 kfree(volume_info->prepath);
2237 kfree(volume_info);
2238 *pvolume_info = NULL;
2239 return;
2240}
2241
2242#ifdef CONFIG_CIFS_DFS_UPCALL
2243/* build_path_to_root returns full path to root when
2244 * we do not have an exiting connection (tcon) */
2245static char *
2246build_unc_path_to_root(const struct smb_vol *volume_info,
2247 const struct cifs_sb_info *cifs_sb)
2248{
2249 char *full_path;
2250
2251 int unc_len = strnlen(volume_info->UNC, MAX_TREE_SIZE + 1);
2252 full_path = kmalloc(unc_len + cifs_sb->prepathlen + 1, GFP_KERNEL);
2253 if (full_path == NULL)
2254 return ERR_PTR(-ENOMEM);
2255
2256 strncpy(full_path, volume_info->UNC, unc_len);
2257 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) {
2258 int i;
2259 for (i = 0; i < unc_len; i++) {
2260 if (full_path[i] == '\\')
2261 full_path[i] = '/';
2262 }
2263 }
2264
2265 if (cifs_sb->prepathlen)
2266 strncpy(full_path + unc_len, cifs_sb->prepath,
2267 cifs_sb->prepathlen);
2268
2269 full_path[unc_len + cifs_sb->prepathlen] = 0; /* add trailing null */
2270 return full_path;
2271}
2272#endif
2273
2217int 2274int
2218cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, 2275cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2219 char *mount_data, const char *devname) 2276 char *mount_data_global, const char *devname)
2220{ 2277{
2221 int rc = 0; 2278 int rc = 0;
2222 int xid; 2279 int xid;
@@ -2225,6 +2282,14 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2225 struct cifsTconInfo *tcon = NULL; 2282 struct cifsTconInfo *tcon = NULL;
2226 struct TCP_Server_Info *srvTcp = NULL; 2283 struct TCP_Server_Info *srvTcp = NULL;
2227 char *full_path; 2284 char *full_path;
2285 char *mount_data = mount_data_global;
2286#ifdef CONFIG_CIFS_DFS_UPCALL
2287 struct dfs_info3_param *referrals = NULL;
2288 unsigned int num_referrals = 0;
2289 int referral_walks_count = 0;
2290try_mount_again:
2291#endif
2292 full_path = NULL;
2228 2293
2229 xid = GetXid(); 2294 xid = GetXid();
2230 2295
@@ -2371,11 +2436,9 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2371 } 2436 }
2372 } 2437 }
2373 2438
2374 /* check for null share name ie connect to dfs root */
2375 if ((strchr(volume_info->UNC + 3, '\\') == NULL) 2439 if ((strchr(volume_info->UNC + 3, '\\') == NULL)
2376 && (strchr(volume_info->UNC + 3, '/') == NULL)) { 2440 && (strchr(volume_info->UNC + 3, '/') == NULL)) {
2377 /* rc = connect_to_dfs_path(...) */ 2441 cERROR(1, ("Missing share name"));
2378 cFYI(1, ("DFS root not supported"));
2379 rc = -ENODEV; 2442 rc = -ENODEV;
2380 goto mount_fail_check; 2443 goto mount_fail_check;
2381 } else { 2444 } else {
@@ -2392,7 +2455,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2392 } 2455 }
2393 } 2456 }
2394 if (rc) 2457 if (rc)
2395 goto mount_fail_check; 2458 goto remote_path_check;
2396 tcon->seal = volume_info->seal; 2459 tcon->seal = volume_info->seal;
2397 write_lock(&cifs_tcp_ses_lock); 2460 write_lock(&cifs_tcp_ses_lock);
2398 list_add(&tcon->tcon_list, &pSesInfo->tcon_list); 2461 list_add(&tcon->tcon_list, &pSesInfo->tcon_list);
@@ -2417,19 +2480,9 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2417 /* BB FIXME fix time_gran to be larger for LANMAN sessions */ 2480 /* BB FIXME fix time_gran to be larger for LANMAN sessions */
2418 sb->s_time_gran = 100; 2481 sb->s_time_gran = 100;
2419 2482
2420mount_fail_check: 2483 if (rc)
2421 /* on error free sesinfo and tcon struct if needed */ 2484 goto remote_path_check;
2422 if (rc) { 2485
2423 /* If find_unc succeeded then rc == 0 so we can not end */
2424 /* up accidently freeing someone elses tcon struct */
2425 if (tcon)
2426 cifs_put_tcon(tcon);
2427 else if (pSesInfo)
2428 cifs_put_smb_ses(pSesInfo);
2429 else
2430 cifs_put_tcp_session(srvTcp);
2431 goto out;
2432 }
2433 cifs_sb->tcon = tcon; 2486 cifs_sb->tcon = tcon;
2434 2487
2435 /* do not care if following two calls succeed - informational */ 2488 /* do not care if following two calls succeed - informational */
@@ -2461,7 +2514,9 @@ mount_fail_check:
2461 cifs_sb->rsize = min(cifs_sb->rsize, 2514 cifs_sb->rsize = min(cifs_sb->rsize,
2462 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE)); 2515 (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE));
2463 2516
2464 if (!rc && cifs_sb->prepathlen) { 2517remote_path_check:
2518 /* check if a whole path (including prepath) is not remote */
2519 if (!rc && cifs_sb->prepathlen && tcon) {
2465 /* build_path_to_root works only when we have a valid tcon */ 2520 /* build_path_to_root works only when we have a valid tcon */
2466 full_path = cifs_build_path_to_root(cifs_sb); 2521 full_path = cifs_build_path_to_root(cifs_sb);
2467 if (full_path == NULL) { 2522 if (full_path == NULL) {
@@ -2469,1079 +2524,91 @@ mount_fail_check:
2469 goto mount_fail_check; 2524 goto mount_fail_check;
2470 } 2525 }
2471 rc = is_path_accessible(xid, tcon, cifs_sb, full_path); 2526 rc = is_path_accessible(xid, tcon, cifs_sb, full_path);
2472 if (rc) { 2527 if (rc != -EREMOTE) {
2473 cERROR(1, ("Path %s in not accessible: %d",
2474 full_path, rc));
2475 kfree(full_path); 2528 kfree(full_path);
2476 goto mount_fail_check; 2529 goto mount_fail_check;
2477 } 2530 }
2478 kfree(full_path); 2531 kfree(full_path);
2479 } 2532 }
2480 2533
2481 /* volume_info->password is freed above when existing session found 2534 /* get referral if needed */
2482 (in which case it is not needed anymore) but when new sesion is created 2535 if (rc == -EREMOTE) {
2483 the password ptr is put in the new session structure (in which case the 2536#ifdef CONFIG_CIFS_DFS_UPCALL
2484 password will be freed at unmount time) */ 2537 if (referral_walks_count > MAX_NESTED_LINKS) {
2485out: 2538 /*
2486 /* zero out password before freeing */ 2539 * BB: when we implement proper loop detection,
2487 if (volume_info) { 2540 * we will remove this check. But now we need it
2488 if (volume_info->password != NULL) { 2541 * to prevent an indefinite loop if 'DFS tree' is
2489 memset(volume_info->password, 0, 2542 * misconfigured (i.e. has loops).
2490 strlen(volume_info->password)); 2543 */
2491 kfree(volume_info->password); 2544 rc = -ELOOP;
2492 } 2545 goto mount_fail_check;
2493 kfree(volume_info->UNC);
2494 kfree(volume_info->prepath);
2495 kfree(volume_info);
2496 }
2497 FreeXid(xid);
2498 return rc;
2499}
2500
2501static int
2502CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
2503 char session_key[CIFS_SESS_KEY_SIZE],
2504 const struct nls_table *nls_codepage)
2505{
2506 struct smb_hdr *smb_buffer;
2507 struct smb_hdr *smb_buffer_response;
2508 SESSION_SETUP_ANDX *pSMB;
2509 SESSION_SETUP_ANDX *pSMBr;
2510 char *bcc_ptr;
2511 char *user;
2512 char *domain;
2513 int rc = 0;
2514 int remaining_words = 0;
2515 int bytes_returned = 0;
2516 int len;
2517 __u32 capabilities;
2518 __u16 count;
2519
2520 cFYI(1, ("In sesssetup"));
2521 if (ses == NULL)
2522 return -EINVAL;
2523 user = ses->userName;
2524 domain = ses->domainName;
2525 smb_buffer = cifs_buf_get();
2526
2527 if (smb_buffer == NULL)
2528 return -ENOMEM;
2529
2530 smb_buffer_response = smb_buffer;
2531 pSMBr = pSMB = (SESSION_SETUP_ANDX *) smb_buffer;
2532
2533 /* send SMBsessionSetup here */
2534 header_assemble(smb_buffer, SMB_COM_SESSION_SETUP_ANDX,
2535 NULL /* no tCon exists yet */ , 13 /* wct */ );
2536
2537 smb_buffer->Mid = GetNextMid(ses->server);
2538 pSMB->req_no_secext.AndXCommand = 0xFF;
2539 pSMB->req_no_secext.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
2540 pSMB->req_no_secext.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
2541
2542 if (ses->server->secMode &
2543 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
2544 smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
2545
2546 capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
2547 CAP_LARGE_WRITE_X | CAP_LARGE_READ_X;
2548 if (ses->capabilities & CAP_UNICODE) {
2549 smb_buffer->Flags2 |= SMBFLG2_UNICODE;
2550 capabilities |= CAP_UNICODE;
2551 }
2552 if (ses->capabilities & CAP_STATUS32) {
2553 smb_buffer->Flags2 |= SMBFLG2_ERR_STATUS;
2554 capabilities |= CAP_STATUS32;
2555 }
2556 if (ses->capabilities & CAP_DFS) {
2557 smb_buffer->Flags2 |= SMBFLG2_DFS;
2558 capabilities |= CAP_DFS;
2559 }
2560 pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
2561
2562 pSMB->req_no_secext.CaseInsensitivePasswordLength =
2563 cpu_to_le16(CIFS_SESS_KEY_SIZE);
2564
2565 pSMB->req_no_secext.CaseSensitivePasswordLength =
2566 cpu_to_le16(CIFS_SESS_KEY_SIZE);
2567 bcc_ptr = pByteArea(smb_buffer);
2568 memcpy(bcc_ptr, (char *) session_key, CIFS_SESS_KEY_SIZE);
2569 bcc_ptr += CIFS_SESS_KEY_SIZE;
2570 memcpy(bcc_ptr, (char *) session_key, CIFS_SESS_KEY_SIZE);
2571 bcc_ptr += CIFS_SESS_KEY_SIZE;
2572
2573 if (ses->capabilities & CAP_UNICODE) {
2574 if ((long) bcc_ptr % 2) { /* must be word aligned for Unicode */
2575 *bcc_ptr = 0;
2576 bcc_ptr++;
2577 }
2578 if (user == NULL)
2579 bytes_returned = 0; /* skip null user */
2580 else
2581 bytes_returned =
2582 cifs_strtoUCS((__le16 *) bcc_ptr, user, 100,
2583 nls_codepage);
2584 /* convert number of 16 bit words to bytes */
2585 bcc_ptr += 2 * bytes_returned;
2586 bcc_ptr += 2; /* trailing null */
2587 if (domain == NULL)
2588 bytes_returned =
2589 cifs_strtoUCS((__le16 *) bcc_ptr,
2590 "CIFS_LINUX_DOM", 32, nls_codepage);
2591 else
2592 bytes_returned =
2593 cifs_strtoUCS((__le16 *) bcc_ptr, domain, 64,
2594 nls_codepage);
2595 bcc_ptr += 2 * bytes_returned;
2596 bcc_ptr += 2;
2597 bytes_returned =
2598 cifs_strtoUCS((__le16 *) bcc_ptr, "Linux version ",
2599 32, nls_codepage);
2600 bcc_ptr += 2 * bytes_returned;
2601 bytes_returned =
2602 cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release,
2603 32, nls_codepage);
2604 bcc_ptr += 2 * bytes_returned;
2605 bcc_ptr += 2;
2606 bytes_returned =
2607 cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS,
2608 64, nls_codepage);
2609 bcc_ptr += 2 * bytes_returned;
2610 bcc_ptr += 2;
2611 } else {
2612 if (user != NULL) {
2613 strncpy(bcc_ptr, user, 200);
2614 bcc_ptr += strnlen(user, 200);
2615 }
2616 *bcc_ptr = 0;
2617 bcc_ptr++;
2618 if (domain == NULL) {
2619 strcpy(bcc_ptr, "CIFS_LINUX_DOM");
2620 bcc_ptr += strlen("CIFS_LINUX_DOM") + 1;
2621 } else {
2622 strncpy(bcc_ptr, domain, 64);
2623 bcc_ptr += strnlen(domain, 64);
2624 *bcc_ptr = 0;
2625 bcc_ptr++;
2626 }
2627 strcpy(bcc_ptr, "Linux version ");
2628 bcc_ptr += strlen("Linux version ");
2629 strcpy(bcc_ptr, utsname()->release);
2630 bcc_ptr += strlen(utsname()->release) + 1;
2631 strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
2632 bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
2633 }
2634 count = (long) bcc_ptr - (long) pByteArea(smb_buffer);
2635 smb_buffer->smb_buf_length += count;
2636 pSMB->req_no_secext.ByteCount = cpu_to_le16(count);
2637
2638 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response,
2639 &bytes_returned, CIFS_LONG_OP);
2640 if (rc) {
2641/* rc = map_smb_to_linux_error(smb_buffer_response); now done in SendReceive */
2642 } else if ((smb_buffer_response->WordCount == 3)
2643 || (smb_buffer_response->WordCount == 4)) {
2644 __u16 action = le16_to_cpu(pSMBr->resp.Action);
2645 __u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength);
2646 if (action & GUEST_LOGIN)
2647 cFYI(1, ("Guest login")); /* BB mark SesInfo struct? */
2648 ses->Suid = smb_buffer_response->Uid; /* UID left in wire format
2649 (little endian) */
2650 cFYI(1, ("UID = %d ", ses->Suid));
2651 /* response can have either 3 or 4 word count - Samba sends 3 */
2652 bcc_ptr = pByteArea(smb_buffer_response);
2653 if ((pSMBr->resp.hdr.WordCount == 3)
2654 || ((pSMBr->resp.hdr.WordCount == 4)
2655 && (blob_len < pSMBr->resp.ByteCount))) {
2656 if (pSMBr->resp.hdr.WordCount == 4)
2657 bcc_ptr += blob_len;
2658
2659 if (smb_buffer->Flags2 & SMBFLG2_UNICODE) {
2660 if ((long) (bcc_ptr) % 2) {
2661 remaining_words =
2662 (BCC(smb_buffer_response) - 1) / 2;
2663 /* Unicode strings must be word
2664 aligned */
2665 bcc_ptr++;
2666 } else {
2667 remaining_words =
2668 BCC(smb_buffer_response) / 2;
2669 }
2670 len =
2671 UniStrnlen((wchar_t *) bcc_ptr,
2672 remaining_words - 1);
2673/* We look for obvious messed up bcc or strings in response so we do not go off
2674 the end since (at least) WIN2K and Windows XP have a major bug in not null
2675 terminating last Unicode string in response */
2676 if (ses->serverOS)
2677 kfree(ses->serverOS);
2678 ses->serverOS = kzalloc(2 * (len + 1),
2679 GFP_KERNEL);
2680 if (ses->serverOS == NULL)
2681 goto sesssetup_nomem;
2682 cifs_strfromUCS_le(ses->serverOS,
2683 (__le16 *)bcc_ptr,
2684 len, nls_codepage);
2685 bcc_ptr += 2 * (len + 1);
2686 remaining_words -= len + 1;
2687 ses->serverOS[2 * len] = 0;
2688 ses->serverOS[1 + (2 * len)] = 0;
2689 if (remaining_words > 0) {
2690 len = UniStrnlen((wchar_t *)bcc_ptr,
2691 remaining_words-1);
2692 kfree(ses->serverNOS);
2693 ses->serverNOS = kzalloc(2 * (len + 1),
2694 GFP_KERNEL);
2695 if (ses->serverNOS == NULL)
2696 goto sesssetup_nomem;
2697 cifs_strfromUCS_le(ses->serverNOS,
2698 (__le16 *)bcc_ptr,
2699 len, nls_codepage);
2700 bcc_ptr += 2 * (len + 1);
2701 ses->serverNOS[2 * len] = 0;
2702 ses->serverNOS[1 + (2 * len)] = 0;
2703 if (strncmp(ses->serverNOS,
2704 "NT LAN Manager 4", 16) == 0) {
2705 cFYI(1, ("NT4 server"));
2706 ses->flags |= CIFS_SES_NT4;
2707 }
2708 remaining_words -= len + 1;
2709 if (remaining_words > 0) {
2710 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
2711 /* last string is not always null terminated
2712 (for e.g. for Windows XP & 2000) */
2713 if (ses->serverDomain)
2714 kfree(ses->serverDomain);
2715 ses->serverDomain =
2716 kzalloc(2*(len+1),
2717 GFP_KERNEL);
2718 if (ses->serverDomain == NULL)
2719 goto sesssetup_nomem;
2720 cifs_strfromUCS_le(ses->serverDomain,
2721 (__le16 *)bcc_ptr,
2722 len, nls_codepage);
2723 bcc_ptr += 2 * (len + 1);
2724 ses->serverDomain[2*len] = 0;
2725 ses->serverDomain[1+(2*len)] = 0;
2726 } else { /* else no more room so create
2727 dummy domain string */
2728 if (ses->serverDomain)
2729 kfree(ses->serverDomain);
2730 ses->serverDomain =
2731 kzalloc(2, GFP_KERNEL);
2732 }
2733 } else { /* no room so create dummy domain
2734 and NOS string */
2735
2736 /* if these kcallocs fail not much we
2737 can do, but better to not fail the
2738 sesssetup itself */
2739 kfree(ses->serverDomain);
2740 ses->serverDomain =
2741 kzalloc(2, GFP_KERNEL);
2742 kfree(ses->serverNOS);
2743 ses->serverNOS =
2744 kzalloc(2, GFP_KERNEL);
2745 }
2746 } else { /* ASCII */
2747 len = strnlen(bcc_ptr, 1024);
2748 if (((long) bcc_ptr + len) - (long)
2749 pByteArea(smb_buffer_response)
2750 <= BCC(smb_buffer_response)) {
2751 kfree(ses->serverOS);
2752 ses->serverOS = kzalloc(len + 1,
2753 GFP_KERNEL);
2754 if (ses->serverOS == NULL)
2755 goto sesssetup_nomem;
2756 strncpy(ses->serverOS, bcc_ptr, len);
2757
2758 bcc_ptr += len;
2759 /* null terminate the string */
2760 bcc_ptr[0] = 0;
2761 bcc_ptr++;
2762
2763 len = strnlen(bcc_ptr, 1024);
2764 kfree(ses->serverNOS);
2765 ses->serverNOS = kzalloc(len + 1,
2766 GFP_KERNEL);
2767 if (ses->serverNOS == NULL)
2768 goto sesssetup_nomem;
2769 strncpy(ses->serverNOS, bcc_ptr, len);
2770 bcc_ptr += len;
2771 bcc_ptr[0] = 0;
2772 bcc_ptr++;
2773
2774 len = strnlen(bcc_ptr, 1024);
2775 if (ses->serverDomain)
2776 kfree(ses->serverDomain);
2777 ses->serverDomain = kzalloc(len + 1,
2778 GFP_KERNEL);
2779 if (ses->serverDomain == NULL)
2780 goto sesssetup_nomem;
2781 strncpy(ses->serverDomain, bcc_ptr,
2782 len);
2783 bcc_ptr += len;
2784 bcc_ptr[0] = 0;
2785 bcc_ptr++;
2786 } else
2787 cFYI(1,
2788 ("Variable field of length %d "
2789 "extends beyond end of smb ",
2790 len));
2791 }
2792 } else {
2793 cERROR(1, ("Security Blob Length extends beyond "
2794 "end of SMB"));
2795 } 2546 }
2796 } else { 2547 /* convert forward to back slashes in prepath here if needed */
2797 cERROR(1, ("Invalid Word count %d: ", 2548 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0)
2798 smb_buffer_response->WordCount)); 2549 convert_delimiter(cifs_sb->prepath,
2799 rc = -EIO; 2550 CIFS_DIR_SEP(cifs_sb));
2800 } 2551 full_path = build_unc_path_to_root(volume_info, cifs_sb);
2801sesssetup_nomem: /* do not return an error on nomem for the info strings, 2552 if (IS_ERR(full_path)) {
2802 since that could make reconnection harder, and 2553 rc = PTR_ERR(full_path);
2803 reconnection might be needed to free memory */ 2554 goto mount_fail_check;
2804 cifs_buf_release(smb_buffer);
2805
2806 return rc;
2807}
2808
2809static int
2810CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
2811 struct cifsSesInfo *ses, bool *pNTLMv2_flag,
2812 const struct nls_table *nls_codepage)
2813{
2814 struct smb_hdr *smb_buffer;
2815 struct smb_hdr *smb_buffer_response;
2816 SESSION_SETUP_ANDX *pSMB;
2817 SESSION_SETUP_ANDX *pSMBr;
2818 char *bcc_ptr;
2819 char *domain;
2820 int rc = 0;
2821 int remaining_words = 0;
2822 int bytes_returned = 0;
2823 int len;
2824 int SecurityBlobLength = sizeof(NEGOTIATE_MESSAGE);
2825 PNEGOTIATE_MESSAGE SecurityBlob;
2826 PCHALLENGE_MESSAGE SecurityBlob2;
2827 __u32 negotiate_flags, capabilities;
2828 __u16 count;
2829
2830 cFYI(1, ("In NTLMSSP sesssetup (negotiate)"));
2831 if (ses == NULL)
2832 return -EINVAL;
2833 domain = ses->domainName;
2834 *pNTLMv2_flag = false;
2835 smb_buffer = cifs_buf_get();
2836 if (smb_buffer == NULL) {
2837 return -ENOMEM;
2838 }
2839 smb_buffer_response = smb_buffer;
2840 pSMB = (SESSION_SETUP_ANDX *) smb_buffer;
2841 pSMBr = (SESSION_SETUP_ANDX *) smb_buffer_response;
2842
2843 /* send SMBsessionSetup here */
2844 header_assemble(smb_buffer, SMB_COM_SESSION_SETUP_ANDX,
2845 NULL /* no tCon exists yet */ , 12 /* wct */ );
2846
2847 smb_buffer->Mid = GetNextMid(ses->server);
2848 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
2849 pSMB->req.hdr.Flags |= (SMBFLG_CASELESS | SMBFLG_CANONICAL_PATH_FORMAT);
2850
2851 pSMB->req.AndXCommand = 0xFF;
2852 pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
2853 pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
2854
2855 if (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
2856 smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
2857
2858 capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
2859 CAP_EXTENDED_SECURITY;
2860 if (ses->capabilities & CAP_UNICODE) {
2861 smb_buffer->Flags2 |= SMBFLG2_UNICODE;
2862 capabilities |= CAP_UNICODE;
2863 }
2864 if (ses->capabilities & CAP_STATUS32) {
2865 smb_buffer->Flags2 |= SMBFLG2_ERR_STATUS;
2866 capabilities |= CAP_STATUS32;
2867 }
2868 if (ses->capabilities & CAP_DFS) {
2869 smb_buffer->Flags2 |= SMBFLG2_DFS;
2870 capabilities |= CAP_DFS;
2871 }
2872 pSMB->req.Capabilities = cpu_to_le32(capabilities);
2873
2874 bcc_ptr = (char *) &pSMB->req.SecurityBlob;
2875 SecurityBlob = (PNEGOTIATE_MESSAGE) bcc_ptr;
2876 strncpy(SecurityBlob->Signature, NTLMSSP_SIGNATURE, 8);
2877 SecurityBlob->MessageType = NtLmNegotiate;
2878 negotiate_flags =
2879 NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_NEGOTIATE_OEM |
2880 NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_NTLM |
2881 NTLMSSP_NEGOTIATE_56 |
2882 /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN | */ NTLMSSP_NEGOTIATE_128;
2883 if (sign_CIFS_PDUs)
2884 negotiate_flags |= NTLMSSP_NEGOTIATE_SIGN;
2885/* if (ntlmv2_support)
2886 negotiate_flags |= NTLMSSP_NEGOTIATE_NTLMV2;*/
2887 /* setup pointers to domain name and workstation name */
2888 bcc_ptr += SecurityBlobLength;
2889
2890 SecurityBlob->WorkstationName.Buffer = 0;
2891 SecurityBlob->WorkstationName.Length = 0;
2892 SecurityBlob->WorkstationName.MaximumLength = 0;
2893
2894 /* Domain not sent on first Sesssetup in NTLMSSP, instead it is sent
2895 along with username on auth request (ie the response to challenge) */
2896 SecurityBlob->DomainName.Buffer = 0;
2897 SecurityBlob->DomainName.Length = 0;
2898 SecurityBlob->DomainName.MaximumLength = 0;
2899 if (ses->capabilities & CAP_UNICODE) {
2900 if ((long) bcc_ptr % 2) {
2901 *bcc_ptr = 0;
2902 bcc_ptr++;
2903 } 2555 }
2904 2556
2905 bytes_returned = 2557 cFYI(1, ("Getting referral for: %s", full_path));
2906 cifs_strtoUCS((__le16 *) bcc_ptr, "Linux version ", 2558 rc = get_dfs_path(xid, pSesInfo , full_path + 1,
2907 32, nls_codepage); 2559 cifs_sb->local_nls, &num_referrals, &referrals,
2908 bcc_ptr += 2 * bytes_returned; 2560 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
2909 bytes_returned = 2561 if (!rc && num_referrals > 0) {
2910 cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release, 32, 2562 char *fake_devname = NULL;
2911 nls_codepage); 2563
2912 bcc_ptr += 2 * bytes_returned; 2564 if (mount_data != mount_data_global)
2913 bcc_ptr += 2; /* null terminate Linux version */ 2565 kfree(mount_data);
2914 bytes_returned = 2566 mount_data = cifs_compose_mount_options(
2915 cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS, 2567 cifs_sb->mountdata, full_path + 1,
2916 64, nls_codepage); 2568 referrals, &fake_devname);
2917 bcc_ptr += 2 * bytes_returned; 2569 kfree(fake_devname);
2918 *(bcc_ptr + 1) = 0; 2570 free_dfs_info_array(referrals, num_referrals);
2919 *(bcc_ptr + 2) = 0; 2571
2920 bcc_ptr += 2; /* null terminate network opsys string */ 2572 if (tcon)
2921 *(bcc_ptr + 1) = 0; 2573 cifs_put_tcon(tcon);
2922 *(bcc_ptr + 2) = 0; 2574 else if (pSesInfo)
2923 bcc_ptr += 2; /* null domain */ 2575 cifs_put_smb_ses(pSesInfo);
2924 } else { /* ASCII */ 2576
2925 strcpy(bcc_ptr, "Linux version "); 2577 cleanup_volume_info(&volume_info);
2926 bcc_ptr += strlen("Linux version "); 2578 FreeXid(xid);
2927 strcpy(bcc_ptr, utsname()->release); 2579 kfree(full_path);
2928 bcc_ptr += strlen(utsname()->release) + 1; 2580 referral_walks_count++;
2929 strcpy(bcc_ptr, CIFS_NETWORK_OPSYS); 2581 goto try_mount_again;
2930 bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
2931 bcc_ptr++; /* empty domain field */
2932 *bcc_ptr = 0;
2933 }
2934 SecurityBlob->NegotiateFlags = cpu_to_le32(negotiate_flags);
2935 pSMB->req.SecurityBlobLength = cpu_to_le16(SecurityBlobLength);
2936 count = (long) bcc_ptr - (long) pByteArea(smb_buffer);
2937 smb_buffer->smb_buf_length += count;
2938 pSMB->req.ByteCount = cpu_to_le16(count);
2939
2940 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response,
2941 &bytes_returned, CIFS_LONG_OP);
2942
2943 if (smb_buffer_response->Status.CifsError ==
2944 cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))
2945 rc = 0;
2946
2947 if (rc) {
2948/* rc = map_smb_to_linux_error(smb_buffer_response); *//* done in SendReceive now */
2949 } else if ((smb_buffer_response->WordCount == 3)
2950 || (smb_buffer_response->WordCount == 4)) {
2951 __u16 action = le16_to_cpu(pSMBr->resp.Action);
2952 __u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength);
2953
2954 if (action & GUEST_LOGIN)
2955 cFYI(1, ("Guest login"));
2956 /* Do we want to set anything in SesInfo struct when guest login? */
2957
2958 bcc_ptr = pByteArea(smb_buffer_response);
2959 /* response can have either 3 or 4 word count - Samba sends 3 */
2960
2961 SecurityBlob2 = (PCHALLENGE_MESSAGE) bcc_ptr;
2962 if (SecurityBlob2->MessageType != NtLmChallenge) {
2963 cFYI(1, ("Unexpected NTLMSSP message type received %d",
2964 SecurityBlob2->MessageType));
2965 } else if (ses) {
2966 ses->Suid = smb_buffer_response->Uid; /* UID left in le format */
2967 cFYI(1, ("UID = %d", ses->Suid));
2968 if ((pSMBr->resp.hdr.WordCount == 3)
2969 || ((pSMBr->resp.hdr.WordCount == 4)
2970 && (blob_len <
2971 pSMBr->resp.ByteCount))) {
2972
2973 if (pSMBr->resp.hdr.WordCount == 4) {
2974 bcc_ptr += blob_len;
2975 cFYI(1, ("Security Blob Length %d",
2976 blob_len));
2977 }
2978
2979 cFYI(1, ("NTLMSSP Challenge rcvd"));
2980
2981 memcpy(ses->server->cryptKey,
2982 SecurityBlob2->Challenge,
2983 CIFS_CRYPTO_KEY_SIZE);
2984 if (SecurityBlob2->NegotiateFlags &
2985 cpu_to_le32(NTLMSSP_NEGOTIATE_NTLMV2))
2986 *pNTLMv2_flag = true;
2987
2988 if ((SecurityBlob2->NegotiateFlags &
2989 cpu_to_le32(NTLMSSP_NEGOTIATE_ALWAYS_SIGN))
2990 || (sign_CIFS_PDUs > 1))
2991 ses->server->secMode |=
2992 SECMODE_SIGN_REQUIRED;
2993 if ((SecurityBlob2->NegotiateFlags &
2994 cpu_to_le32(NTLMSSP_NEGOTIATE_SIGN)) && (sign_CIFS_PDUs))
2995 ses->server->secMode |=
2996 SECMODE_SIGN_ENABLED;
2997
2998 if (smb_buffer->Flags2 & SMBFLG2_UNICODE) {
2999 if ((long) (bcc_ptr) % 2) {
3000 remaining_words =
3001 (BCC(smb_buffer_response)
3002 - 1) / 2;
3003 /* Must word align unicode strings */
3004 bcc_ptr++;
3005 } else {
3006 remaining_words =
3007 BCC
3008 (smb_buffer_response) / 2;
3009 }
3010 len =
3011 UniStrnlen((wchar_t *) bcc_ptr,
3012 remaining_words - 1);
3013/* We look for obvious messed up bcc or strings in response so we do not go off
3014 the end since (at least) WIN2K and Windows XP have a major bug in not null
3015 terminating last Unicode string in response */
3016 if (ses->serverOS)
3017 kfree(ses->serverOS);
3018 ses->serverOS =
3019 kzalloc(2 * (len + 1), GFP_KERNEL);
3020 cifs_strfromUCS_le(ses->serverOS,
3021 (__le16 *)
3022 bcc_ptr, len,
3023 nls_codepage);
3024 bcc_ptr += 2 * (len + 1);
3025 remaining_words -= len + 1;
3026 ses->serverOS[2 * len] = 0;
3027 ses->serverOS[1 + (2 * len)] = 0;
3028 if (remaining_words > 0) {
3029 len = UniStrnlen((wchar_t *)
3030 bcc_ptr,
3031 remaining_words
3032 - 1);
3033 kfree(ses->serverNOS);
3034 ses->serverNOS =
3035 kzalloc(2 * (len + 1),
3036 GFP_KERNEL);
3037 cifs_strfromUCS_le(ses->
3038 serverNOS,
3039 (__le16 *)
3040 bcc_ptr,
3041 len,
3042 nls_codepage);
3043 bcc_ptr += 2 * (len + 1);
3044 ses->serverNOS[2 * len] = 0;
3045 ses->serverNOS[1 +
3046 (2 * len)] = 0;
3047 remaining_words -= len + 1;
3048 if (remaining_words > 0) {
3049 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
3050 /* last string not always null terminated
3051 (for e.g. for Windows XP & 2000) */
3052 kfree(ses->serverDomain);
3053 ses->serverDomain =
3054 kzalloc(2 *
3055 (len +
3056 1),
3057 GFP_KERNEL);
3058 cifs_strfromUCS_le
3059 (ses->serverDomain,
3060 (__le16 *)bcc_ptr,
3061 len, nls_codepage);
3062 bcc_ptr +=
3063 2 * (len + 1);
3064 ses->serverDomain[2*len]
3065 = 0;
3066 ses->serverDomain
3067 [1 + (2 * len)]
3068 = 0;
3069 } /* else no more room so create dummy domain string */
3070 else {
3071 kfree(ses->serverDomain);
3072 ses->serverDomain =
3073 kzalloc(2,
3074 GFP_KERNEL);
3075 }
3076 } else { /* no room so create dummy domain and NOS string */
3077 kfree(ses->serverDomain);
3078 ses->serverDomain =
3079 kzalloc(2, GFP_KERNEL);
3080 kfree(ses->serverNOS);
3081 ses->serverNOS =
3082 kzalloc(2, GFP_KERNEL);
3083 }
3084 } else { /* ASCII */
3085 len = strnlen(bcc_ptr, 1024);
3086 if (((long) bcc_ptr + len) - (long)
3087 pByteArea(smb_buffer_response)
3088 <= BCC(smb_buffer_response)) {
3089 if (ses->serverOS)
3090 kfree(ses->serverOS);
3091 ses->serverOS =
3092 kzalloc(len + 1,
3093 GFP_KERNEL);
3094 strncpy(ses->serverOS,
3095 bcc_ptr, len);
3096
3097 bcc_ptr += len;
3098 bcc_ptr[0] = 0; /* null terminate string */
3099 bcc_ptr++;
3100
3101 len = strnlen(bcc_ptr, 1024);
3102 kfree(ses->serverNOS);
3103 ses->serverNOS =
3104 kzalloc(len + 1,
3105 GFP_KERNEL);
3106 strncpy(ses->serverNOS, bcc_ptr, len);
3107 bcc_ptr += len;
3108 bcc_ptr[0] = 0;
3109 bcc_ptr++;
3110
3111 len = strnlen(bcc_ptr, 1024);
3112 kfree(ses->serverDomain);
3113 ses->serverDomain =
3114 kzalloc(len + 1,
3115 GFP_KERNEL);
3116 strncpy(ses->serverDomain,
3117 bcc_ptr, len);
3118 bcc_ptr += len;
3119 bcc_ptr[0] = 0;
3120 bcc_ptr++;
3121 } else
3122 cFYI(1,
3123 ("field of length %d "
3124 "extends beyond end of smb",
3125 len));
3126 }
3127 } else {
3128 cERROR(1, ("Security Blob Length extends beyond"
3129 " end of SMB"));
3130 }
3131 } else {
3132 cERROR(1, ("No session structure passed in."));
3133 } 2582 }
3134 } else { 2583#else /* No DFS support, return error on mount */
3135 cERROR(1, ("Invalid Word count %d:", 2584 rc = -EOPNOTSUPP;
3136 smb_buffer_response->WordCount)); 2585#endif
3137 rc = -EIO;
3138 }
3139
3140 cifs_buf_release(smb_buffer);
3141
3142 return rc;
3143}
3144static int
3145CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
3146 char *ntlm_session_key, bool ntlmv2_flag,
3147 const struct nls_table *nls_codepage)
3148{
3149 struct smb_hdr *smb_buffer;
3150 struct smb_hdr *smb_buffer_response;
3151 SESSION_SETUP_ANDX *pSMB;
3152 SESSION_SETUP_ANDX *pSMBr;
3153 char *bcc_ptr;
3154 char *user;
3155 char *domain;
3156 int rc = 0;
3157 int remaining_words = 0;
3158 int bytes_returned = 0;
3159 int len;
3160 int SecurityBlobLength = sizeof(AUTHENTICATE_MESSAGE);
3161 PAUTHENTICATE_MESSAGE SecurityBlob;
3162 __u32 negotiate_flags, capabilities;
3163 __u16 count;
3164
3165 cFYI(1, ("In NTLMSSPSessSetup (Authenticate)"));
3166 if (ses == NULL)
3167 return -EINVAL;
3168 user = ses->userName;
3169 domain = ses->domainName;
3170 smb_buffer = cifs_buf_get();
3171 if (smb_buffer == NULL) {
3172 return -ENOMEM;
3173 }
3174 smb_buffer_response = smb_buffer;
3175 pSMB = (SESSION_SETUP_ANDX *)smb_buffer;
3176 pSMBr = (SESSION_SETUP_ANDX *)smb_buffer_response;
3177
3178 /* send SMBsessionSetup here */
3179 header_assemble(smb_buffer, SMB_COM_SESSION_SETUP_ANDX,
3180 NULL /* no tCon exists yet */ , 12 /* wct */ );
3181
3182 smb_buffer->Mid = GetNextMid(ses->server);
3183 pSMB->req.hdr.Flags |= (SMBFLG_CASELESS | SMBFLG_CANONICAL_PATH_FORMAT);
3184 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
3185 pSMB->req.AndXCommand = 0xFF;
3186 pSMB->req.MaxBufferSize = cpu_to_le16(ses->server->maxBuf);
3187 pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
3188
3189 pSMB->req.hdr.Uid = ses->Suid;
3190
3191 if (ses->server->secMode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
3192 smb_buffer->Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
3193
3194 capabilities = CAP_LARGE_FILES | CAP_NT_SMBS | CAP_LEVEL_II_OPLOCKS |
3195 CAP_EXTENDED_SECURITY;
3196 if (ses->capabilities & CAP_UNICODE) {
3197 smb_buffer->Flags2 |= SMBFLG2_UNICODE;
3198 capabilities |= CAP_UNICODE;
3199 }
3200 if (ses->capabilities & CAP_STATUS32) {
3201 smb_buffer->Flags2 |= SMBFLG2_ERR_STATUS;
3202 capabilities |= CAP_STATUS32;
3203 } 2586 }
3204 if (ses->capabilities & CAP_DFS) {
3205 smb_buffer->Flags2 |= SMBFLG2_DFS;
3206 capabilities |= CAP_DFS;
3207 }
3208 pSMB->req.Capabilities = cpu_to_le32(capabilities);
3209
3210 bcc_ptr = (char *)&pSMB->req.SecurityBlob;
3211 SecurityBlob = (PAUTHENTICATE_MESSAGE)bcc_ptr;
3212 strncpy(SecurityBlob->Signature, NTLMSSP_SIGNATURE, 8);
3213 SecurityBlob->MessageType = NtLmAuthenticate;
3214 bcc_ptr += SecurityBlobLength;
3215 negotiate_flags = NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_REQUEST_TARGET |
3216 NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_TARGET_INFO |
3217 0x80000000 | NTLMSSP_NEGOTIATE_128;
3218 if (sign_CIFS_PDUs)
3219 negotiate_flags |= /* NTLMSSP_NEGOTIATE_ALWAYS_SIGN |*/ NTLMSSP_NEGOTIATE_SIGN;
3220 if (ntlmv2_flag)
3221 negotiate_flags |= NTLMSSP_NEGOTIATE_NTLMV2;
3222
3223/* setup pointers to domain name and workstation name */
3224
3225 SecurityBlob->WorkstationName.Buffer = 0;
3226 SecurityBlob->WorkstationName.Length = 0;
3227 SecurityBlob->WorkstationName.MaximumLength = 0;
3228 SecurityBlob->SessionKey.Length = 0;
3229 SecurityBlob->SessionKey.MaximumLength = 0;
3230 SecurityBlob->SessionKey.Buffer = 0;
3231
3232 SecurityBlob->LmChallengeResponse.Length = 0;
3233 SecurityBlob->LmChallengeResponse.MaximumLength = 0;
3234 SecurityBlob->LmChallengeResponse.Buffer = 0;
3235
3236 SecurityBlob->NtChallengeResponse.Length =
3237 cpu_to_le16(CIFS_SESS_KEY_SIZE);
3238 SecurityBlob->NtChallengeResponse.MaximumLength =
3239 cpu_to_le16(CIFS_SESS_KEY_SIZE);
3240 memcpy(bcc_ptr, ntlm_session_key, CIFS_SESS_KEY_SIZE);
3241 SecurityBlob->NtChallengeResponse.Buffer =
3242 cpu_to_le32(SecurityBlobLength);
3243 SecurityBlobLength += CIFS_SESS_KEY_SIZE;
3244 bcc_ptr += CIFS_SESS_KEY_SIZE;
3245 2587
3246 if (ses->capabilities & CAP_UNICODE) { 2588mount_fail_check:
3247 if (domain == NULL) { 2589 /* on error free sesinfo and tcon struct if needed */
3248 SecurityBlob->DomainName.Buffer = 0;
3249 SecurityBlob->DomainName.Length = 0;
3250 SecurityBlob->DomainName.MaximumLength = 0;
3251 } else {
3252 __u16 ln = cifs_strtoUCS((__le16 *) bcc_ptr, domain, 64,
3253 nls_codepage);
3254 ln *= 2;
3255 SecurityBlob->DomainName.MaximumLength =
3256 cpu_to_le16(ln);
3257 SecurityBlob->DomainName.Buffer =
3258 cpu_to_le32(SecurityBlobLength);
3259 bcc_ptr += ln;
3260 SecurityBlobLength += ln;
3261 SecurityBlob->DomainName.Length = cpu_to_le16(ln);
3262 }
3263 if (user == NULL) {
3264 SecurityBlob->UserName.Buffer = 0;
3265 SecurityBlob->UserName.Length = 0;
3266 SecurityBlob->UserName.MaximumLength = 0;
3267 } else {
3268 __u16 ln = cifs_strtoUCS((__le16 *) bcc_ptr, user, 64,
3269 nls_codepage);
3270 ln *= 2;
3271 SecurityBlob->UserName.MaximumLength =
3272 cpu_to_le16(ln);
3273 SecurityBlob->UserName.Buffer =
3274 cpu_to_le32(SecurityBlobLength);
3275 bcc_ptr += ln;
3276 SecurityBlobLength += ln;
3277 SecurityBlob->UserName.Length = cpu_to_le16(ln);
3278 }
3279
3280 /* SecurityBlob->WorkstationName.Length =
3281 cifs_strtoUCS((__le16 *) bcc_ptr, "AMACHINE",64, nls_codepage);
3282 SecurityBlob->WorkstationName.Length *= 2;
3283 SecurityBlob->WorkstationName.MaximumLength =
3284 cpu_to_le16(SecurityBlob->WorkstationName.Length);
3285 SecurityBlob->WorkstationName.Buffer =
3286 cpu_to_le32(SecurityBlobLength);
3287 bcc_ptr += SecurityBlob->WorkstationName.Length;
3288 SecurityBlobLength += SecurityBlob->WorkstationName.Length;
3289 SecurityBlob->WorkstationName.Length =
3290 cpu_to_le16(SecurityBlob->WorkstationName.Length); */
3291
3292 if ((long) bcc_ptr % 2) {
3293 *bcc_ptr = 0;
3294 bcc_ptr++;
3295 }
3296 bytes_returned =
3297 cifs_strtoUCS((__le16 *) bcc_ptr, "Linux version ",
3298 32, nls_codepage);
3299 bcc_ptr += 2 * bytes_returned;
3300 bytes_returned =
3301 cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release, 32,
3302 nls_codepage);
3303 bcc_ptr += 2 * bytes_returned;
3304 bcc_ptr += 2; /* null term version string */
3305 bytes_returned =
3306 cifs_strtoUCS((__le16 *) bcc_ptr, CIFS_NETWORK_OPSYS,
3307 64, nls_codepage);
3308 bcc_ptr += 2 * bytes_returned;
3309 *(bcc_ptr + 1) = 0;
3310 *(bcc_ptr + 2) = 0;
3311 bcc_ptr += 2; /* null terminate network opsys string */
3312 *(bcc_ptr + 1) = 0;
3313 *(bcc_ptr + 2) = 0;
3314 bcc_ptr += 2; /* null domain */
3315 } else { /* ASCII */
3316 if (domain == NULL) {
3317 SecurityBlob->DomainName.Buffer = 0;
3318 SecurityBlob->DomainName.Length = 0;
3319 SecurityBlob->DomainName.MaximumLength = 0;
3320 } else {
3321 __u16 ln;
3322 negotiate_flags |= NTLMSSP_NEGOTIATE_DOMAIN_SUPPLIED;
3323 strncpy(bcc_ptr, domain, 63);
3324 ln = strnlen(domain, 64);
3325 SecurityBlob->DomainName.MaximumLength =
3326 cpu_to_le16(ln);
3327 SecurityBlob->DomainName.Buffer =
3328 cpu_to_le32(SecurityBlobLength);
3329 bcc_ptr += ln;
3330 SecurityBlobLength += ln;
3331 SecurityBlob->DomainName.Length = cpu_to_le16(ln);
3332 }
3333 if (user == NULL) {
3334 SecurityBlob->UserName.Buffer = 0;
3335 SecurityBlob->UserName.Length = 0;
3336 SecurityBlob->UserName.MaximumLength = 0;
3337 } else {
3338 __u16 ln;
3339 strncpy(bcc_ptr, user, 63);
3340 ln = strnlen(user, 64);
3341 SecurityBlob->UserName.MaximumLength = cpu_to_le16(ln);
3342 SecurityBlob->UserName.Buffer =
3343 cpu_to_le32(SecurityBlobLength);
3344 bcc_ptr += ln;
3345 SecurityBlobLength += ln;
3346 SecurityBlob->UserName.Length = cpu_to_le16(ln);
3347 }
3348 /* BB fill in our workstation name if known BB */
3349
3350 strcpy(bcc_ptr, "Linux version ");
3351 bcc_ptr += strlen("Linux version ");
3352 strcpy(bcc_ptr, utsname()->release);
3353 bcc_ptr += strlen(utsname()->release) + 1;
3354 strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
3355 bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
3356 bcc_ptr++; /* null domain */
3357 *bcc_ptr = 0;
3358 }
3359 SecurityBlob->NegotiateFlags = cpu_to_le32(negotiate_flags);
3360 pSMB->req.SecurityBlobLength = cpu_to_le16(SecurityBlobLength);
3361 count = (long) bcc_ptr - (long) pByteArea(smb_buffer);
3362 smb_buffer->smb_buf_length += count;
3363 pSMB->req.ByteCount = cpu_to_le16(count);
3364
3365 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response,
3366 &bytes_returned, CIFS_LONG_OP);
3367 if (rc) { 2590 if (rc) {
3368/* rc = map_smb_to_linux_error(smb_buffer_response) done in SendReceive now */ 2591 if (mount_data != mount_data_global)
3369 } else if ((smb_buffer_response->WordCount == 3) || 2592 kfree(mount_data);
3370 (smb_buffer_response->WordCount == 4)) { 2593 /* If find_unc succeeded then rc == 0 so we can not end */
3371 __u16 action = le16_to_cpu(pSMBr->resp.Action); 2594 /* up accidently freeing someone elses tcon struct */
3372 __u16 blob_len = le16_to_cpu(pSMBr->resp.SecurityBlobLength); 2595 if (tcon)
3373 if (action & GUEST_LOGIN) 2596 cifs_put_tcon(tcon);
3374 cFYI(1, ("Guest login")); /* BB Should we set anything 2597 else if (pSesInfo)
3375 in SesInfo struct ? */ 2598 cifs_put_smb_ses(pSesInfo);
3376/* if (SecurityBlob2->MessageType != NtLm??) { 2599 else
3377 cFYI("Unexpected message type on auth response is %d")); 2600 cifs_put_tcp_session(srvTcp);
3378 } */ 2601 goto out;
3379
3380 if (ses) {
3381 cFYI(1,
3382 ("Check challenge UID %d vs auth response UID %d",
3383 ses->Suid, smb_buffer_response->Uid));
3384 /* UID left in wire format */
3385 ses->Suid = smb_buffer_response->Uid;
3386 bcc_ptr = pByteArea(smb_buffer_response);
3387 /* response can have either 3 or 4 word count - Samba sends 3 */
3388 if ((pSMBr->resp.hdr.WordCount == 3)
3389 || ((pSMBr->resp.hdr.WordCount == 4)
3390 && (blob_len <
3391 pSMBr->resp.ByteCount))) {
3392 if (pSMBr->resp.hdr.WordCount == 4) {
3393 bcc_ptr +=
3394 blob_len;
3395 cFYI(1,
3396 ("Security Blob Length %d ",
3397 blob_len));
3398 }
3399
3400 cFYI(1,
3401 ("NTLMSSP response to Authenticate "));
3402
3403 if (smb_buffer->Flags2 & SMBFLG2_UNICODE) {
3404 if ((long) (bcc_ptr) % 2) {
3405 remaining_words =
3406 (BCC(smb_buffer_response)
3407 - 1) / 2;
3408 bcc_ptr++; /* Unicode strings must be word aligned */
3409 } else {
3410 remaining_words = BCC(smb_buffer_response) / 2;
3411 }
3412 len = UniStrnlen((wchar_t *) bcc_ptr,
3413 remaining_words - 1);
3414/* We look for obvious messed up bcc or strings in response so we do not go off
3415 the end since (at least) WIN2K and Windows XP have a major bug in not null
3416 terminating last Unicode string in response */
3417 if (ses->serverOS)
3418 kfree(ses->serverOS);
3419 ses->serverOS =
3420 kzalloc(2 * (len + 1), GFP_KERNEL);
3421 cifs_strfromUCS_le(ses->serverOS,
3422 (__le16 *)
3423 bcc_ptr, len,
3424 nls_codepage);
3425 bcc_ptr += 2 * (len + 1);
3426 remaining_words -= len + 1;
3427 ses->serverOS[2 * len] = 0;
3428 ses->serverOS[1 + (2 * len)] = 0;
3429 if (remaining_words > 0) {
3430 len = UniStrnlen((wchar_t *)
3431 bcc_ptr,
3432 remaining_words
3433 - 1);
3434 kfree(ses->serverNOS);
3435 ses->serverNOS =
3436 kzalloc(2 * (len + 1),
3437 GFP_KERNEL);
3438 cifs_strfromUCS_le(ses->
3439 serverNOS,
3440 (__le16 *)
3441 bcc_ptr,
3442 len,
3443 nls_codepage);
3444 bcc_ptr += 2 * (len + 1);
3445 ses->serverNOS[2 * len] = 0;
3446 ses->serverNOS[1+(2*len)] = 0;
3447 remaining_words -= len + 1;
3448 if (remaining_words > 0) {
3449 len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words);
3450 /* last string not always null terminated (e.g. for Windows XP & 2000) */
3451 if (ses->serverDomain)
3452 kfree(ses->serverDomain);
3453 ses->serverDomain =
3454 kzalloc(2 *
3455 (len +
3456 1),
3457 GFP_KERNEL);
3458 cifs_strfromUCS_le
3459 (ses->
3460 serverDomain,
3461 (__le16 *)
3462 bcc_ptr, len,
3463 nls_codepage);
3464 bcc_ptr +=
3465 2 * (len + 1);
3466 ses->
3467 serverDomain[2
3468 * len]
3469 = 0;
3470 ses->
3471 serverDomain[1
3472 +
3473 (2
3474 *
3475 len)]
3476 = 0;
3477 } /* else no more room so create dummy domain string */
3478 else {
3479 if (ses->serverDomain)
3480 kfree(ses->serverDomain);
3481 ses->serverDomain = kzalloc(2,GFP_KERNEL);
3482 }
3483 } else { /* no room so create dummy domain and NOS string */
3484 if (ses->serverDomain)
3485 kfree(ses->serverDomain);
3486 ses->serverDomain = kzalloc(2, GFP_KERNEL);
3487 kfree(ses->serverNOS);
3488 ses->serverNOS = kzalloc(2, GFP_KERNEL);
3489 }
3490 } else { /* ASCII */
3491 len = strnlen(bcc_ptr, 1024);
3492 if (((long) bcc_ptr + len) -
3493 (long) pByteArea(smb_buffer_response)
3494 <= BCC(smb_buffer_response)) {
3495 if (ses->serverOS)
3496 kfree(ses->serverOS);
3497 ses->serverOS = kzalloc(len + 1, GFP_KERNEL);
3498 strncpy(ses->serverOS,bcc_ptr, len);
3499
3500 bcc_ptr += len;
3501 bcc_ptr[0] = 0; /* null terminate the string */
3502 bcc_ptr++;
3503
3504 len = strnlen(bcc_ptr, 1024);
3505 kfree(ses->serverNOS);
3506 ses->serverNOS = kzalloc(len+1,
3507 GFP_KERNEL);
3508 strncpy(ses->serverNOS,
3509 bcc_ptr, len);
3510 bcc_ptr += len;
3511 bcc_ptr[0] = 0;
3512 bcc_ptr++;
3513
3514 len = strnlen(bcc_ptr, 1024);
3515 if (ses->serverDomain)
3516 kfree(ses->serverDomain);
3517 ses->serverDomain =
3518 kzalloc(len+1,
3519 GFP_KERNEL);
3520 strncpy(ses->serverDomain,
3521 bcc_ptr, len);
3522 bcc_ptr += len;
3523 bcc_ptr[0] = 0;
3524 bcc_ptr++;
3525 } else
3526 cFYI(1, ("field of length %d "
3527 "extends beyond end of smb ",
3528 len));
3529 }
3530 } else {
3531 cERROR(1, ("Security Blob extends beyond end "
3532 "of SMB"));
3533 }
3534 } else {
3535 cERROR(1, ("No session structure passed in."));
3536 }
3537 } else {
3538 cERROR(1, ("Invalid Word count %d: ",
3539 smb_buffer_response->WordCount));
3540 rc = -EIO;
3541 } 2602 }
3542 2603
3543 cifs_buf_release(smb_buffer); 2604 /* volume_info->password is freed above when existing session found
3544 2605 (in which case it is not needed anymore) but when new sesion is created
2606 the password ptr is put in the new session structure (in which case the
2607 password will be freed at unmount time) */
2608out:
2609 /* zero out password before freeing */
2610 cleanup_volume_info(&volume_info);
2611 FreeXid(xid);
3545 return rc; 2612 return rc;
3546} 2613}
3547 2614
@@ -3556,7 +2623,7 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3556 TCONX_RSP *pSMBr; 2623 TCONX_RSP *pSMBr;
3557 unsigned char *bcc_ptr; 2624 unsigned char *bcc_ptr;
3558 int rc = 0; 2625 int rc = 0;
3559 int length; 2626 int length, bytes_left;
3560 __u16 count; 2627 __u16 count;
3561 2628
3562 if (ses == NULL) 2629 if (ses == NULL)
@@ -3644,14 +2711,22 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3644 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length, 2711 rc = SendReceive(xid, ses, smb_buffer, smb_buffer_response, &length,
3645 CIFS_STD_OP); 2712 CIFS_STD_OP);
3646 2713
3647 /* if (rc) rc = map_smb_to_linux_error(smb_buffer_response); */
3648 /* above now done in SendReceive */ 2714 /* above now done in SendReceive */
3649 if ((rc == 0) && (tcon != NULL)) { 2715 if ((rc == 0) && (tcon != NULL)) {
2716 bool is_unicode;
2717
3650 tcon->tidStatus = CifsGood; 2718 tcon->tidStatus = CifsGood;
3651 tcon->need_reconnect = false; 2719 tcon->need_reconnect = false;
3652 tcon->tid = smb_buffer_response->Tid; 2720 tcon->tid = smb_buffer_response->Tid;
3653 bcc_ptr = pByteArea(smb_buffer_response); 2721 bcc_ptr = pByteArea(smb_buffer_response);
3654 length = strnlen(bcc_ptr, BCC(smb_buffer_response) - 2); 2722 bytes_left = BCC(smb_buffer_response);
2723 length = strnlen(bcc_ptr, bytes_left - 2);
2724 if (smb_buffer->Flags2 & SMBFLG2_UNICODE)
2725 is_unicode = true;
2726 else
2727 is_unicode = false;
2728
2729
3655 /* skip service field (NB: this field is always ASCII) */ 2730 /* skip service field (NB: this field is always ASCII) */
3656 if (length == 3) { 2731 if (length == 3) {
3657 if ((bcc_ptr[0] == 'I') && (bcc_ptr[1] == 'P') && 2732 if ((bcc_ptr[0] == 'I') && (bcc_ptr[1] == 'P') &&
@@ -3666,40 +2741,16 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3666 } 2741 }
3667 } 2742 }
3668 bcc_ptr += length + 1; 2743 bcc_ptr += length + 1;
2744 bytes_left -= (length + 1);
3669 strncpy(tcon->treeName, tree, MAX_TREE_SIZE); 2745 strncpy(tcon->treeName, tree, MAX_TREE_SIZE);
3670 if (smb_buffer->Flags2 & SMBFLG2_UNICODE) { 2746
3671 length = UniStrnlen((wchar_t *) bcc_ptr, 512); 2747 /* mostly informational -- no need to fail on error here */
3672 if ((bcc_ptr + (2 * length)) - 2748 tcon->nativeFileSystem = cifs_strndup_from_ucs(bcc_ptr,
3673 pByteArea(smb_buffer_response) <= 2749 bytes_left, is_unicode,
3674 BCC(smb_buffer_response)) { 2750 nls_codepage);
3675 kfree(tcon->nativeFileSystem); 2751
3676 tcon->nativeFileSystem = 2752 cFYI(1, ("nativeFileSystem=%s", tcon->nativeFileSystem));
3677 kzalloc(2*(length + 1), GFP_KERNEL); 2753
3678 if (tcon->nativeFileSystem)
3679 cifs_strfromUCS_le(
3680 tcon->nativeFileSystem,
3681 (__le16 *) bcc_ptr,
3682 length, nls_codepage);
3683 bcc_ptr += 2 * length;
3684 bcc_ptr[0] = 0; /* null terminate the string */
3685 bcc_ptr[1] = 0;
3686 bcc_ptr += 2;
3687 }
3688 /* else do not bother copying these information fields*/
3689 } else {
3690 length = strnlen(bcc_ptr, 1024);
3691 if ((bcc_ptr + length) -
3692 pByteArea(smb_buffer_response) <=
3693 BCC(smb_buffer_response)) {
3694 kfree(tcon->nativeFileSystem);
3695 tcon->nativeFileSystem =
3696 kzalloc(length + 1, GFP_KERNEL);
3697 if (tcon->nativeFileSystem)
3698 strncpy(tcon->nativeFileSystem, bcc_ptr,
3699 length);
3700 }
3701 /* else do not bother copying these information fields*/
3702 }
3703 if ((smb_buffer_response->WordCount == 3) || 2754 if ((smb_buffer_response->WordCount == 3) ||
3704 (smb_buffer_response->WordCount == 7)) 2755 (smb_buffer_response->WordCount == 7))
3705 /* field is in same location */ 2756 /* field is in same location */
@@ -3738,8 +2789,6 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3738 struct nls_table *nls_info) 2789 struct nls_table *nls_info)
3739{ 2790{
3740 int rc = 0; 2791 int rc = 0;
3741 char ntlm_session_key[CIFS_SESS_KEY_SIZE];
3742 bool ntlmv2_flag = false;
3743 int first_time = 0; 2792 int first_time = 0;
3744 struct TCP_Server_Info *server = pSesInfo->server; 2793 struct TCP_Server_Info *server = pSesInfo->server;
3745 2794
@@ -3771,83 +2820,19 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3771 pSesInfo->capabilities = server->capabilities; 2820 pSesInfo->capabilities = server->capabilities;
3772 if (linuxExtEnabled == 0) 2821 if (linuxExtEnabled == 0)
3773 pSesInfo->capabilities &= (~CAP_UNIX); 2822 pSesInfo->capabilities &= (~CAP_UNIX);
3774 /* pSesInfo->sequence_number = 0;*/ 2823
3775 cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", 2824 cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
3776 server->secMode, server->capabilities, server->timeAdj)); 2825 server->secMode, server->capabilities, server->timeAdj));
3777 2826
3778 if (experimEnabled < 2) 2827 rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info);
3779 rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info);
3780 else if (extended_security
3781 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
3782 && (server->secType == NTLMSSP)) {
3783 rc = -EOPNOTSUPP;
3784 } else if (extended_security
3785 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
3786 && (server->secType == RawNTLMSSP)) {
3787 cFYI(1, ("NTLMSSP sesssetup"));
3788 rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag,
3789 nls_info);
3790 if (!rc) {
3791 if (ntlmv2_flag) {
3792 char *v2_response;
3793 cFYI(1, ("more secure NTLM ver2 hash"));
3794 if (CalcNTLMv2_partial_mac_key(pSesInfo,
3795 nls_info)) {
3796 rc = -ENOMEM;
3797 goto ss_err_exit;
3798 } else
3799 v2_response = kmalloc(16 + 64 /* blob*/,
3800 GFP_KERNEL);
3801 if (v2_response) {
3802 CalcNTLMv2_response(pSesInfo,
3803 v2_response);
3804 /* if (first_time)
3805 cifs_calculate_ntlmv2_mac_key */
3806 kfree(v2_response);
3807 /* BB Put dummy sig in SessSetup PDU? */
3808 } else {
3809 rc = -ENOMEM;
3810 goto ss_err_exit;
3811 }
3812
3813 } else {
3814 SMBNTencrypt(pSesInfo->password,
3815 server->cryptKey,
3816 ntlm_session_key);
3817
3818 if (first_time)
3819 cifs_calculate_mac_key(
3820 &server->mac_signing_key,
3821 ntlm_session_key,
3822 pSesInfo->password);
3823 }
3824 /* for better security the weaker lanman hash not sent
3825 in AuthSessSetup so we no longer calculate it */
3826
3827 rc = CIFSNTLMSSPAuthSessSetup(xid, pSesInfo,
3828 ntlm_session_key,
3829 ntlmv2_flag,
3830 nls_info);
3831 }
3832 } else { /* old style NTLM 0.12 session setup */
3833 SMBNTencrypt(pSesInfo->password, server->cryptKey,
3834 ntlm_session_key);
3835
3836 if (first_time)
3837 cifs_calculate_mac_key(&server->mac_signing_key,
3838 ntlm_session_key,
3839 pSesInfo->password);
3840
3841 rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info);
3842 }
3843 if (rc) { 2828 if (rc) {
3844 cERROR(1, ("Send error in SessSetup = %d", rc)); 2829 cERROR(1, ("Send error in SessSetup = %d", rc));
3845 } else { 2830 } else {
3846 cFYI(1, ("CIFS Session Established successfully")); 2831 cFYI(1, ("CIFS Session Established successfully"));
3847 spin_lock(&GlobalMid_Lock); 2832 spin_lock(&GlobalMid_Lock);
3848 pSesInfo->status = CifsGood; 2833 pSesInfo->status = CifsGood;
3849 pSesInfo->need_reconnect = false; 2834 pSesInfo->need_reconnect = false;
3850 spin_unlock(&GlobalMid_Lock); 2835 spin_unlock(&GlobalMid_Lock);
3851 } 2836 }
3852 2837
3853ss_err_exit: 2838ss_err_exit:
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 54dce78fbb73..11431ed72a7f 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -129,12 +129,62 @@ cifs_bp_rename_retry:
129 return full_path; 129 return full_path;
130} 130}
131 131
132static void
133cifs_fill_fileinfo(struct inode *newinode, __u16 fileHandle,
134 struct cifsTconInfo *tcon, bool write_only)
135{
136 int oplock = 0;
137 struct cifsFileInfo *pCifsFile;
138 struct cifsInodeInfo *pCifsInode;
139
140 pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
141
142 if (pCifsFile == NULL)
143 return;
144
145 if (oplockEnabled)
146 oplock = REQ_OPLOCK;
147
148 pCifsFile->netfid = fileHandle;
149 pCifsFile->pid = current->tgid;
150 pCifsFile->pInode = newinode;
151 pCifsFile->invalidHandle = false;
152 pCifsFile->closePend = false;
153 mutex_init(&pCifsFile->fh_mutex);
154 mutex_init(&pCifsFile->lock_mutex);
155 INIT_LIST_HEAD(&pCifsFile->llist);
156 atomic_set(&pCifsFile->wrtPending, 0);
157
158 /* set the following in open now
159 pCifsFile->pfile = file; */
160 write_lock(&GlobalSMBSeslock);
161 list_add(&pCifsFile->tlist, &tcon->openFileList);
162 pCifsInode = CIFS_I(newinode);
163 if (pCifsInode) {
164 /* if readable file instance put first in list*/
165 if (write_only)
166 list_add_tail(&pCifsFile->flist,
167 &pCifsInode->openFileList);
168 else
169 list_add(&pCifsFile->flist, &pCifsInode->openFileList);
170
171 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
172 pCifsInode->clientCanCacheAll = true;
173 pCifsInode->clientCanCacheRead = true;
174 cFYI(1, ("Exclusive Oplock inode %p", newinode));
175 } else if ((oplock & 0xF) == OPLOCK_READ)
176 pCifsInode->clientCanCacheRead = true;
177 }
178 write_unlock(&GlobalSMBSeslock);
179}
180
132int cifs_posix_open(char *full_path, struct inode **pinode, 181int cifs_posix_open(char *full_path, struct inode **pinode,
133 struct super_block *sb, int mode, int oflags, 182 struct super_block *sb, int mode, int oflags,
134 int *poplock, __u16 *pnetfid, int xid) 183 int *poplock, __u16 *pnetfid, int xid)
135{ 184{
136 int rc; 185 int rc;
137 __u32 oplock; 186 __u32 oplock;
187 bool write_only = false;
138 FILE_UNIX_BASIC_INFO *presp_data; 188 FILE_UNIX_BASIC_INFO *presp_data;
139 __u32 posix_flags = 0; 189 __u32 posix_flags = 0;
140 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 190 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
@@ -172,6 +222,8 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
172 if (oflags & O_DIRECT) 222 if (oflags & O_DIRECT)
173 posix_flags |= SMB_O_DIRECT; 223 posix_flags |= SMB_O_DIRECT;
174 224
225 if (!(oflags & FMODE_READ))
226 write_only = true;
175 227
176 rc = CIFSPOSIXCreate(xid, cifs_sb->tcon, posix_flags, mode, 228 rc = CIFSPOSIXCreate(xid, cifs_sb->tcon, posix_flags, mode,
177 pnetfid, presp_data, &oplock, full_path, 229 pnetfid, presp_data, &oplock, full_path,
@@ -187,8 +239,10 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
187 if (!pinode) 239 if (!pinode)
188 goto posix_open_ret; /* caller does not need info */ 240 goto posix_open_ret; /* caller does not need info */
189 241
190 if (*pinode == NULL) 242 if (*pinode == NULL) {
191 *pinode = cifs_new_inode(sb, &presp_data->UniqueId); 243 __u64 unique_id = le64_to_cpu(presp_data->UniqueId);
244 *pinode = cifs_new_inode(sb, &unique_id);
245 }
192 /* else an inode was passed in. Update its info, don't create one */ 246 /* else an inode was passed in. Update its info, don't create one */
193 247
194 /* We do not need to close the file if new_inode fails since 248 /* We do not need to close the file if new_inode fails since
@@ -198,6 +252,8 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
198 252
199 posix_fill_in_inode(*pinode, presp_data, 1); 253 posix_fill_in_inode(*pinode, presp_data, 1);
200 254
255 cifs_fill_fileinfo(*pinode, *pnetfid, cifs_sb->tcon, write_only);
256
201posix_open_ret: 257posix_open_ret:
202 kfree(presp_data); 258 kfree(presp_data);
203 return rc; 259 return rc;
@@ -225,6 +281,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
225 int create_options = CREATE_NOT_DIR; 281 int create_options = CREATE_NOT_DIR;
226 int oplock = 0; 282 int oplock = 0;
227 int oflags; 283 int oflags;
284 bool posix_create = false;
228 /* 285 /*
229 * BB below access is probably too much for mknod to request 286 * BB below access is probably too much for mknod to request
230 * but we have to do query and setpathinfo so requesting 287 * but we have to do query and setpathinfo so requesting
@@ -239,7 +296,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
239 char *full_path = NULL; 296 char *full_path = NULL;
240 FILE_ALL_INFO *buf = NULL; 297 FILE_ALL_INFO *buf = NULL;
241 struct inode *newinode = NULL; 298 struct inode *newinode = NULL;
242 struct cifsInodeInfo *pCifsInode;
243 int disposition = FILE_OVERWRITE_IF; 299 int disposition = FILE_OVERWRITE_IF;
244 bool write_only = false; 300 bool write_only = false;
245 301
@@ -273,11 +329,13 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
273 negotation. EREMOTE indicates DFS junction, which is not 329 negotation. EREMOTE indicates DFS junction, which is not
274 handled in posix open */ 330 handled in posix open */
275 331
276 if ((rc == 0) && (newinode == NULL)) 332 if (rc == 0) {
277 goto cifs_create_get_file_info; /* query inode info */ 333 posix_create = true;
278 else if (rc == 0) /* success, no need to query */ 334 if (newinode == NULL) /* query inode info */
279 goto cifs_create_set_dentry; 335 goto cifs_create_get_file_info;
280 else if ((rc != -EIO) && (rc != -EREMOTE) && 336 else /* success, no need to query */
337 goto cifs_create_set_dentry;
338 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
281 (rc != -EOPNOTSUPP)) /* path not found or net err */ 339 (rc != -EOPNOTSUPP)) /* path not found or net err */
282 goto cifs_create_out; 340 goto cifs_create_out;
283 /* else fallthrough to retry, using older open call, this is 341 /* else fallthrough to retry, using older open call, this is
@@ -409,45 +467,9 @@ cifs_create_set_dentry:
409 if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) { 467 if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) {
410 /* mknod case - do not leave file open */ 468 /* mknod case - do not leave file open */
411 CIFSSMBClose(xid, tcon, fileHandle); 469 CIFSSMBClose(xid, tcon, fileHandle);
412 } else if (newinode) { 470 } else if (!(posix_create) && (newinode)) {
413 struct cifsFileInfo *pCifsFile = 471 cifs_fill_fileinfo(newinode, fileHandle,
414 kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); 472 cifs_sb->tcon, write_only);
415
416 if (pCifsFile == NULL)
417 goto cifs_create_out;
418 pCifsFile->netfid = fileHandle;
419 pCifsFile->pid = current->tgid;
420 pCifsFile->pInode = newinode;
421 pCifsFile->invalidHandle = false;
422 pCifsFile->closePend = false;
423 init_MUTEX(&pCifsFile->fh_sem);
424 mutex_init(&pCifsFile->lock_mutex);
425 INIT_LIST_HEAD(&pCifsFile->llist);
426 atomic_set(&pCifsFile->wrtPending, 0);
427
428 /* set the following in open now
429 pCifsFile->pfile = file; */
430 write_lock(&GlobalSMBSeslock);
431 list_add(&pCifsFile->tlist, &tcon->openFileList);
432 pCifsInode = CIFS_I(newinode);
433 if (pCifsInode) {
434 /* if readable file instance put first in list*/
435 if (write_only) {
436 list_add_tail(&pCifsFile->flist,
437 &pCifsInode->openFileList);
438 } else {
439 list_add(&pCifsFile->flist,
440 &pCifsInode->openFileList);
441 }
442 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
443 pCifsInode->clientCanCacheAll = true;
444 pCifsInode->clientCanCacheRead = true;
445 cFYI(1, ("Exclusive Oplock inode %p",
446 newinode));
447 } else if ((oplock & 0xF) == OPLOCK_READ)
448 pCifsInode->clientCanCacheRead = true;
449 }
450 write_unlock(&GlobalSMBSeslock);
451 } 473 }
452cifs_create_out: 474cifs_create_out:
453 kfree(buf); 475 kfree(buf);
@@ -580,17 +602,21 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
580 return rc; 602 return rc;
581} 603}
582 604
583
584struct dentry * 605struct dentry *
585cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, 606cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
586 struct nameidata *nd) 607 struct nameidata *nd)
587{ 608{
588 int xid; 609 int xid;
589 int rc = 0; /* to get around spurious gcc warning, set to zero here */ 610 int rc = 0; /* to get around spurious gcc warning, set to zero here */
611 int oplock = 0;
612 int mode;
613 __u16 fileHandle = 0;
614 bool posix_open = false;
590 struct cifs_sb_info *cifs_sb; 615 struct cifs_sb_info *cifs_sb;
591 struct cifsTconInfo *pTcon; 616 struct cifsTconInfo *pTcon;
592 struct inode *newInode = NULL; 617 struct inode *newInode = NULL;
593 char *full_path = NULL; 618 char *full_path = NULL;
619 struct file *filp;
594 620
595 xid = GetXid(); 621 xid = GetXid();
596 622
@@ -632,12 +658,37 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
632 } 658 }
633 cFYI(1, ("Full path: %s inode = 0x%p", full_path, direntry->d_inode)); 659 cFYI(1, ("Full path: %s inode = 0x%p", full_path, direntry->d_inode));
634 660
635 if (pTcon->unix_ext) 661 if (pTcon->unix_ext) {
636 rc = cifs_get_inode_info_unix(&newInode, full_path, 662 if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) &&
637 parent_dir_inode->i_sb, xid); 663 (nd->flags & LOOKUP_OPEN)) {
638 else 664 if (!((nd->intent.open.flags & O_CREAT) &&
665 (nd->intent.open.flags & O_EXCL))) {
666 mode = nd->intent.open.create_mode &
667 ~current_umask();
668 rc = cifs_posix_open(full_path, &newInode,
669 parent_dir_inode->i_sb, mode,
670 nd->intent.open.flags, &oplock,
671 &fileHandle, xid);
672 /*
673 * This code works around a bug in
674 * samba posix open in samba versions 3.3.1
675 * and earlier where create works
676 * but open fails with invalid parameter.
677 * If either of these error codes are
678 * returned, follow the normal lookup.
679 * Otherwise, the error during posix open
680 * is handled.
681 */
682 if ((rc != -EINVAL) && (rc != -EOPNOTSUPP))
683 posix_open = true;
684 }
685 }
686 if (!posix_open)
687 rc = cifs_get_inode_info_unix(&newInode, full_path,
688 parent_dir_inode->i_sb, xid);
689 } else
639 rc = cifs_get_inode_info(&newInode, full_path, NULL, 690 rc = cifs_get_inode_info(&newInode, full_path, NULL,
640 parent_dir_inode->i_sb, xid, NULL); 691 parent_dir_inode->i_sb, xid, NULL);
641 692
642 if ((rc == 0) && (newInode != NULL)) { 693 if ((rc == 0) && (newInode != NULL)) {
643 if (pTcon->nocase) 694 if (pTcon->nocase)
@@ -645,7 +696,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
645 else 696 else
646 direntry->d_op = &cifs_dentry_ops; 697 direntry->d_op = &cifs_dentry_ops;
647 d_add(direntry, newInode); 698 d_add(direntry, newInode);
648 699 if (posix_open)
700 filp = lookup_instantiate_filp(nd, direntry, NULL);
649 /* since paths are not looked up by component - the parent 701 /* since paths are not looked up by component - the parent
650 directories are presumed to be good here */ 702 directories are presumed to be good here */
651 renew_parental_timestamps(direntry); 703 renew_parental_timestamps(direntry);
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 1e0c1bd8f2e4..df4a306f697e 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -78,7 +78,7 @@ dns_resolver_instantiate(struct key *key, const void *data,
78 } 78 }
79 79
80 key->type_data.x[0] = datalen; 80 key->type_data.x[0] = datalen;
81 rcu_assign_pointer(key->payload.data, ip); 81 key->payload.data = ip;
82 82
83 return rc; 83 return rc;
84} 84}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 81747acca4c4..38c06f826575 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -46,7 +46,7 @@ static inline struct cifsFileInfo *cifs_init_private(
46 memset(private_data, 0, sizeof(struct cifsFileInfo)); 46 memset(private_data, 0, sizeof(struct cifsFileInfo));
47 private_data->netfid = netfid; 47 private_data->netfid = netfid;
48 private_data->pid = current->tgid; 48 private_data->pid = current->tgid;
49 init_MUTEX(&private_data->fh_sem); 49 mutex_init(&private_data->fh_mutex);
50 mutex_init(&private_data->lock_mutex); 50 mutex_init(&private_data->lock_mutex);
51 INIT_LIST_HEAD(&private_data->llist); 51 INIT_LIST_HEAD(&private_data->llist);
52 private_data->pfile = file; /* needed for writepage */ 52 private_data->pfile = file; /* needed for writepage */
@@ -129,15 +129,12 @@ static inline int cifs_posix_open_inode_helper(struct inode *inode,
129 struct file *file, struct cifsInodeInfo *pCifsInode, 129 struct file *file, struct cifsInodeInfo *pCifsInode,
130 struct cifsFileInfo *pCifsFile, int oplock, u16 netfid) 130 struct cifsFileInfo *pCifsFile, int oplock, u16 netfid)
131{ 131{
132 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
133/* struct timespec temp; */ /* BB REMOVEME BB */
134 132
135 file->private_data = kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); 133 file->private_data = kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
136 if (file->private_data == NULL) 134 if (file->private_data == NULL)
137 return -ENOMEM; 135 return -ENOMEM;
138 pCifsFile = cifs_init_private(file->private_data, inode, file, netfid); 136 pCifsFile = cifs_init_private(file->private_data, inode, file, netfid);
139 write_lock(&GlobalSMBSeslock); 137 write_lock(&GlobalSMBSeslock);
140 list_add(&pCifsFile->tlist, &cifs_sb->tcon->openFileList);
141 138
142 pCifsInode = CIFS_I(file->f_path.dentry->d_inode); 139 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
143 if (pCifsInode == NULL) { 140 if (pCifsInode == NULL) {
@@ -145,17 +142,6 @@ static inline int cifs_posix_open_inode_helper(struct inode *inode,
145 return -EINVAL; 142 return -EINVAL;
146 } 143 }
147 144
148 /* want handles we can use to read with first
149 in the list so we do not have to walk the
150 list to search for one in write_begin */
151 if ((file->f_flags & O_ACCMODE) == O_WRONLY) {
152 list_add_tail(&pCifsFile->flist,
153 &pCifsInode->openFileList);
154 } else {
155 list_add(&pCifsFile->flist,
156 &pCifsInode->openFileList);
157 }
158
159 if (pCifsInode->clientCanCacheRead) { 145 if (pCifsInode->clientCanCacheRead) {
160 /* we have the inode open somewhere else 146 /* we have the inode open somewhere else
161 no need to discard cache data */ 147 no need to discard cache data */
@@ -284,35 +270,32 @@ int cifs_open(struct inode *inode, struct file *file)
284 cifs_sb = CIFS_SB(inode->i_sb); 270 cifs_sb = CIFS_SB(inode->i_sb);
285 tcon = cifs_sb->tcon; 271 tcon = cifs_sb->tcon;
286 272
287 if (file->f_flags & O_CREAT) { 273 /* search inode for this file and fill in file->private_data */
288 /* search inode for this file and fill in file->private_data */ 274 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
289 pCifsInode = CIFS_I(file->f_path.dentry->d_inode); 275 read_lock(&GlobalSMBSeslock);
290 read_lock(&GlobalSMBSeslock); 276 list_for_each(tmp, &pCifsInode->openFileList) {
291 list_for_each(tmp, &pCifsInode->openFileList) { 277 pCifsFile = list_entry(tmp, struct cifsFileInfo,
292 pCifsFile = list_entry(tmp, struct cifsFileInfo, 278 flist);
293 flist); 279 if ((pCifsFile->pfile == NULL) &&
294 if ((pCifsFile->pfile == NULL) && 280 (pCifsFile->pid == current->tgid)) {
295 (pCifsFile->pid == current->tgid)) { 281 /* mode set in cifs_create */
296 /* mode set in cifs_create */ 282
297 283 /* needed for writepage */
298 /* needed for writepage */ 284 pCifsFile->pfile = file;
299 pCifsFile->pfile = file; 285
300 286 file->private_data = pCifsFile;
301 file->private_data = pCifsFile; 287 break;
302 break;
303 }
304 }
305 read_unlock(&GlobalSMBSeslock);
306 if (file->private_data != NULL) {
307 rc = 0;
308 FreeXid(xid);
309 return rc;
310 } else {
311 if (file->f_flags & O_EXCL)
312 cERROR(1, ("could not find file instance for "
313 "new file %p", file));
314 } 288 }
315 } 289 }
290 read_unlock(&GlobalSMBSeslock);
291
292 if (file->private_data != NULL) {
293 rc = 0;
294 FreeXid(xid);
295 return rc;
296 } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
297 cERROR(1, ("could not find file instance for "
298 "new file %p", file));
316 299
317 full_path = build_path_from_dentry(file->f_path.dentry); 300 full_path = build_path_from_dentry(file->f_path.dentry);
318 if (full_path == NULL) { 301 if (full_path == NULL) {
@@ -500,9 +483,9 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
500 return -EBADF; 483 return -EBADF;
501 484
502 xid = GetXid(); 485 xid = GetXid();
503 down(&pCifsFile->fh_sem); 486 mutex_unlock(&pCifsFile->fh_mutex);
504 if (!pCifsFile->invalidHandle) { 487 if (!pCifsFile->invalidHandle) {
505 up(&pCifsFile->fh_sem); 488 mutex_lock(&pCifsFile->fh_mutex);
506 FreeXid(xid); 489 FreeXid(xid);
507 return 0; 490 return 0;
508 } 491 }
@@ -533,7 +516,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush)
533 if (full_path == NULL) { 516 if (full_path == NULL) {
534 rc = -ENOMEM; 517 rc = -ENOMEM;
535reopen_error_exit: 518reopen_error_exit:
536 up(&pCifsFile->fh_sem); 519 mutex_lock(&pCifsFile->fh_mutex);
537 FreeXid(xid); 520 FreeXid(xid);
538 return rc; 521 return rc;
539 } 522 }
@@ -575,14 +558,14 @@ reopen_error_exit:
575 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & 558 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
576 CIFS_MOUNT_MAP_SPECIAL_CHR); 559 CIFS_MOUNT_MAP_SPECIAL_CHR);
577 if (rc) { 560 if (rc) {
578 up(&pCifsFile->fh_sem); 561 mutex_lock(&pCifsFile->fh_mutex);
579 cFYI(1, ("cifs_open returned 0x%x", rc)); 562 cFYI(1, ("cifs_open returned 0x%x", rc));
580 cFYI(1, ("oplock: %d", oplock)); 563 cFYI(1, ("oplock: %d", oplock));
581 } else { 564 } else {
582reopen_success: 565reopen_success:
583 pCifsFile->netfid = netfid; 566 pCifsFile->netfid = netfid;
584 pCifsFile->invalidHandle = false; 567 pCifsFile->invalidHandle = false;
585 up(&pCifsFile->fh_sem); 568 mutex_lock(&pCifsFile->fh_mutex);
586 pCifsInode = CIFS_I(inode); 569 pCifsInode = CIFS_I(inode);
587 if (pCifsInode) { 570 if (pCifsInode) {
588 if (can_flush) { 571 if (can_flush) {
@@ -971,6 +954,40 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
971 return rc; 954 return rc;
972} 955}
973 956
957/*
958 * Set the timeout on write requests past EOF. For some servers (Windows)
959 * these calls can be very long.
960 *
961 * If we're writing >10M past the EOF we give a 180s timeout. Anything less
962 * than that gets a 45s timeout. Writes not past EOF get 15s timeouts.
963 * The 10M cutoff is totally arbitrary. A better scheme for this would be
964 * welcome if someone wants to suggest one.
965 *
966 * We may be able to do a better job with this if there were some way to
967 * declare that a file should be sparse.
968 */
969static int
970cifs_write_timeout(struct cifsInodeInfo *cifsi, loff_t offset)
971{
972 if (offset <= cifsi->server_eof)
973 return CIFS_STD_OP;
974 else if (offset > (cifsi->server_eof + (10 * 1024 * 1024)))
975 return CIFS_VLONG_OP;
976 else
977 return CIFS_LONG_OP;
978}
979
980/* update the file size (if needed) after a write */
981static void
982cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
983 unsigned int bytes_written)
984{
985 loff_t end_of_write = offset + bytes_written;
986
987 if (end_of_write > cifsi->server_eof)
988 cifsi->server_eof = end_of_write;
989}
990
974ssize_t cifs_user_write(struct file *file, const char __user *write_data, 991ssize_t cifs_user_write(struct file *file, const char __user *write_data,
975 size_t write_size, loff_t *poffset) 992 size_t write_size, loff_t *poffset)
976{ 993{
@@ -981,6 +998,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
981 struct cifsTconInfo *pTcon; 998 struct cifsTconInfo *pTcon;
982 int xid, long_op; 999 int xid, long_op;
983 struct cifsFileInfo *open_file; 1000 struct cifsFileInfo *open_file;
1001 struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
984 1002
985 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 1003 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
986 1004
@@ -1000,11 +1018,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
1000 1018
1001 xid = GetXid(); 1019 xid = GetXid();
1002 1020
1003 if (*poffset > file->f_path.dentry->d_inode->i_size) 1021 long_op = cifs_write_timeout(cifsi, *poffset);
1004 long_op = CIFS_VLONG_OP; /* writes past EOF take long time */
1005 else
1006 long_op = CIFS_LONG_OP;
1007
1008 for (total_written = 0; write_size > total_written; 1022 for (total_written = 0; write_size > total_written;
1009 total_written += bytes_written) { 1023 total_written += bytes_written) {
1010 rc = -EAGAIN; 1024 rc = -EAGAIN;
@@ -1048,8 +1062,10 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
1048 FreeXid(xid); 1062 FreeXid(xid);
1049 return rc; 1063 return rc;
1050 } 1064 }
1051 } else 1065 } else {
1066 cifs_update_eof(cifsi, *poffset, bytes_written);
1052 *poffset += bytes_written; 1067 *poffset += bytes_written;
1068 }
1053 long_op = CIFS_STD_OP; /* subsequent writes fast - 1069 long_op = CIFS_STD_OP; /* subsequent writes fast -
1054 15 seconds is plenty */ 1070 15 seconds is plenty */
1055 } 1071 }
@@ -1085,6 +1101,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
1085 struct cifsTconInfo *pTcon; 1101 struct cifsTconInfo *pTcon;
1086 int xid, long_op; 1102 int xid, long_op;
1087 struct cifsFileInfo *open_file; 1103 struct cifsFileInfo *open_file;
1104 struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
1088 1105
1089 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 1106 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1090 1107
@@ -1099,11 +1116,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
1099 1116
1100 xid = GetXid(); 1117 xid = GetXid();
1101 1118
1102 if (*poffset > file->f_path.dentry->d_inode->i_size) 1119 long_op = cifs_write_timeout(cifsi, *poffset);
1103 long_op = CIFS_VLONG_OP; /* writes past EOF can be slow */
1104 else
1105 long_op = CIFS_LONG_OP;
1106
1107 for (total_written = 0; write_size > total_written; 1120 for (total_written = 0; write_size > total_written;
1108 total_written += bytes_written) { 1121 total_written += bytes_written) {
1109 rc = -EAGAIN; 1122 rc = -EAGAIN;
@@ -1166,8 +1179,10 @@ static ssize_t cifs_write(struct file *file, const char *write_data,
1166 FreeXid(xid); 1179 FreeXid(xid);
1167 return rc; 1180 return rc;
1168 } 1181 }
1169 } else 1182 } else {
1183 cifs_update_eof(cifsi, *poffset, bytes_written);
1170 *poffset += bytes_written; 1184 *poffset += bytes_written;
1185 }
1171 long_op = CIFS_STD_OP; /* subsequent writes fast - 1186 long_op = CIFS_STD_OP; /* subsequent writes fast -
1172 15 seconds is plenty */ 1187 15 seconds is plenty */
1173 } 1188 }
@@ -1380,11 +1395,12 @@ static int cifs_writepages(struct address_space *mapping,
1380 int nr_pages; 1395 int nr_pages;
1381 __u64 offset = 0; 1396 __u64 offset = 0;
1382 struct cifsFileInfo *open_file; 1397 struct cifsFileInfo *open_file;
1398 struct cifsInodeInfo *cifsi = CIFS_I(mapping->host);
1383 struct page *page; 1399 struct page *page;
1384 struct pagevec pvec; 1400 struct pagevec pvec;
1385 int rc = 0; 1401 int rc = 0;
1386 int scanned = 0; 1402 int scanned = 0;
1387 int xid; 1403 int xid, long_op;
1388 1404
1389 cifs_sb = CIFS_SB(mapping->host->i_sb); 1405 cifs_sb = CIFS_SB(mapping->host->i_sb);
1390 1406
@@ -1528,12 +1544,15 @@ retry:
1528 cERROR(1, ("No writable handles for inode")); 1544 cERROR(1, ("No writable handles for inode"));
1529 rc = -EBADF; 1545 rc = -EBADF;
1530 } else { 1546 } else {
1547 long_op = cifs_write_timeout(cifsi, offset);
1531 rc = CIFSSMBWrite2(xid, cifs_sb->tcon, 1548 rc = CIFSSMBWrite2(xid, cifs_sb->tcon,
1532 open_file->netfid, 1549 open_file->netfid,
1533 bytes_to_write, offset, 1550 bytes_to_write, offset,
1534 &bytes_written, iov, n_iov, 1551 &bytes_written, iov, n_iov,
1535 CIFS_LONG_OP); 1552 long_op);
1536 atomic_dec(&open_file->wrtPending); 1553 atomic_dec(&open_file->wrtPending);
1554 cifs_update_eof(cifsi, offset, bytes_written);
1555
1537 if (rc || bytes_written < bytes_to_write) { 1556 if (rc || bytes_written < bytes_to_write) {
1538 cERROR(1, ("Write2 ret %d, wrote %d", 1557 cERROR(1, ("Write2 ret %d, wrote %d",
1539 rc, bytes_written)); 1558 rc, bytes_written));
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index f121a80fdd6f..9c869a6dcba1 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -143,6 +143,7 @@ static void cifs_unix_info_to_inode(struct inode *inode,
143 143
144 inode->i_nlink = le64_to_cpu(info->Nlinks); 144 inode->i_nlink = le64_to_cpu(info->Nlinks);
145 145
146 cifsInfo->server_eof = end_of_file;
146 spin_lock(&inode->i_lock); 147 spin_lock(&inode->i_lock);
147 if (is_size_safe_to_change(cifsInfo, end_of_file)) { 148 if (is_size_safe_to_change(cifsInfo, end_of_file)) {
148 /* 149 /*
@@ -276,7 +277,8 @@ int cifs_get_inode_info_unix(struct inode **pinode,
276 277
277 /* get new inode */ 278 /* get new inode */
278 if (*pinode == NULL) { 279 if (*pinode == NULL) {
279 *pinode = cifs_new_inode(sb, &find_data.UniqueId); 280 __u64 unique_id = le64_to_cpu(find_data.UniqueId);
281 *pinode = cifs_new_inode(sb, &unique_id);
280 if (*pinode == NULL) { 282 if (*pinode == NULL) {
281 rc = -ENOMEM; 283 rc = -ENOMEM;
282 goto cgiiu_exit; 284 goto cgiiu_exit;
@@ -605,12 +607,12 @@ int cifs_get_inode_info(struct inode **pinode,
605 inode->i_mode |= S_IFREG; 607 inode->i_mode |= S_IFREG;
606 } 608 }
607 609
610 cifsInfo->server_eof = le64_to_cpu(pfindData->EndOfFile);
608 spin_lock(&inode->i_lock); 611 spin_lock(&inode->i_lock);
609 if (is_size_safe_to_change(cifsInfo, 612 if (is_size_safe_to_change(cifsInfo, cifsInfo->server_eof)) {
610 le64_to_cpu(pfindData->EndOfFile))) {
611 /* can not safely shrink the file size here if the 613 /* can not safely shrink the file size here if the
612 client is writing to it due to potential races */ 614 client is writing to it due to potential races */
613 i_size_write(inode, le64_to_cpu(pfindData->EndOfFile)); 615 i_size_write(inode, cifsInfo->server_eof);
614 616
615 /* 512 bytes (2**9) is the fake blocksize that must be 617 /* 512 bytes (2**9) is the fake blocksize that must be
616 used for this calculation */ 618 used for this calculation */
@@ -960,13 +962,21 @@ undo_setattr:
960 goto out_close; 962 goto out_close;
961} 963}
962 964
965
966/*
967 * If dentry->d_inode is null (usually meaning the cached dentry
968 * is a negative dentry) then we would attempt a standard SMB delete, but
969 * if that fails we can not attempt the fall back mechanisms on EACESS
970 * but will return the EACESS to the caller. Note that the VFS does not call
971 * unlink on negative dentries currently.
972 */
963int cifs_unlink(struct inode *dir, struct dentry *dentry) 973int cifs_unlink(struct inode *dir, struct dentry *dentry)
964{ 974{
965 int rc = 0; 975 int rc = 0;
966 int xid; 976 int xid;
967 char *full_path = NULL; 977 char *full_path = NULL;
968 struct inode *inode = dentry->d_inode; 978 struct inode *inode = dentry->d_inode;
969 struct cifsInodeInfo *cifsInode = CIFS_I(inode); 979 struct cifsInodeInfo *cifs_inode;
970 struct super_block *sb = dir->i_sb; 980 struct super_block *sb = dir->i_sb;
971 struct cifs_sb_info *cifs_sb = CIFS_SB(sb); 981 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
972 struct cifsTconInfo *tcon = cifs_sb->tcon; 982 struct cifsTconInfo *tcon = cifs_sb->tcon;
@@ -1010,7 +1020,7 @@ psx_del_no_retry:
1010 rc = cifs_rename_pending_delete(full_path, dentry, xid); 1020 rc = cifs_rename_pending_delete(full_path, dentry, xid);
1011 if (rc == 0) 1021 if (rc == 0)
1012 drop_nlink(inode); 1022 drop_nlink(inode);
1013 } else if (rc == -EACCES && dosattr == 0) { 1023 } else if ((rc == -EACCES) && (dosattr == 0) && inode) {
1014 attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); 1024 attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
1015 if (attrs == NULL) { 1025 if (attrs == NULL) {
1016 rc = -ENOMEM; 1026 rc = -ENOMEM;
@@ -1018,7 +1028,8 @@ psx_del_no_retry:
1018 } 1028 }
1019 1029
1020 /* try to reset dos attributes */ 1030 /* try to reset dos attributes */
1021 origattr = cifsInode->cifsAttrs; 1031 cifs_inode = CIFS_I(inode);
1032 origattr = cifs_inode->cifsAttrs;
1022 if (origattr == 0) 1033 if (origattr == 0)
1023 origattr |= ATTR_NORMAL; 1034 origattr |= ATTR_NORMAL;
1024 dosattr = origattr & ~ATTR_READONLY; 1035 dosattr = origattr & ~ATTR_READONLY;
@@ -1039,13 +1050,13 @@ psx_del_no_retry:
1039 1050
1040out_reval: 1051out_reval:
1041 if (inode) { 1052 if (inode) {
1042 cifsInode = CIFS_I(inode); 1053 cifs_inode = CIFS_I(inode);
1043 cifsInode->time = 0; /* will force revalidate to get info 1054 cifs_inode->time = 0; /* will force revalidate to get info
1044 when needed */ 1055 when needed */
1045 inode->i_ctime = current_fs_time(sb); 1056 inode->i_ctime = current_fs_time(sb);
1046 } 1057 }
1047 dir->i_ctime = dir->i_mtime = current_fs_time(sb); 1058 dir->i_ctime = dir->i_mtime = current_fs_time(sb);
1048 cifsInode = CIFS_I(dir); 1059 cifs_inode = CIFS_I(dir);
1049 CIFS_I(dir)->time = 0; /* force revalidate of dir as well */ 1060 CIFS_I(dir)->time = 0; /* force revalidate of dir as well */
1050 1061
1051 kfree(full_path); 1062 kfree(full_path);
@@ -1138,6 +1149,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1138 cFYI(1, ("posix mkdir returned 0x%x", rc)); 1149 cFYI(1, ("posix mkdir returned 0x%x", rc));
1139 d_drop(direntry); 1150 d_drop(direntry);
1140 } else { 1151 } else {
1152 __u64 unique_id;
1141 if (pInfo->Type == cpu_to_le32(-1)) { 1153 if (pInfo->Type == cpu_to_le32(-1)) {
1142 /* no return info, go query for it */ 1154 /* no return info, go query for it */
1143 kfree(pInfo); 1155 kfree(pInfo);
@@ -1151,8 +1163,8 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1151 else 1163 else
1152 direntry->d_op = &cifs_dentry_ops; 1164 direntry->d_op = &cifs_dentry_ops;
1153 1165
1154 newinode = cifs_new_inode(inode->i_sb, 1166 unique_id = le64_to_cpu(pInfo->UniqueId);
1155 &pInfo->UniqueId); 1167 newinode = cifs_new_inode(inode->i_sb, &unique_id);
1156 if (newinode == NULL) { 1168 if (newinode == NULL) {
1157 kfree(pInfo); 1169 kfree(pInfo);
1158 goto mkdir_get_info; 1170 goto mkdir_get_info;
@@ -1450,7 +1462,8 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
1450 checking the UniqueId via FILE_INTERNAL_INFO */ 1462 checking the UniqueId via FILE_INTERNAL_INFO */
1451 1463
1452unlink_target: 1464unlink_target:
1453 if ((rc == -EACCES) || (rc == -EEXIST)) { 1465 /* Try unlinking the target dentry if it's not negative */
1466 if (target_dentry->d_inode && (rc == -EACCES || rc == -EEXIST)) {
1454 tmprc = cifs_unlink(target_dir, target_dentry); 1467 tmprc = cifs_unlink(target_dir, target_dentry);
1455 if (tmprc) 1468 if (tmprc)
1456 goto cifs_rename_exit; 1469 goto cifs_rename_exit;
@@ -1753,6 +1766,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1753 } 1766 }
1754 1767
1755 if (rc == 0) { 1768 if (rc == 0) {
1769 cifsInode->server_eof = attrs->ia_size;
1756 rc = cifs_vmtruncate(inode, attrs->ia_size); 1770 rc = cifs_vmtruncate(inode, attrs->ia_size);
1757 cifs_truncate_page(inode->i_mapping, inode->i_size); 1771 cifs_truncate_page(inode->i_mapping, inode->i_size);
1758 } 1772 }
@@ -1792,20 +1806,21 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1792 goto out; 1806 goto out;
1793 } 1807 }
1794 1808
1795 if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) { 1809 /*
1796 /* 1810 * Attempt to flush data before changing attributes. We need to do
1797 Flush data before changing file size or changing the last 1811 * this for ATTR_SIZE and ATTR_MTIME for sure, and if we change the
1798 write time of the file on the server. If the 1812 * ownership or mode then we may also need to do this. Here, we take
1799 flush returns error, store it to report later and continue. 1813 * the safe way out and just do the flush on all setattr requests. If
1800 BB: This should be smarter. Why bother flushing pages that 1814 * the flush returns error, store it to report later and continue.
1801 will be truncated anyway? Also, should we error out here if 1815 *
1802 the flush returns error? 1816 * BB: This should be smarter. Why bother flushing pages that
1803 */ 1817 * will be truncated anyway? Also, should we error out here if
1804 rc = filemap_write_and_wait(inode->i_mapping); 1818 * the flush returns error?
1805 if (rc != 0) { 1819 */
1806 cifsInode->write_behind_rc = rc; 1820 rc = filemap_write_and_wait(inode->i_mapping);
1807 rc = 0; 1821 if (rc != 0) {
1808 } 1822 cifsInode->write_behind_rc = rc;
1823 rc = 0;
1809 } 1824 }
1810 1825
1811 if (attrs->ia_valid & ATTR_SIZE) { 1826 if (attrs->ia_valid & ATTR_SIZE) {
@@ -1903,20 +1918,21 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
1903 return -ENOMEM; 1918 return -ENOMEM;
1904 } 1919 }
1905 1920
1906 if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) { 1921 /*
1907 /* 1922 * Attempt to flush data before changing attributes. We need to do
1908 Flush data before changing file size or changing the last 1923 * this for ATTR_SIZE and ATTR_MTIME for sure, and if we change the
1909 write time of the file on the server. If the 1924 * ownership or mode then we may also need to do this. Here, we take
1910 flush returns error, store it to report later and continue. 1925 * the safe way out and just do the flush on all setattr requests. If
1911 BB: This should be smarter. Why bother flushing pages that 1926 * the flush returns error, store it to report later and continue.
1912 will be truncated anyway? Also, should we error out here if 1927 *
1913 the flush returns error? 1928 * BB: This should be smarter. Why bother flushing pages that
1914 */ 1929 * will be truncated anyway? Also, should we error out here if
1915 rc = filemap_write_and_wait(inode->i_mapping); 1930 * the flush returns error?
1916 if (rc != 0) { 1931 */
1917 cifsInode->write_behind_rc = rc; 1932 rc = filemap_write_and_wait(inode->i_mapping);
1918 rc = 0; 1933 if (rc != 0) {
1919 } 1934 cifsInode->write_behind_rc = rc;
1935 rc = 0;
1920 } 1936 }
1921 1937
1922 if (attrs->ia_valid & ATTR_SIZE) { 1938 if (attrs->ia_valid & ATTR_SIZE) {
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 63f644000ce5..ea9d11e3dcbb 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -119,16 +119,11 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
119 full_path = build_path_from_dentry(direntry); 119 full_path = build_path_from_dentry(direntry);
120 120
121 if (!full_path) 121 if (!full_path)
122 goto out_no_free; 122 goto out;
123 123
124 cFYI(1, ("Full path: %s inode = 0x%p", full_path, inode)); 124 cFYI(1, ("Full path: %s inode = 0x%p", full_path, inode));
125 cifs_sb = CIFS_SB(inode->i_sb); 125 cifs_sb = CIFS_SB(inode->i_sb);
126 pTcon = cifs_sb->tcon; 126 pTcon = cifs_sb->tcon;
127 target_path = kmalloc(PATH_MAX, GFP_KERNEL);
128 if (!target_path) {
129 target_path = ERR_PTR(-ENOMEM);
130 goto out;
131 }
132 127
133 /* We could change this to: 128 /* We could change this to:
134 if (pTcon->unix_ext) 129 if (pTcon->unix_ext)
@@ -138,8 +133,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
138 133
139 if (pTcon->ses->capabilities & CAP_UNIX) 134 if (pTcon->ses->capabilities & CAP_UNIX)
140 rc = CIFSSMBUnixQuerySymLink(xid, pTcon, full_path, 135 rc = CIFSSMBUnixQuerySymLink(xid, pTcon, full_path,
141 target_path, 136 &target_path,
142 PATH_MAX-1,
143 cifs_sb->local_nls); 137 cifs_sb->local_nls);
144 else { 138 else {
145 /* BB add read reparse point symlink code here */ 139 /* BB add read reparse point symlink code here */
@@ -148,22 +142,16 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
148 /* BB Add MAC style xsymlink check here if enabled */ 142 /* BB Add MAC style xsymlink check here if enabled */
149 } 143 }
150 144
151 if (rc == 0) { 145 if (rc != 0) {
152
153/* BB Add special case check for Samba DFS symlinks */
154
155 target_path[PATH_MAX-1] = 0;
156 } else {
157 kfree(target_path); 146 kfree(target_path);
158 target_path = ERR_PTR(rc); 147 target_path = ERR_PTR(rc);
159 } 148 }
160 149
161out:
162 kfree(full_path); 150 kfree(full_path);
163out_no_free: 151out:
164 FreeXid(xid); 152 FreeXid(xid);
165 nd_set_link(nd, target_path); 153 nd_set_link(nd, target_path);
166 return NULL; /* No cookie */ 154 return NULL;
167} 155}
168 156
169int 157int
@@ -224,98 +212,6 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
224 return rc; 212 return rc;
225} 213}
226 214
227int
228cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen)
229{
230 struct inode *inode = direntry->d_inode;
231 int rc = -EACCES;
232 int xid;
233 int oplock = 0;
234 struct cifs_sb_info *cifs_sb;
235 struct cifsTconInfo *pTcon;
236 char *full_path = NULL;
237 char *tmpbuffer;
238 int len;
239 __u16 fid;
240
241 xid = GetXid();
242 cifs_sb = CIFS_SB(inode->i_sb);
243 pTcon = cifs_sb->tcon;
244
245/* BB would it be safe against deadlock to grab this sem
246 even though rename itself grabs the sem and calls lookup? */
247/* mutex_lock(&inode->i_sb->s_vfs_rename_mutex);*/
248 full_path = build_path_from_dentry(direntry);
249/* mutex_unlock(&inode->i_sb->s_vfs_rename_mutex);*/
250
251 if (full_path == NULL) {
252 FreeXid(xid);
253 return -ENOMEM;
254 }
255
256 cFYI(1,
257 ("Full path: %s inode = 0x%p pBuffer = 0x%p buflen = %d",
258 full_path, inode, pBuffer, buflen));
259 if (buflen > PATH_MAX)
260 len = PATH_MAX;
261 else
262 len = buflen;
263 tmpbuffer = kmalloc(len, GFP_KERNEL);
264 if (tmpbuffer == NULL) {
265 kfree(full_path);
266 FreeXid(xid);
267 return -ENOMEM;
268 }
269
270/* BB add read reparse point symlink code and
271 Unix extensions symlink code here BB */
272/* We could disable this based on pTcon->unix_ext flag instead ... but why? */
273 if (cifs_sb->tcon->ses->capabilities & CAP_UNIX)
274 rc = CIFSSMBUnixQuerySymLink(xid, pTcon, full_path,
275 tmpbuffer,
276 len - 1,
277 cifs_sb->local_nls);
278 else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) {
279 cERROR(1, ("SFU style symlinks not implemented yet"));
280 /* add open and read as in fs/cifs/inode.c */
281 } else {
282 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, GENERIC_READ,
283 OPEN_REPARSE_POINT, &fid, &oplock, NULL,
284 cifs_sb->local_nls,
285 cifs_sb->mnt_cifs_flags &
286 CIFS_MOUNT_MAP_SPECIAL_CHR);
287 if (!rc) {
288 rc = CIFSSMBQueryReparseLinkInfo(xid, pTcon, full_path,
289 tmpbuffer,
290 len - 1,
291 fid,
292 cifs_sb->local_nls);
293 if (CIFSSMBClose(xid, pTcon, fid)) {
294 cFYI(1, ("Error closing junction point "
295 "(open for ioctl)"));
296 }
297 /* If it is a DFS junction earlier we would have gotten
298 PATH_NOT_COVERED returned from server so we do
299 not need to request the DFS info here */
300 }
301 }
302 /* BB Anything else to do to handle recursive links? */
303 /* BB Should we be using page ops here? */
304
305 /* BB null terminate returned string in pBuffer? BB */
306 if (rc == 0) {
307 rc = vfs_readlink(direntry, pBuffer, len, tmpbuffer);
308 cFYI(1,
309 ("vfs_readlink called from cifs_readlink returned %d",
310 rc));
311 }
312
313 kfree(tmpbuffer);
314 kfree(full_path);
315 FreeXid(xid);
316 return rc;
317}
318
319void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *cookie) 215void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *cookie)
320{ 216{
321 char *p = nd_get_link(nd); 217 char *p = nd_get_link(nd);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 4c89c572891a..e079a9190ec4 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -635,77 +635,6 @@ dump_smb(struct smb_hdr *smb_buf, int smb_buf_length)
635 return; 635 return;
636} 636}
637 637
638/* Windows maps these to the user defined 16 bit Unicode range since they are
639 reserved symbols (along with \ and /), otherwise illegal to store
640 in filenames in NTFS */
641#define UNI_ASTERIK (__u16) ('*' + 0xF000)
642#define UNI_QUESTION (__u16) ('?' + 0xF000)
643#define UNI_COLON (__u16) (':' + 0xF000)
644#define UNI_GRTRTHAN (__u16) ('>' + 0xF000)
645#define UNI_LESSTHAN (__u16) ('<' + 0xF000)
646#define UNI_PIPE (__u16) ('|' + 0xF000)
647#define UNI_SLASH (__u16) ('\\' + 0xF000)
648
649/* Convert 16 bit Unicode pathname from wire format to string in current code
650 page. Conversion may involve remapping up the seven characters that are
651 only legal in POSIX-like OS (if they are present in the string). Path
652 names are little endian 16 bit Unicode on the wire */
653int
654cifs_convertUCSpath(char *target, const __le16 *source, int maxlen,
655 const struct nls_table *cp)
656{
657 int i, j, len;
658 __u16 src_char;
659
660 for (i = 0, j = 0; i < maxlen; i++) {
661 src_char = le16_to_cpu(source[i]);
662 switch (src_char) {
663 case 0:
664 goto cUCS_out; /* BB check this BB */
665 case UNI_COLON:
666 target[j] = ':';
667 break;
668 case UNI_ASTERIK:
669 target[j] = '*';
670 break;
671 case UNI_QUESTION:
672 target[j] = '?';
673 break;
674 /* BB We can not handle remapping slash until
675 all the calls to build_path_from_dentry
676 are modified, as they use slash as separator BB */
677 /* case UNI_SLASH:
678 target[j] = '\\';
679 break;*/
680 case UNI_PIPE:
681 target[j] = '|';
682 break;
683 case UNI_GRTRTHAN:
684 target[j] = '>';
685 break;
686 case UNI_LESSTHAN:
687 target[j] = '<';
688 break;
689 default:
690 len = cp->uni2char(src_char, &target[j],
691 NLS_MAX_CHARSET_SIZE);
692 if (len > 0) {
693 j += len;
694 continue;
695 } else {
696 target[j] = '?';
697 }
698 }
699 j++;
700 /* make sure we do not overrun callers allocated temp buffer */
701 if (j >= (2 * NAME_MAX))
702 break;
703 }
704cUCS_out:
705 target[j] = 0;
706 return j;
707}
708
709/* Convert 16 bit Unicode pathname to wire format from string in current code 638/* Convert 16 bit Unicode pathname to wire format from string in current code
710 page. Conversion may involve remapping up the seven characters that are 639 page. Conversion may involve remapping up the seven characters that are
711 only legal in POSIX-like OS (if they are present in the string). Path 640 only legal in POSIX-like OS (if they are present in the string). Path
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 8703d68f5b20..e2fe998989a3 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -79,6 +79,7 @@ static const struct smb_to_posix_error mapping_table_ERRDOS[] = {
79 {ErrQuota, -EDQUOT}, 79 {ErrQuota, -EDQUOT},
80 {ErrNotALink, -ENOLINK}, 80 {ErrNotALink, -ENOLINK},
81 {ERRnetlogonNotStarted, -ENOPROTOOPT}, 81 {ERRnetlogonNotStarted, -ENOPROTOOPT},
82 {ERRsymlink, -EOPNOTSUPP},
82 {ErrTooManyLinks, -EMLINK}, 83 {ErrTooManyLinks, -EMLINK},
83 {0, 0} 84 {0, 0}
84}; 85};
@@ -714,6 +715,7 @@ static const struct {
714 ERRDOS, ERRnoaccess, 0xc000028f}, { 715 ERRDOS, ERRnoaccess, 0xc000028f}, {
715 ERRDOS, ERRnoaccess, 0xc0000290}, { 716 ERRDOS, ERRnoaccess, 0xc0000290}, {
716 ERRDOS, ERRbadfunc, 0xc000029c}, { 717 ERRDOS, ERRbadfunc, 0xc000029c}, {
718 ERRDOS, ERRsymlink, NT_STATUS_STOPPED_ON_SYMLINK}, {
717 ERRDOS, ERRinvlevel, 0x007c0001}, }; 719 ERRDOS, ERRinvlevel, 0x007c0001}, };
718 720
719/***************************************************************************** 721/*****************************************************************************
diff --git a/fs/cifs/nterr.h b/fs/cifs/nterr.h
index 588abbb9d08c..257267367d41 100644
--- a/fs/cifs/nterr.h
+++ b/fs/cifs/nterr.h
@@ -35,8 +35,6 @@ struct nt_err_code_struct {
35extern const struct nt_err_code_struct nt_errs[]; 35extern const struct nt_err_code_struct nt_errs[];
36 36
37/* Win32 Status codes. */ 37/* Win32 Status codes. */
38
39#define STATUS_BUFFER_OVERFLOW 0x80000005
40#define STATUS_MORE_ENTRIES 0x0105 38#define STATUS_MORE_ENTRIES 0x0105
41#define ERROR_INVALID_PARAMETER 0x0057 39#define ERROR_INVALID_PARAMETER 0x0057
42#define ERROR_INSUFFICIENT_BUFFER 0x007a 40#define ERROR_INSUFFICIENT_BUFFER 0x007a
@@ -50,6 +48,13 @@ extern const struct nt_err_code_struct nt_errs[];
50#define STATUS_SOME_UNMAPPED 0x0107 48#define STATUS_SOME_UNMAPPED 0x0107
51#define STATUS_BUFFER_OVERFLOW 0x80000005 49#define STATUS_BUFFER_OVERFLOW 0x80000005
52#define NT_STATUS_NO_MORE_ENTRIES 0x8000001a 50#define NT_STATUS_NO_MORE_ENTRIES 0x8000001a
51#define NT_STATUS_MEDIA_CHANGED 0x8000001c
52#define NT_STATUS_END_OF_MEDIA 0x8000001e
53#define NT_STATUS_MEDIA_CHECK 0x80000020
54#define NT_STATUS_NO_DATA_DETECTED 0x8000001c
55#define NT_STATUS_STOPPED_ON_SYMLINK 0x8000002d
56#define NT_STATUS_DEVICE_REQUIRES_CLEANING 0x80000288
57#define NT_STATUS_DEVICE_DOOR_OPEN 0x80000288
53#define NT_STATUS_UNSUCCESSFUL 0xC0000000 | 0x0001 58#define NT_STATUS_UNSUCCESSFUL 0xC0000000 | 0x0001
54#define NT_STATUS_NOT_IMPLEMENTED 0xC0000000 | 0x0002 59#define NT_STATUS_NOT_IMPLEMENTED 0xC0000000 | 0x0002
55#define NT_STATUS_INVALID_INFO_CLASS 0xC0000000 | 0x0003 60#define NT_STATUS_INVALID_INFO_CLASS 0xC0000000 | 0x0003
diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h
index c377d8065d99..49c9a4e75319 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/cifs/ntlmssp.h
@@ -27,29 +27,39 @@
27#define UnknownMessage cpu_to_le32(8) 27#define UnknownMessage cpu_to_le32(8)
28 28
29/* Negotiate Flags */ 29/* Negotiate Flags */
30#define NTLMSSP_NEGOTIATE_UNICODE 0x01 /* Text strings are in unicode */ 30#define NTLMSSP_NEGOTIATE_UNICODE 0x01 /* Text strings are unicode */
31#define NTLMSSP_NEGOTIATE_OEM 0x02 /* Text strings are in OEM */ 31#define NTLMSSP_NEGOTIATE_OEM 0x02 /* Text strings are in OEM */
32#define NTLMSSP_REQUEST_TARGET 0x04 /* Server return its auth realm */ 32#define NTLMSSP_REQUEST_TARGET 0x04 /* Srv returns its auth realm */
33#define NTLMSSP_NEGOTIATE_SIGN 0x0010 /* Request signature capability */ 33/* define reserved9 0x08 */
34#define NTLMSSP_NEGOTIATE_SEAL 0x0020 /* Request confidentiality */ 34#define NTLMSSP_NEGOTIATE_SIGN 0x0010 /* Request signing capability */
35#define NTLMSSP_NEGOTIATE_DGRAM 0x0040 35#define NTLMSSP_NEGOTIATE_SEAL 0x0020 /* Request confidentiality */
36#define NTLMSSP_NEGOTIATE_LM_KEY 0x0080 /* Sign/seal use LM session key */ 36#define NTLMSSP_NEGOTIATE_DGRAM 0x0040
37#define NTLMSSP_NEGOTIATE_NTLM 0x0200 /* NTLM authentication */ 37#define NTLMSSP_NEGOTIATE_LM_KEY 0x0080 /* Use LM session key */
38#define NTLMSSP_NEGOTIATE_DOMAIN_SUPPLIED 0x1000 38/* defined reserved 8 0x0100 */
39#define NTLMSSP_NEGOTIATE_NTLM 0x0200 /* NTLM authentication */
40#define NTLMSSP_NEGOTIATE_NT_ONLY 0x0400 /* Lanman not allowed */
41#define NTLMSSP_ANONYMOUS 0x0800
42#define NTLMSSP_NEGOTIATE_DOMAIN_SUPPLIED 0x1000 /* reserved6 */
39#define NTLMSSP_NEGOTIATE_WORKSTATION_SUPPLIED 0x2000 43#define NTLMSSP_NEGOTIATE_WORKSTATION_SUPPLIED 0x2000
40#define NTLMSSP_NEGOTIATE_LOCAL_CALL 0x4000 /* client/server on same machine */ 44#define NTLMSSP_NEGOTIATE_LOCAL_CALL 0x4000 /* client/server same machine */
41#define NTLMSSP_NEGOTIATE_ALWAYS_SIGN 0x8000 /* Sign for all security levels */ 45#define NTLMSSP_NEGOTIATE_ALWAYS_SIGN 0x8000 /* Sign. All security levels */
42#define NTLMSSP_TARGET_TYPE_DOMAIN 0x10000 46#define NTLMSSP_TARGET_TYPE_DOMAIN 0x10000
43#define NTLMSSP_TARGET_TYPE_SERVER 0x20000 47#define NTLMSSP_TARGET_TYPE_SERVER 0x20000
44#define NTLMSSP_TARGET_TYPE_SHARE 0x40000 48#define NTLMSSP_TARGET_TYPE_SHARE 0x40000
45#define NTLMSSP_NEGOTIATE_NTLMV2 0x80000 49#define NTLMSSP_NEGOTIATE_EXTENDED_SEC 0x80000 /* NB:not related to NTLMv2 pwd*/
46#define NTLMSSP_REQUEST_INIT_RESP 0x100000 50/* #define NTLMSSP_REQUEST_INIT_RESP 0x100000 */
47#define NTLMSSP_REQUEST_ACCEPT_RESP 0x200000 51#define NTLMSSP_NEGOTIATE_IDENTIFY 0x100000
48#define NTLMSSP_REQUEST_NOT_NT_KEY 0x400000 52#define NTLMSSP_REQUEST_ACCEPT_RESP 0x200000 /* reserved5 */
53#define NTLMSSP_REQUEST_NON_NT_KEY 0x400000
49#define NTLMSSP_NEGOTIATE_TARGET_INFO 0x800000 54#define NTLMSSP_NEGOTIATE_TARGET_INFO 0x800000
50#define NTLMSSP_NEGOTIATE_128 0x20000000 55/* #define reserved4 0x1000000 */
51#define NTLMSSP_NEGOTIATE_KEY_XCH 0x40000000 56#define NTLMSSP_NEGOTIATE_VERSION 0x2000000 /* we do not set */
52#define NTLMSSP_NEGOTIATE_56 0x80000000 57/* #define reserved3 0x4000000 */
58/* #define reserved2 0x8000000 */
59/* #define reserved1 0x10000000 */
60#define NTLMSSP_NEGOTIATE_128 0x20000000
61#define NTLMSSP_NEGOTIATE_KEY_XCH 0x40000000
62#define NTLMSSP_NEGOTIATE_56 0x80000000
53 63
54/* Although typedefs are not commonly used for structure definitions */ 64/* Although typedefs are not commonly used for structure definitions */
55/* in the Linux kernel, in this particular case they are useful */ 65/* in the Linux kernel, in this particular case they are useful */
@@ -60,32 +70,36 @@
60typedef struct _SECURITY_BUFFER { 70typedef struct _SECURITY_BUFFER {
61 __le16 Length; 71 __le16 Length;
62 __le16 MaximumLength; 72 __le16 MaximumLength;
63 __le32 Buffer; /* offset to buffer */ 73 __le32 BufferOffset; /* offset to buffer */
64} __attribute__((packed)) SECURITY_BUFFER; 74} __attribute__((packed)) SECURITY_BUFFER;
65 75
66typedef struct _NEGOTIATE_MESSAGE { 76typedef struct _NEGOTIATE_MESSAGE {
67 __u8 Signature[sizeof(NTLMSSP_SIGNATURE)]; 77 __u8 Signature[sizeof(NTLMSSP_SIGNATURE)];
68 __le32 MessageType; /* 1 */ 78 __le32 MessageType; /* NtLmNegotiate = 1 */
69 __le32 NegotiateFlags; 79 __le32 NegotiateFlags;
70 SECURITY_BUFFER DomainName; /* RFC 1001 style and ASCII */ 80 SECURITY_BUFFER DomainName; /* RFC 1001 style and ASCII */
71 SECURITY_BUFFER WorkstationName; /* RFC 1001 and ASCII */ 81 SECURITY_BUFFER WorkstationName; /* RFC 1001 and ASCII */
82 /* SECURITY_BUFFER for version info not present since we
83 do not set the version is present flag */
72 char DomainString[0]; 84 char DomainString[0];
73 /* followed by WorkstationString */ 85 /* followed by WorkstationString */
74} __attribute__((packed)) NEGOTIATE_MESSAGE, *PNEGOTIATE_MESSAGE; 86} __attribute__((packed)) NEGOTIATE_MESSAGE, *PNEGOTIATE_MESSAGE;
75 87
76typedef struct _CHALLENGE_MESSAGE { 88typedef struct _CHALLENGE_MESSAGE {
77 __u8 Signature[sizeof(NTLMSSP_SIGNATURE)]; 89 __u8 Signature[sizeof(NTLMSSP_SIGNATURE)];
78 __le32 MessageType; /* 2 */ 90 __le32 MessageType; /* NtLmChallenge = 2 */
79 SECURITY_BUFFER TargetName; 91 SECURITY_BUFFER TargetName;
80 __le32 NegotiateFlags; 92 __le32 NegotiateFlags;
81 __u8 Challenge[CIFS_CRYPTO_KEY_SIZE]; 93 __u8 Challenge[CIFS_CRYPTO_KEY_SIZE];
82 __u8 Reserved[8]; 94 __u8 Reserved[8];
83 SECURITY_BUFFER TargetInfoArray; 95 SECURITY_BUFFER TargetInfoArray;
96 /* SECURITY_BUFFER for version info not present since we
97 do not set the version is present flag */
84} __attribute__((packed)) CHALLENGE_MESSAGE, *PCHALLENGE_MESSAGE; 98} __attribute__((packed)) CHALLENGE_MESSAGE, *PCHALLENGE_MESSAGE;
85 99
86typedef struct _AUTHENTICATE_MESSAGE { 100typedef struct _AUTHENTICATE_MESSAGE {
87 __u8 Signature[sizeof (NTLMSSP_SIGNATURE)]; 101 __u8 Signature[sizeof(NTLMSSP_SIGNATURE)];
88 __le32 MessageType; /* 3 */ 102 __le32 MessageType; /* NtLmsAuthenticate = 3 */
89 SECURITY_BUFFER LmChallengeResponse; 103 SECURITY_BUFFER LmChallengeResponse;
90 SECURITY_BUFFER NtChallengeResponse; 104 SECURITY_BUFFER NtChallengeResponse;
91 SECURITY_BUFFER DomainName; 105 SECURITY_BUFFER DomainName;
@@ -93,5 +107,7 @@ typedef struct _AUTHENTICATE_MESSAGE {
93 SECURITY_BUFFER WorkstationName; 107 SECURITY_BUFFER WorkstationName;
94 SECURITY_BUFFER SessionKey; 108 SECURITY_BUFFER SessionKey;
95 __le32 NegotiateFlags; 109 __le32 NegotiateFlags;
110 /* SECURITY_BUFFER for version info not present since we
111 do not set the version is present flag */
96 char UserString[0]; 112 char UserString[0];
97} __attribute__((packed)) AUTHENTICATE_MESSAGE, *PAUTHENTICATE_MESSAGE; 113} __attribute__((packed)) AUTHENTICATE_MESSAGE, *PAUTHENTICATE_MESSAGE;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index c2c01ff4c32c..964e097c8203 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -31,6 +31,13 @@
31#include "cifs_fs_sb.h" 31#include "cifs_fs_sb.h"
32#include "cifsfs.h" 32#include "cifsfs.h"
33 33
34/*
35 * To be safe - for UCS to UTF-8 with strings loaded with the rare long
36 * characters alloc more to account for such multibyte target UTF-8
37 * characters.
38 */
39#define UNICODE_NAME_MAX ((4 * NAME_MAX) + 2)
40
34#ifdef CONFIG_CIFS_DEBUG2 41#ifdef CONFIG_CIFS_DEBUG2
35static void dump_cifs_file_struct(struct file *file, char *label) 42static void dump_cifs_file_struct(struct file *file, char *label)
36{ 43{
@@ -239,6 +246,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
239 if (atomic_read(&cifsInfo->inUse) == 0) 246 if (atomic_read(&cifsInfo->inUse) == 0)
240 atomic_set(&cifsInfo->inUse, 1); 247 atomic_set(&cifsInfo->inUse, 1);
241 248
249 cifsInfo->server_eof = end_of_file;
242 spin_lock(&tmp_inode->i_lock); 250 spin_lock(&tmp_inode->i_lock);
243 if (is_size_safe_to_change(cifsInfo, end_of_file)) { 251 if (is_size_safe_to_change(cifsInfo, end_of_file)) {
244 /* can not safely change the file size here if the 252 /* can not safely change the file size here if the
@@ -375,6 +383,7 @@ static void unix_fill_in_inode(struct inode *tmp_inode,
375 tmp_inode->i_gid = le64_to_cpu(pfindData->Gid); 383 tmp_inode->i_gid = le64_to_cpu(pfindData->Gid);
376 tmp_inode->i_nlink = le64_to_cpu(pfindData->Nlinks); 384 tmp_inode->i_nlink = le64_to_cpu(pfindData->Nlinks);
377 385
386 cifsInfo->server_eof = end_of_file;
378 spin_lock(&tmp_inode->i_lock); 387 spin_lock(&tmp_inode->i_lock);
379 if (is_size_safe_to_change(cifsInfo, end_of_file)) { 388 if (is_size_safe_to_change(cifsInfo, end_of_file)) {
380 /* can not safely change the file size here if the 389 /* can not safely change the file size here if the
@@ -436,6 +445,38 @@ static void unix_fill_in_inode(struct inode *tmp_inode,
436 } 445 }
437} 446}
438 447
448/* BB eventually need to add the following helper function to
449 resolve NT_STATUS_STOPPED_ON_SYMLINK return code when
450 we try to do FindFirst on (NTFS) directory symlinks */
451/*
452int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb,
453 int xid)
454{
455 __u16 fid;
456 int len;
457 int oplock = 0;
458 int rc;
459 struct cifsTconInfo *ptcon = cifs_sb->tcon;
460 char *tmpbuffer;
461
462 rc = CIFSSMBOpen(xid, ptcon, full_path, FILE_OPEN, GENERIC_READ,
463 OPEN_REPARSE_POINT, &fid, &oplock, NULL,
464 cifs_sb->local_nls,
465 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
466 if (!rc) {
467 tmpbuffer = kmalloc(maxpath);
468 rc = CIFSSMBQueryReparseLinkInfo(xid, ptcon, full_path,
469 tmpbuffer,
470 maxpath -1,
471 fid,
472 cifs_sb->local_nls);
473 if (CIFSSMBClose(xid, ptcon, fid)) {
474 cFYI(1, ("Error closing temporary reparsepoint open)"));
475 }
476 }
477}
478 */
479
439static int initiate_cifs_search(const int xid, struct file *file) 480static int initiate_cifs_search(const int xid, struct file *file)
440{ 481{
441 int rc = 0; 482 int rc = 0;
@@ -491,7 +532,10 @@ ffirst_retry:
491 CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb)); 532 CIFS_MOUNT_MAP_SPECIAL_CHR, CIFS_DIR_SEP(cifs_sb));
492 if (rc == 0) 533 if (rc == 0)
493 cifsFile->invalidHandle = false; 534 cifsFile->invalidHandle = false;
494 if ((rc == -EOPNOTSUPP) && 535 /* BB add following call to handle readdir on new NTFS symlink errors
536 else if STATUS_STOPPED_ON_SYMLINK
537 call get_symlink_reparse_path and retry with new path */
538 else if ((rc == -EOPNOTSUPP) &&
495 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) { 539 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) {
496 cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM; 540 cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM;
497 goto ffirst_retry; 541 goto ffirst_retry;
@@ -820,7 +864,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
820/* inode num, inode type and filename returned */ 864/* inode num, inode type and filename returned */
821static int cifs_get_name_from_search_buf(struct qstr *pqst, 865static int cifs_get_name_from_search_buf(struct qstr *pqst,
822 char *current_entry, __u16 level, unsigned int unicode, 866 char *current_entry, __u16 level, unsigned int unicode,
823 struct cifs_sb_info *cifs_sb, int max_len, __u64 *pinum) 867 struct cifs_sb_info *cifs_sb, unsigned int max_len, __u64 *pinum)
824{ 868{
825 int rc = 0; 869 int rc = 0;
826 unsigned int len = 0; 870 unsigned int len = 0;
@@ -840,7 +884,7 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
840 len = strnlen(filename, PATH_MAX); 884 len = strnlen(filename, PATH_MAX);
841 } 885 }
842 886
843 *pinum = pFindData->UniqueId; 887 *pinum = le64_to_cpu(pFindData->UniqueId);
844 } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { 888 } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) {
845 FILE_DIRECTORY_INFO *pFindData = 889 FILE_DIRECTORY_INFO *pFindData =
846 (FILE_DIRECTORY_INFO *)current_entry; 890 (FILE_DIRECTORY_INFO *)current_entry;
@@ -856,7 +900,7 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
856 (SEARCH_ID_FULL_DIR_INFO *)current_entry; 900 (SEARCH_ID_FULL_DIR_INFO *)current_entry;
857 filename = &pFindData->FileName[0]; 901 filename = &pFindData->FileName[0];
858 len = le32_to_cpu(pFindData->FileNameLength); 902 len = le32_to_cpu(pFindData->FileNameLength);
859 *pinum = pFindData->UniqueId; 903 *pinum = le64_to_cpu(pFindData->UniqueId);
860 } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { 904 } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) {
861 FILE_BOTH_DIRECTORY_INFO *pFindData = 905 FILE_BOTH_DIRECTORY_INFO *pFindData =
862 (FILE_BOTH_DIRECTORY_INFO *)current_entry; 906 (FILE_BOTH_DIRECTORY_INFO *)current_entry;
@@ -879,14 +923,12 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
879 } 923 }
880 924
881 if (unicode) { 925 if (unicode) {
882 /* BB fixme - test with long names */ 926 pqst->len = cifs_from_ucs2((char *) pqst->name,
883 /* Note converted filename can be longer than in unicode */ 927 (__le16 *) filename,
884 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) 928 UNICODE_NAME_MAX,
885 pqst->len = cifs_convertUCSpath((char *)pqst->name, 929 min(len, max_len), nlt,
886 (__le16 *)filename, len/2, nlt); 930 cifs_sb->mnt_cifs_flags &
887 else 931 CIFS_MOUNT_MAP_SPECIAL_CHR);
888 pqst->len = cifs_strfromUCS_le((char *)pqst->name,
889 (__le16 *)filename, len/2, nlt);
890 } else { 932 } else {
891 pqst->name = filename; 933 pqst->name = filename;
892 pqst->len = len; 934 pqst->len = len;
@@ -896,8 +938,8 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
896 return rc; 938 return rc;
897} 939}
898 940
899static int cifs_filldir(char *pfindEntry, struct file *file, 941static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
900 filldir_t filldir, void *direntry, char *scratch_buf, int max_len) 942 void *direntry, char *scratch_buf, unsigned int max_len)
901{ 943{
902 int rc = 0; 944 int rc = 0;
903 struct qstr qstring; 945 struct qstr qstring;
@@ -994,7 +1036,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
994 int num_to_fill = 0; 1036 int num_to_fill = 0;
995 char *tmp_buf = NULL; 1037 char *tmp_buf = NULL;
996 char *end_of_smb; 1038 char *end_of_smb;
997 int max_len; 1039 unsigned int max_len;
998 1040
999 xid = GetXid(); 1041 xid = GetXid();
1000 1042
@@ -1068,11 +1110,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
1068 cifsFile->srch_inf.ntwrk_buf_start); 1110 cifsFile->srch_inf.ntwrk_buf_start);
1069 end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len; 1111 end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len;
1070 1112
1071 /* To be safe - for UCS to UTF-8 with strings loaded 1113 tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL);
1072 with the rare long characters alloc more to account for
1073 such multibyte target UTF-8 characters. cifs_unicode.c,
1074 which actually does the conversion, has the same limit */
1075 tmp_buf = kmalloc((2 * NAME_MAX) + 4, GFP_KERNEL);
1076 for (i = 0; (i < num_to_fill) && (rc == 0); i++) { 1114 for (i = 0; (i < num_to_fill) && (rc == 0); i++) {
1077 if (current_entry == NULL) { 1115 if (current_entry == NULL) {
1078 /* evaluate whether this case is an error */ 1116 /* evaluate whether this case is an error */
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 5c68b4282be9..897a052270f9 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * SMB/CIFS session setup handling routines 4 * SMB/CIFS session setup handling routines
5 * 5 *
6 * Copyright (c) International Business Machines Corp., 2006, 2007 6 * Copyright (c) International Business Machines Corp., 2006, 2009
7 * Author(s): Steve French (sfrench@us.ibm.com) 7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * 8 *
9 * This library is free software; you can redistribute it and/or modify 9 * This library is free software; you can redistribute it and/or modify
@@ -111,7 +111,7 @@ static __le16 get_next_vcnum(struct cifsSesInfo *ses)
111get_vc_num_exit: 111get_vc_num_exit:
112 write_unlock(&cifs_tcp_ses_lock); 112 write_unlock(&cifs_tcp_ses_lock);
113 113
114 return le16_to_cpu(vcnum); 114 return cpu_to_le16(vcnum);
115} 115}
116 116
117static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB) 117static __u32 cifs_ssetup_hdr(struct cifsSesInfo *ses, SESSION_SETUP_ANDX *pSMB)
@@ -277,85 +277,51 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
277 *pbcc_area = bcc_ptr; 277 *pbcc_area = bcc_ptr;
278} 278}
279 279
280static int decode_unicode_ssetup(char **pbcc_area, int bleft, 280static void
281 struct cifsSesInfo *ses, 281decode_unicode_ssetup(char **pbcc_area, int bleft, struct cifsSesInfo *ses,
282 const struct nls_table *nls_cp) 282 const struct nls_table *nls_cp)
283{ 283{
284 int rc = 0; 284 int len;
285 int words_left, len;
286 char *data = *pbcc_area; 285 char *data = *pbcc_area;
287 286
288
289
290 cFYI(1, ("bleft %d", bleft)); 287 cFYI(1, ("bleft %d", bleft));
291 288
292 289 /*
293 /* SMB header is unaligned, so cifs servers word align start of 290 * Windows servers do not always double null terminate their final
294 Unicode strings */ 291 * Unicode string. Check to see if there are an uneven number of bytes
295 data++; 292 * left. If so, then add an extra NULL pad byte to the end of the
296 bleft--; /* Windows servers do not always double null terminate 293 * response.
297 their final Unicode string - in which case we 294 *
298 now will not attempt to decode the byte of junk 295 * See section 2.7.2 in "Implementing CIFS" for details
299 which follows it */ 296 */
300 297 if (bleft % 2) {
301 words_left = bleft / 2; 298 data[bleft] = 0;
302 299 ++bleft;
303 /* save off server operating system */ 300 }
304 len = UniStrnlen((wchar_t *) data, words_left);
305
306/* We look for obvious messed up bcc or strings in response so we do not go off
307 the end since (at least) WIN2K and Windows XP have a major bug in not null
308 terminating last Unicode string in response */
309 if (len >= words_left)
310 return rc;
311 301
312 kfree(ses->serverOS); 302 kfree(ses->serverOS);
313 /* UTF-8 string will not grow more than four times as big as UCS-16 */ 303 ses->serverOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp);
314 ses->serverOS = kzalloc((4 * len) + 2 /* trailing null */, GFP_KERNEL); 304 cFYI(1, ("serverOS=%s", ses->serverOS));
315 if (ses->serverOS != NULL) 305 len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2;
316 cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len, nls_cp); 306 data += len;
317 data += 2 * (len + 1); 307 bleft -= len;
318 words_left -= len + 1; 308 if (bleft <= 0)
319 309 return;
320 /* save off server network operating system */
321 len = UniStrnlen((wchar_t *) data, words_left);
322
323 if (len >= words_left)
324 return rc;
325 310
326 kfree(ses->serverNOS); 311 kfree(ses->serverNOS);
327 ses->serverNOS = kzalloc((4 * len) + 2 /* trailing null */, GFP_KERNEL); 312 ses->serverNOS = cifs_strndup_from_ucs(data, bleft, true, nls_cp);
328 if (ses->serverNOS != NULL) { 313 cFYI(1, ("serverNOS=%s", ses->serverNOS));
329 cifs_strfromUCS_le(ses->serverNOS, (__le16 *)data, len, 314 len = (UniStrnlen((wchar_t *) data, bleft / 2) * 2) + 2;
330 nls_cp); 315 data += len;
331 if (strncmp(ses->serverNOS, "NT LAN Manager 4", 16) == 0) { 316 bleft -= len;
332 cFYI(1, ("NT4 server")); 317 if (bleft <= 0)
333 ses->flags |= CIFS_SES_NT4; 318 return;
334 }
335 }
336 data += 2 * (len + 1);
337 words_left -= len + 1;
338
339 /* save off server domain */
340 len = UniStrnlen((wchar_t *) data, words_left);
341
342 if (len > words_left)
343 return rc;
344 319
345 kfree(ses->serverDomain); 320 kfree(ses->serverDomain);
346 ses->serverDomain = kzalloc(2 * (len + 1), GFP_KERNEL); /* BB FIXME wrong length */ 321 ses->serverDomain = cifs_strndup_from_ucs(data, bleft, true, nls_cp);
347 if (ses->serverDomain != NULL) { 322 cFYI(1, ("serverDomain=%s", ses->serverDomain));
348 cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len,
349 nls_cp);
350 ses->serverDomain[2*len] = 0;
351 ses->serverDomain[(2*len) + 1] = 0;
352 }
353 data += 2 * (len + 1);
354 words_left -= len + 1;
355 323
356 cFYI(1, ("words left: %d", words_left)); 324 return;
357
358 return rc;
359} 325}
360 326
361static int decode_ascii_ssetup(char **pbcc_area, int bleft, 327static int decode_ascii_ssetup(char **pbcc_area, int bleft,
@@ -412,6 +378,186 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft,
412 return rc; 378 return rc;
413} 379}
414 380
381static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
382 struct cifsSesInfo *ses)
383{
384 CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr;
385
386 if (blob_len < sizeof(CHALLENGE_MESSAGE)) {
387 cERROR(1, ("challenge blob len %d too small", blob_len));
388 return -EINVAL;
389 }
390
391 if (memcmp(pblob->Signature, "NTLMSSP", 8)) {
392 cERROR(1, ("blob signature incorrect %s", pblob->Signature));
393 return -EINVAL;
394 }
395 if (pblob->MessageType != NtLmChallenge) {
396 cERROR(1, ("Incorrect message type %d", pblob->MessageType));
397 return -EINVAL;
398 }
399
400 memcpy(ses->server->cryptKey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE);
401 /* BB we could decode pblob->NegotiateFlags; some may be useful */
402 /* In particular we can examine sign flags */
403 /* BB spec says that if AvId field of MsvAvTimestamp is populated then
404 we must set the MIC field of the AUTHENTICATE_MESSAGE */
405
406 return 0;
407}
408
409#ifdef CONFIG_CIFS_EXPERIMENTAL
410/* BB Move to ntlmssp.c eventually */
411
412/* We do not malloc the blob, it is passed in pbuffer, because
413 it is fixed size, and small, making this approach cleaner */
414static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
415 struct cifsSesInfo *ses)
416{
417 NEGOTIATE_MESSAGE *sec_blob = (NEGOTIATE_MESSAGE *)pbuffer;
418 __u32 flags;
419
420 memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
421 sec_blob->MessageType = NtLmNegotiate;
422
423 /* BB is NTLMV2 session security format easier to use here? */
424 flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET |
425 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
426 NTLMSSP_NEGOTIATE_NT_ONLY | NTLMSSP_NEGOTIATE_NTLM;
427 if (ses->server->secMode &
428 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
429 flags |= NTLMSSP_NEGOTIATE_SIGN;
430 if (ses->server->secMode & SECMODE_SIGN_REQUIRED)
431 flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
432
433 sec_blob->NegotiateFlags |= cpu_to_le32(flags);
434
435 sec_blob->WorkstationName.BufferOffset = 0;
436 sec_blob->WorkstationName.Length = 0;
437 sec_blob->WorkstationName.MaximumLength = 0;
438
439 /* Domain name is sent on the Challenge not Negotiate NTLMSSP request */
440 sec_blob->DomainName.BufferOffset = 0;
441 sec_blob->DomainName.Length = 0;
442 sec_blob->DomainName.MaximumLength = 0;
443}
444
445/* We do not malloc the blob, it is passed in pbuffer, because its
446 maximum possible size is fixed and small, making this approach cleaner.
447 This function returns the length of the data in the blob */
448static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
449 struct cifsSesInfo *ses,
450 const struct nls_table *nls_cp, int first)
451{
452 AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
453 __u32 flags;
454 unsigned char *tmp;
455 char ntlm_session_key[CIFS_SESS_KEY_SIZE];
456
457 memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
458 sec_blob->MessageType = NtLmAuthenticate;
459
460 flags = NTLMSSP_NEGOTIATE_56 |
461 NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO |
462 NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
463 NTLMSSP_NEGOTIATE_NT_ONLY | NTLMSSP_NEGOTIATE_NTLM;
464 if (ses->server->secMode &
465 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
466 flags |= NTLMSSP_NEGOTIATE_SIGN;
467 if (ses->server->secMode & SECMODE_SIGN_REQUIRED)
468 flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
469
470 tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE);
471 sec_blob->NegotiateFlags |= cpu_to_le32(flags);
472
473 sec_blob->LmChallengeResponse.BufferOffset =
474 cpu_to_le32(sizeof(AUTHENTICATE_MESSAGE));
475 sec_blob->LmChallengeResponse.Length = 0;
476 sec_blob->LmChallengeResponse.MaximumLength = 0;
477
478 /* calculate session key, BB what about adding similar ntlmv2 path? */
479 SMBNTencrypt(ses->password, ses->server->cryptKey, ntlm_session_key);
480 if (first)
481 cifs_calculate_mac_key(&ses->server->mac_signing_key,
482 ntlm_session_key, ses->password);
483
484 memcpy(tmp, ntlm_session_key, CIFS_SESS_KEY_SIZE);
485 sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
486 sec_blob->NtChallengeResponse.Length = cpu_to_le16(CIFS_SESS_KEY_SIZE);
487 sec_blob->NtChallengeResponse.MaximumLength =
488 cpu_to_le16(CIFS_SESS_KEY_SIZE);
489
490 tmp += CIFS_SESS_KEY_SIZE;
491
492 if (ses->domainName == NULL) {
493 sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
494 sec_blob->DomainName.Length = 0;
495 sec_blob->DomainName.MaximumLength = 0;
496 tmp += 2;
497 } else {
498 int len;
499 len = cifs_strtoUCS((__le16 *)tmp, ses->domainName,
500 MAX_USERNAME_SIZE, nls_cp);
501 len *= 2; /* unicode is 2 bytes each */
502 len += 2; /* trailing null */
503 sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
504 sec_blob->DomainName.Length = cpu_to_le16(len);
505 sec_blob->DomainName.MaximumLength = cpu_to_le16(len);
506 tmp += len;
507 }
508
509 if (ses->userName == NULL) {
510 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
511 sec_blob->UserName.Length = 0;
512 sec_blob->UserName.MaximumLength = 0;
513 tmp += 2;
514 } else {
515 int len;
516 len = cifs_strtoUCS((__le16 *)tmp, ses->userName,
517 MAX_USERNAME_SIZE, nls_cp);
518 len *= 2; /* unicode is 2 bytes each */
519 len += 2; /* trailing null */
520 sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
521 sec_blob->UserName.Length = cpu_to_le16(len);
522 sec_blob->UserName.MaximumLength = cpu_to_le16(len);
523 tmp += len;
524 }
525
526 sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - pbuffer);
527 sec_blob->WorkstationName.Length = 0;
528 sec_blob->WorkstationName.MaximumLength = 0;
529 tmp += 2;
530
531 sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
532 sec_blob->SessionKey.Length = 0;
533 sec_blob->SessionKey.MaximumLength = 0;
534 return tmp - pbuffer;
535}
536
537
538static void setup_ntlmssp_neg_req(SESSION_SETUP_ANDX *pSMB,
539 struct cifsSesInfo *ses)
540{
541 build_ntlmssp_negotiate_blob(&pSMB->req.SecurityBlob[0], ses);
542 pSMB->req.SecurityBlobLength = cpu_to_le16(sizeof(NEGOTIATE_MESSAGE));
543
544 return;
545}
546
547static int setup_ntlmssp_auth_req(SESSION_SETUP_ANDX *pSMB,
548 struct cifsSesInfo *ses,
549 const struct nls_table *nls, int first_time)
550{
551 int bloblen;
552
553 bloblen = build_ntlmssp_auth_blob(&pSMB->req.SecurityBlob[0], ses, nls,
554 first_time);
555 pSMB->req.SecurityBlobLength = cpu_to_le16(bloblen);
556
557 return bloblen;
558}
559#endif
560
415int 561int
416CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, 562CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
417 const struct nls_table *nls_cp) 563 const struct nls_table *nls_cp)
@@ -430,6 +576,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
430 __u16 action; 576 __u16 action;
431 int bytes_remaining; 577 int bytes_remaining;
432 struct key *spnego_key = NULL; 578 struct key *spnego_key = NULL;
579 __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */
433 580
434 if (ses == NULL) 581 if (ses == NULL)
435 return -EINVAL; 582 return -EINVAL;
@@ -437,6 +584,10 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
437 type = ses->server->secType; 584 type = ses->server->secType;
438 585
439 cFYI(1, ("sess setup type %d", type)); 586 cFYI(1, ("sess setup type %d", type));
587ssetup_ntlmssp_authenticate:
588 if (phase == NtLmChallenge)
589 phase = NtLmAuthenticate; /* if ntlmssp, now final phase */
590
440 if (type == LANMAN) { 591 if (type == LANMAN) {
441#ifndef CONFIG_CIFS_WEAK_PW_HASH 592#ifndef CONFIG_CIFS_WEAK_PW_HASH
442 /* LANMAN and plaintext are less secure and off by default. 593 /* LANMAN and plaintext are less secure and off by default.
@@ -650,9 +801,53 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
650 goto ssetup_exit; 801 goto ssetup_exit;
651#endif /* CONFIG_CIFS_UPCALL */ 802#endif /* CONFIG_CIFS_UPCALL */
652 } else { 803 } else {
804#ifdef CONFIG_CIFS_EXPERIMENTAL
805 if ((experimEnabled > 1) && (type == RawNTLMSSP)) {
806 if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) {
807 cERROR(1, ("NTLMSSP requires Unicode support"));
808 rc = -ENOSYS;
809 goto ssetup_exit;
810 }
811
812 cFYI(1, ("ntlmssp session setup phase %d", phase));
813 pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
814 capabilities |= CAP_EXTENDED_SECURITY;
815 pSMB->req.Capabilities |= cpu_to_le32(capabilities);
816 if (phase == NtLmNegotiate) {
817 setup_ntlmssp_neg_req(pSMB, ses);
818 iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE);
819 } else if (phase == NtLmAuthenticate) {
820 int blob_len;
821 blob_len = setup_ntlmssp_auth_req(pSMB, ses,
822 nls_cp,
823 first_time);
824 iov[1].iov_len = blob_len;
825 /* Make sure that we tell the server that we
826 are using the uid that it just gave us back
827 on the response (challenge) */
828 smb_buf->Uid = ses->Suid;
829 } else {
830 cERROR(1, ("invalid phase %d", phase));
831 rc = -ENOSYS;
832 goto ssetup_exit;
833 }
834 iov[1].iov_base = &pSMB->req.SecurityBlob[0];
835 /* unicode strings must be word aligned */
836 if ((iov[0].iov_len + iov[1].iov_len) % 2) {
837 *bcc_ptr = 0;
838 bcc_ptr++;
839 }
840 unicode_oslm_strings(&bcc_ptr, nls_cp);
841 } else {
842 cERROR(1, ("secType %d not supported!", type));
843 rc = -ENOSYS;
844 goto ssetup_exit;
845 }
846#else
653 cERROR(1, ("secType %d not supported!", type)); 847 cERROR(1, ("secType %d not supported!", type));
654 rc = -ENOSYS; 848 rc = -ENOSYS;
655 goto ssetup_exit; 849 goto ssetup_exit;
850#endif
656 } 851 }
657 852
658 iov[2].iov_base = str_area; 853 iov[2].iov_base = str_area;
@@ -668,12 +863,23 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
668 /* SMB request buf freed in SendReceive2 */ 863 /* SMB request buf freed in SendReceive2 */
669 864
670 cFYI(1, ("ssetup rc from sendrecv2 is %d", rc)); 865 cFYI(1, ("ssetup rc from sendrecv2 is %d", rc));
671 if (rc)
672 goto ssetup_exit;
673 866
674 pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base; 867 pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base;
675 smb_buf = (struct smb_hdr *)iov[0].iov_base; 868 smb_buf = (struct smb_hdr *)iov[0].iov_base;
676 869
870 if ((type == RawNTLMSSP) && (smb_buf->Status.CifsError ==
871 cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))) {
872 if (phase != NtLmNegotiate) {
873 cERROR(1, ("Unexpected more processing error"));
874 goto ssetup_exit;
875 }
876 /* NTLMSSP Negotiate sent now processing challenge (response) */
877 phase = NtLmChallenge; /* process ntlmssp challenge */
878 rc = 0; /* MORE_PROC rc is not an error here, but expected */
879 }
880 if (rc)
881 goto ssetup_exit;
882
677 if ((smb_buf->WordCount != 3) && (smb_buf->WordCount != 4)) { 883 if ((smb_buf->WordCount != 3) && (smb_buf->WordCount != 4)) {
678 rc = -EIO; 884 rc = -EIO;
679 cERROR(1, ("bad word count %d", smb_buf->WordCount)); 885 cERROR(1, ("bad word count %d", smb_buf->WordCount));
@@ -692,22 +898,33 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time,
692 if (smb_buf->WordCount == 4) { 898 if (smb_buf->WordCount == 4) {
693 __u16 blob_len; 899 __u16 blob_len;
694 blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); 900 blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength);
695 bcc_ptr += blob_len;
696 if (blob_len > bytes_remaining) { 901 if (blob_len > bytes_remaining) {
697 cERROR(1, ("bad security blob length %d", blob_len)); 902 cERROR(1, ("bad security blob length %d", blob_len));
698 rc = -EINVAL; 903 rc = -EINVAL;
699 goto ssetup_exit; 904 goto ssetup_exit;
700 } 905 }
906 if (phase == NtLmChallenge) {
907 rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses);
908 /* now goto beginning for ntlmssp authenticate phase */
909 if (rc)
910 goto ssetup_exit;
911 }
912 bcc_ptr += blob_len;
701 bytes_remaining -= blob_len; 913 bytes_remaining -= blob_len;
702 } 914 }
703 915
704 /* BB check if Unicode and decode strings */ 916 /* BB check if Unicode and decode strings */
705 if (smb_buf->Flags2 & SMBFLG2_UNICODE) 917 if (smb_buf->Flags2 & SMBFLG2_UNICODE) {
706 rc = decode_unicode_ssetup(&bcc_ptr, bytes_remaining, 918 /* unicode string area must be word-aligned */
707 ses, nls_cp); 919 if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) {
708 else 920 ++bcc_ptr;
921 --bytes_remaining;
922 }
923 decode_unicode_ssetup(&bcc_ptr, bytes_remaining, ses, nls_cp);
924 } else {
709 rc = decode_ascii_ssetup(&bcc_ptr, bytes_remaining, 925 rc = decode_ascii_ssetup(&bcc_ptr, bytes_remaining,
710 ses, nls_cp); 926 ses, nls_cp);
927 }
711 928
712ssetup_exit: 929ssetup_exit:
713 if (spnego_key) { 930 if (spnego_key) {
@@ -721,5 +938,9 @@ ssetup_exit:
721 } else if (resp_buf_type == CIFS_LARGE_BUFFER) 938 } else if (resp_buf_type == CIFS_LARGE_BUFFER)
722 cifs_buf_release(iov[0].iov_base); 939 cifs_buf_release(iov[0].iov_base);
723 940
941 /* if ntlmssp, and negotiate succeeded, proceed to authenticate phase */
942 if ((phase == NtLmChallenge) && (rc == 0))
943 goto ssetup_ntlmssp_authenticate;
944
724 return rc; 945 return rc;
725} 946}
diff --git a/fs/cifs/smberr.h b/fs/cifs/smberr.h
index 7f50e8577c1c..c5084d27db7c 100644
--- a/fs/cifs/smberr.h
+++ b/fs/cifs/smberr.h
@@ -110,6 +110,7 @@
110 110
111/* Below errors are used internally (do not come over the wire) for passthrough 111/* Below errors are used internally (do not come over the wire) for passthrough
112 from STATUS codes to POSIX only */ 112 from STATUS codes to POSIX only */
113#define ERRsymlink 0xFFFD
113#define ErrTooManyLinks 0xFFFE 114#define ErrTooManyLinks 0xFFFE
114 115
115/* Following error codes may be generated with the ERRSRV error class.*/ 116/* Following error codes may be generated with the ERRSRV error class.*/
diff --git a/fs/compat.c b/fs/compat.c
index 3f84d5f15889..681ed81e6be0 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -181,22 +181,24 @@ asmlinkage long compat_sys_newstat(char __user * filename,
181 struct compat_stat __user *statbuf) 181 struct compat_stat __user *statbuf)
182{ 182{
183 struct kstat stat; 183 struct kstat stat;
184 int error = vfs_stat_fd(AT_FDCWD, filename, &stat); 184 int error;
185 185
186 if (!error) 186 error = vfs_stat(filename, &stat);
187 error = cp_compat_stat(&stat, statbuf); 187 if (error)
188 return error; 188 return error;
189 return cp_compat_stat(&stat, statbuf);
189} 190}
190 191
191asmlinkage long compat_sys_newlstat(char __user * filename, 192asmlinkage long compat_sys_newlstat(char __user * filename,
192 struct compat_stat __user *statbuf) 193 struct compat_stat __user *statbuf)
193{ 194{
194 struct kstat stat; 195 struct kstat stat;
195 int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); 196 int error;
196 197
197 if (!error) 198 error = vfs_lstat(filename, &stat);
198 error = cp_compat_stat(&stat, statbuf); 199 if (error)
199 return error; 200 return error;
201 return cp_compat_stat(&stat, statbuf);
200} 202}
201 203
202#ifndef __ARCH_WANT_STAT64 204#ifndef __ARCH_WANT_STAT64
@@ -204,21 +206,12 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user *filename,
204 struct compat_stat __user *statbuf, int flag) 206 struct compat_stat __user *statbuf, int flag)
205{ 207{
206 struct kstat stat; 208 struct kstat stat;
207 int error = -EINVAL; 209 int error;
208
209 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
210 goto out;
211
212 if (flag & AT_SYMLINK_NOFOLLOW)
213 error = vfs_lstat_fd(dfd, filename, &stat);
214 else
215 error = vfs_stat_fd(dfd, filename, &stat);
216
217 if (!error)
218 error = cp_compat_stat(&stat, statbuf);
219 210
220out: 211 error = vfs_fstatat(dfd, filename, &stat, flag);
221 return error; 212 if (error)
213 return error;
214 return cp_compat_stat(&stat, statbuf);
222} 215}
223#endif 216#endif
224 217
@@ -1483,6 +1476,7 @@ int compat_do_execve(char * filename,
1483 struct linux_binprm *bprm; 1476 struct linux_binprm *bprm;
1484 struct file *file; 1477 struct file *file;
1485 struct files_struct *displaced; 1478 struct files_struct *displaced;
1479 bool clear_in_exec;
1486 int retval; 1480 int retval;
1487 1481
1488 retval = unshare_files(&displaced); 1482 retval = unshare_files(&displaced);
@@ -1505,8 +1499,9 @@ int compat_do_execve(char * filename,
1505 goto out_unlock; 1499 goto out_unlock;
1506 1500
1507 retval = check_unsafe_exec(bprm); 1501 retval = check_unsafe_exec(bprm);
1508 if (retval) 1502 if (retval < 0)
1509 goto out_unlock; 1503 goto out_unlock;
1504 clear_in_exec = retval;
1510 1505
1511 file = open_exec(filename); 1506 file = open_exec(filename);
1512 retval = PTR_ERR(file); 1507 retval = PTR_ERR(file);
@@ -1553,9 +1548,7 @@ int compat_do_execve(char * filename,
1553 goto out; 1548 goto out;
1554 1549
1555 /* execve succeeded */ 1550 /* execve succeeded */
1556 write_lock(&current->fs->lock);
1557 current->fs->in_exec = 0; 1551 current->fs->in_exec = 0;
1558 write_unlock(&current->fs->lock);
1559 current->in_execve = 0; 1552 current->in_execve = 0;
1560 mutex_unlock(&current->cred_exec_mutex); 1553 mutex_unlock(&current->cred_exec_mutex);
1561 acct_update_integrals(current); 1554 acct_update_integrals(current);
@@ -1575,9 +1568,8 @@ out_file:
1575 } 1568 }
1576 1569
1577out_unmark: 1570out_unmark:
1578 write_lock(&current->fs->lock); 1571 if (clear_in_exec)
1579 current->fs->in_exec = 0; 1572 current->fs->in_exec = 0;
1580 write_unlock(&current->fs->lock);
1581 1573
1582out_unlock: 1574out_unlock:
1583 current->in_execve = 0; 1575 current->in_execve = 0;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 3e87ce443ea2..b83f6bcfa51a 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -58,7 +58,6 @@
58#include <linux/i2c.h> 58#include <linux/i2c.h>
59#include <linux/i2c-dev.h> 59#include <linux/i2c-dev.h>
60#include <linux/atalk.h> 60#include <linux/atalk.h>
61#include <linux/loop.h>
62 61
63#include <net/bluetooth/bluetooth.h> 62#include <net/bluetooth/bluetooth.h>
64#include <net/bluetooth/hci.h> 63#include <net/bluetooth/hci.h>
@@ -68,6 +67,7 @@
68#include <linux/gigaset_dev.h> 67#include <linux/gigaset_dev.h>
69 68
70#ifdef CONFIG_BLOCK 69#ifdef CONFIG_BLOCK
70#include <linux/loop.h>
71#include <scsi/scsi.h> 71#include <scsi/scsi.h>
72#include <scsi/scsi_ioctl.h> 72#include <scsi/scsi_ioctl.h>
73#include <scsi/sg.h> 73#include <scsi/sg.h>
@@ -2660,6 +2660,8 @@ HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl)
2660HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl) 2660HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl)
2661/* block stuff */ 2661/* block stuff */
2662#ifdef CONFIG_BLOCK 2662#ifdef CONFIG_BLOCK
2663/* loop */
2664IGNORE_IOCTL(LOOP_CLR_FD)
2663/* Raw devices */ 2665/* Raw devices */
2664HANDLE_IOCTL(RAW_SETBIND, raw_ioctl) 2666HANDLE_IOCTL(RAW_SETBIND, raw_ioctl)
2665HANDLE_IOCTL(RAW_GETBIND, raw_ioctl) 2667HANDLE_IOCTL(RAW_GETBIND, raw_ioctl)
@@ -2728,9 +2730,6 @@ HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans)
2728IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32) 2730IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32)
2729IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32) 2731IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32)
2730 2732
2731/* loop */
2732IGNORE_IOCTL(LOOP_CLR_FD)
2733
2734#ifdef CONFIG_SPARC 2733#ifdef CONFIG_SPARC
2735/* Sparc framebuffers, handled in sbusfb_compat_ioctl() */ 2734/* Sparc framebuffers, handled in sbusfb_compat_ioctl() */
2736IGNORE_IOCTL(FBIOGTYPE) 2735IGNORE_IOCTL(FBIOGTYPE)
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 932a92b31483..c8afa6b1d91d 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -135,7 +135,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
135 struct path path; 135 struct path path;
136 struct configfs_dirent *sd; 136 struct configfs_dirent *sd;
137 struct config_item *parent_item; 137 struct config_item *parent_item;
138 struct config_item *target_item; 138 struct config_item *target_item = NULL;
139 struct config_item_type *type; 139 struct config_item_type *type;
140 140
141 ret = -EPERM; /* What lack-of-symlink returns */ 141 ret = -EPERM; /* What lack-of-symlink returns */
diff --git a/fs/dcache.c b/fs/dcache.c
index 761d30be2683..1fcffebfb44f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2149,7 +2149,6 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2149 int result; 2149 int result;
2150 unsigned long seq; 2150 unsigned long seq;
2151 2151
2152 /* FIXME: This is old behavior, needed? Please check callers. */
2153 if (new_dentry == old_dentry) 2152 if (new_dentry == old_dentry)
2154 return 1; 2153 return 1;
2155 2154
diff --git a/fs/direct-io.c b/fs/direct-io.c
index da258e7249cc..05763bbc2050 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -307,8 +307,6 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev,
307 struct bio *bio; 307 struct bio *bio;
308 308
309 bio = bio_alloc(GFP_KERNEL, nr_vecs); 309 bio = bio_alloc(GFP_KERNEL, nr_vecs);
310 if (bio == NULL)
311 return -ENOMEM;
312 310
313 bio->bi_bdev = bdev; 311 bio->bi_bdev = bdev;
314 bio->bi_sector = first_sector; 312 bio->bi_sector = first_sector;
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 8b65f289ee00..b91851f1cda3 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -483,15 +483,7 @@ int ecryptfs_encrypt_page(struct page *page)
483 ecryptfs_inode = page->mapping->host; 483 ecryptfs_inode = page->mapping->host;
484 crypt_stat = 484 crypt_stat =
485 &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); 485 &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat);
486 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { 486 BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED));
487 rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page,
488 0, PAGE_CACHE_SIZE);
489 if (rc)
490 printk(KERN_ERR "%s: Error attempting to copy "
491 "page at index [%ld]\n", __func__,
492 page->index);
493 goto out;
494 }
495 enc_extent_page = alloc_page(GFP_USER); 487 enc_extent_page = alloc_page(GFP_USER);
496 if (!enc_extent_page) { 488 if (!enc_extent_page) {
497 rc = -ENOMEM; 489 rc = -ENOMEM;
@@ -620,16 +612,7 @@ int ecryptfs_decrypt_page(struct page *page)
620 ecryptfs_inode = page->mapping->host; 612 ecryptfs_inode = page->mapping->host;
621 crypt_stat = 613 crypt_stat =
622 &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); 614 &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat);
623 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { 615 BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED));
624 rc = ecryptfs_read_lower_page_segment(page, page->index, 0,
625 PAGE_CACHE_SIZE,
626 ecryptfs_inode);
627 if (rc)
628 printk(KERN_ERR "%s: Error attempting to copy "
629 "page at index [%ld]\n", __func__,
630 page->index);
631 goto out;
632 }
633 enc_extent_page = alloc_page(GFP_USER); 616 enc_extent_page = alloc_page(GFP_USER);
634 if (!enc_extent_page) { 617 if (!enc_extent_page) {
635 rc = -ENOMEM; 618 rc = -ENOMEM;
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 064c5820e4e5..00b30a2d5466 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -269,6 +269,7 @@ struct ecryptfs_crypt_stat {
269#define ECRYPTFS_ENCRYPT_FILENAMES 0x00000800 269#define ECRYPTFS_ENCRYPT_FILENAMES 0x00000800
270#define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00001000 270#define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00001000
271#define ECRYPTFS_ENCFN_USE_FEK 0x00002000 271#define ECRYPTFS_ENCFN_USE_FEK 0x00002000
272#define ECRYPTFS_UNLINK_SIGS 0x00004000
272 u32 flags; 273 u32 flags;
273 unsigned int file_version; 274 unsigned int file_version;
274 size_t iv_bytes; 275 size_t iv_bytes;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 55b3145b8072..2f0945d63297 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -379,9 +379,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
379 goto out_d_drop; 379 goto out_d_drop;
380 } 380 }
381 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); 381 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
382 mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
382 lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name, 383 lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
383 lower_dir_dentry, 384 lower_dir_dentry,
384 ecryptfs_dentry->d_name.len); 385 ecryptfs_dentry->d_name.len);
386 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
385 if (IS_ERR(lower_dentry)) { 387 if (IS_ERR(lower_dentry)) {
386 rc = PTR_ERR(lower_dentry); 388 rc = PTR_ERR(lower_dentry);
387 printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " 389 printk(KERN_ERR "%s: lookup_one_len() returned [%d] on "
@@ -406,9 +408,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
406 "filename; rc = [%d]\n", __func__, rc); 408 "filename; rc = [%d]\n", __func__, rc);
407 goto out_d_drop; 409 goto out_d_drop;
408 } 410 }
411 mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
409 lower_dentry = lookup_one_len(encrypted_and_encoded_name, 412 lower_dentry = lookup_one_len(encrypted_and_encoded_name,
410 lower_dir_dentry, 413 lower_dir_dentry,
411 encrypted_and_encoded_name_size - 1); 414 encrypted_and_encoded_name_size - 1);
415 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
412 if (IS_ERR(lower_dentry)) { 416 if (IS_ERR(lower_dentry)) {
413 rc = PTR_ERR(lower_dentry); 417 rc = PTR_ERR(lower_dentry);
414 printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " 418 printk(KERN_ERR "%s: lookup_one_len() returned [%d] on "
@@ -636,8 +640,9 @@ static int
636ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) 640ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
637{ 641{
638 char *lower_buf; 642 char *lower_buf;
643 size_t lower_bufsiz;
639 struct dentry *lower_dentry; 644 struct dentry *lower_dentry;
640 struct ecryptfs_crypt_stat *crypt_stat; 645 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
641 char *plaintext_name; 646 char *plaintext_name;
642 size_t plaintext_name_size; 647 size_t plaintext_name_size;
643 mm_segment_t old_fs; 648 mm_segment_t old_fs;
@@ -648,12 +653,21 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
648 rc = -EINVAL; 653 rc = -EINVAL;
649 goto out; 654 goto out;
650 } 655 }
651 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; 656 mount_crypt_stat = &ecryptfs_superblock_to_private(
657 dentry->d_sb)->mount_crypt_stat;
658 /*
659 * If the lower filename is encrypted, it will result in a significantly
660 * longer name. If needed, truncate the name after decode and decrypt.
661 */
662 if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
663 lower_bufsiz = PATH_MAX;
664 else
665 lower_bufsiz = bufsiz;
652 /* Released in this function */ 666 /* Released in this function */
653 lower_buf = kmalloc(bufsiz, GFP_KERNEL); 667 lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL);
654 if (lower_buf == NULL) { 668 if (lower_buf == NULL) {
655 printk(KERN_ERR "%s: Out of memory whilst attempting to " 669 printk(KERN_ERR "%s: Out of memory whilst attempting to "
656 "kmalloc [%d] bytes\n", __func__, bufsiz); 670 "kmalloc [%zd] bytes\n", __func__, lower_bufsiz);
657 rc = -ENOMEM; 671 rc = -ENOMEM;
658 goto out; 672 goto out;
659 } 673 }
@@ -661,7 +675,7 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
661 set_fs(get_ds()); 675 set_fs(get_ds());
662 rc = lower_dentry->d_inode->i_op->readlink(lower_dentry, 676 rc = lower_dentry->d_inode->i_op->readlink(lower_dentry,
663 (char __user *)lower_buf, 677 (char __user *)lower_buf,
664 bufsiz); 678 lower_bufsiz);
665 set_fs(old_fs); 679 set_fs(old_fs);
666 if (rc >= 0) { 680 if (rc >= 0) {
667 rc = ecryptfs_decode_and_decrypt_filename(&plaintext_name, 681 rc = ecryptfs_decode_and_decrypt_filename(&plaintext_name,
@@ -674,7 +688,9 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
674 rc); 688 rc);
675 goto out_free_lower_buf; 689 goto out_free_lower_buf;
676 } 690 }
677 rc = copy_to_user(buf, plaintext_name, plaintext_name_size); 691 /* Check for bufsiz <= 0 done in sys_readlinkat() */
692 rc = copy_to_user(buf, plaintext_name,
693 min((size_t) bufsiz, plaintext_name_size));
678 if (rc) 694 if (rc)
679 rc = -EFAULT; 695 rc = -EFAULT;
680 else 696 else
@@ -814,6 +830,13 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
814 size_t num_zeros = (PAGE_CACHE_SIZE 830 size_t num_zeros = (PAGE_CACHE_SIZE
815 - (new_length & ~PAGE_CACHE_MASK)); 831 - (new_length & ~PAGE_CACHE_MASK));
816 832
833 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
834 rc = vmtruncate(inode, new_length);
835 if (rc)
836 goto out_free;
837 rc = vmtruncate(lower_dentry->d_inode, new_length);
838 goto out_free;
839 }
817 if (num_zeros) { 840 if (num_zeros) {
818 char *zeros_virt; 841 char *zeros_virt;
819 842
@@ -915,8 +938,6 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
915 } 938 }
916 rc = 0; 939 rc = 0;
917 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 940 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
918 mutex_unlock(&crypt_stat->cs_mutex);
919 goto out;
920 } 941 }
921 } 942 }
922 mutex_unlock(&crypt_stat->cs_mutex); 943 mutex_unlock(&crypt_stat->cs_mutex);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index aed56c25539b..ccabd5faa04d 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -190,14 +190,14 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
190 init_special_inode(inode, lower_inode->i_mode, 190 init_special_inode(inode, lower_inode->i_mode,
191 lower_inode->i_rdev); 191 lower_inode->i_rdev);
192 dentry->d_op = &ecryptfs_dops; 192 dentry->d_op = &ecryptfs_dops;
193 if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD)
194 d_add(dentry, inode);
195 else
196 d_instantiate(dentry, inode);
197 fsstack_copy_attr_all(inode, lower_inode, NULL); 193 fsstack_copy_attr_all(inode, lower_inode, NULL);
198 /* This size will be overwritten for real files w/ headers and 194 /* This size will be overwritten for real files w/ headers and
199 * other metadata */ 195 * other metadata */
200 fsstack_copy_inode_size(inode, lower_inode); 196 fsstack_copy_inode_size(inode, lower_inode);
197 if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD)
198 d_add(dentry, inode);
199 else
200 d_instantiate(dentry, inode);
201out: 201out:
202 return rc; 202 return rc;
203} 203}
@@ -208,7 +208,7 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig,
208 ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, 208 ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata,
209 ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig, 209 ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig,
210 ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes, 210 ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes,
211 ecryptfs_opt_err }; 211 ecryptfs_opt_unlink_sigs, ecryptfs_opt_err };
212 212
213static const match_table_t tokens = { 213static const match_table_t tokens = {
214 {ecryptfs_opt_sig, "sig=%s"}, 214 {ecryptfs_opt_sig, "sig=%s"},
@@ -222,6 +222,7 @@ static const match_table_t tokens = {
222 {ecryptfs_opt_fnek_sig, "ecryptfs_fnek_sig=%s"}, 222 {ecryptfs_opt_fnek_sig, "ecryptfs_fnek_sig=%s"},
223 {ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"}, 223 {ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"},
224 {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"}, 224 {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"},
225 {ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"},
225 {ecryptfs_opt_err, NULL} 226 {ecryptfs_opt_err, NULL}
226}; 227};
227 228
@@ -402,6 +403,9 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
402 fn_cipher_key_bytes; 403 fn_cipher_key_bytes;
403 fn_cipher_key_bytes_set = 1; 404 fn_cipher_key_bytes_set = 1;
404 break; 405 break;
406 case ecryptfs_opt_unlink_sigs:
407 mount_crypt_stat->flags |= ECRYPTFS_UNLINK_SIGS;
408 break;
405 case ecryptfs_opt_err: 409 case ecryptfs_opt_err:
406 default: 410 default:
407 printk(KERN_WARNING 411 printk(KERN_WARNING
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 295e7fa56755..f1c17e87c5fb 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -133,45 +133,6 @@ out:
133 return rc; 133 return rc;
134} 134}
135 135
136static int
137ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type,
138 struct ecryptfs_msg_ctx **msg_ctx);
139
140/**
141 * ecryptfs_send_raw_message
142 * @msg_type: Message type
143 * @daemon: Daemon struct for recipient of message
144 *
145 * A raw message is one that does not include an ecryptfs_message
146 * struct. It simply has a type.
147 *
148 * Must be called with ecryptfs_daemon_hash_mux held.
149 *
150 * Returns zero on success; non-zero otherwise
151 */
152static int ecryptfs_send_raw_message(u8 msg_type,
153 struct ecryptfs_daemon *daemon)
154{
155 struct ecryptfs_msg_ctx *msg_ctx;
156 int rc;
157
158 rc = ecryptfs_send_message_locked(NULL, 0, msg_type, &msg_ctx);
159 if (rc) {
160 printk(KERN_ERR "%s: Error whilst attempting to send "
161 "message to ecryptfsd; rc = [%d]\n", __func__, rc);
162 goto out;
163 }
164 /* Raw messages are logically context-free (e.g., no
165 * reply is expected), so we set the state of the
166 * ecryptfs_msg_ctx object to indicate that it should
167 * be freed as soon as the message is sent. */
168 mutex_lock(&msg_ctx->mux);
169 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY;
170 mutex_unlock(&msg_ctx->mux);
171out:
172 return rc;
173}
174
175/** 136/**
176 * ecryptfs_spawn_daemon - Create and initialize a new daemon struct 137 * ecryptfs_spawn_daemon - Create and initialize a new daemon struct
177 * @daemon: Pointer to set to newly allocated daemon struct 138 * @daemon: Pointer to set to newly allocated daemon struct
@@ -212,49 +173,6 @@ out:
212} 173}
213 174
214/** 175/**
215 * ecryptfs_process_helo
216 * @euid: The user ID owner of the message
217 * @user_ns: The namespace in which @euid applies
218 * @pid: The process ID for the userspace program that sent the
219 * message
220 *
221 * Adds the euid and pid values to the daemon euid hash. If an euid
222 * already has a daemon pid registered, the daemon will be
223 * unregistered before the new daemon is put into the hash list.
224 * Returns zero after adding a new daemon to the hash list;
225 * non-zero otherwise.
226 */
227int ecryptfs_process_helo(uid_t euid, struct user_namespace *user_ns,
228 struct pid *pid)
229{
230 struct ecryptfs_daemon *new_daemon;
231 struct ecryptfs_daemon *old_daemon;
232 int rc;
233
234 mutex_lock(&ecryptfs_daemon_hash_mux);
235 rc = ecryptfs_find_daemon_by_euid(&old_daemon, euid, user_ns);
236 if (rc != 0) {
237 printk(KERN_WARNING "Received request from user [%d] "
238 "to register daemon [0x%p]; unregistering daemon "
239 "[0x%p]\n", euid, pid, old_daemon->pid);
240 rc = ecryptfs_send_raw_message(ECRYPTFS_MSG_QUIT, old_daemon);
241 if (rc)
242 printk(KERN_WARNING "Failed to send QUIT "
243 "message to daemon [0x%p]; rc = [%d]\n",
244 old_daemon->pid, rc);
245 hlist_del(&old_daemon->euid_chain);
246 kfree(old_daemon);
247 }
248 rc = ecryptfs_spawn_daemon(&new_daemon, euid, user_ns, pid);
249 if (rc)
250 printk(KERN_ERR "%s: The gods are displeased with this attempt "
251 "to create a new daemon object for euid [%d]; pid "
252 "[0x%p]; rc = [%d]\n", __func__, euid, pid, rc);
253 mutex_unlock(&ecryptfs_daemon_hash_mux);
254 return rc;
255}
256
257/**
258 * ecryptfs_exorcise_daemon - Destroy the daemon struct 176 * ecryptfs_exorcise_daemon - Destroy the daemon struct
259 * 177 *
260 * Must be called ceremoniously while in possession of 178 * Must be called ceremoniously while in possession of
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index a67fea655f49..4ec8f61ccf5a 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -193,26 +193,20 @@ int ecryptfs_send_miscdev(char *data, size_t data_size,
193 int rc = 0; 193 int rc = 0;
194 194
195 mutex_lock(&msg_ctx->mux); 195 mutex_lock(&msg_ctx->mux);
196 if (data) { 196 msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size),
197 msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size), 197 GFP_KERNEL);
198 GFP_KERNEL); 198 if (!msg_ctx->msg) {
199 if (!msg_ctx->msg) { 199 rc = -ENOMEM;
200 rc = -ENOMEM; 200 printk(KERN_ERR "%s: Out of memory whilst attempting "
201 printk(KERN_ERR "%s: Out of memory whilst attempting " 201 "to kmalloc(%zd, GFP_KERNEL)\n", __func__,
202 "to kmalloc(%zd, GFP_KERNEL)\n", __func__, 202 (sizeof(*msg_ctx->msg) + data_size));
203 (sizeof(*msg_ctx->msg) + data_size)); 203 goto out_unlock;
204 goto out_unlock; 204 }
205 }
206 } else
207 msg_ctx->msg = NULL;
208 msg_ctx->msg->index = msg_ctx->index; 205 msg_ctx->msg->index = msg_ctx->index;
209 msg_ctx->msg->data_len = data_size; 206 msg_ctx->msg->data_len = data_size;
210 msg_ctx->type = msg_type; 207 msg_ctx->type = msg_type;
211 if (data) { 208 memcpy(msg_ctx->msg->data, data, data_size);
212 memcpy(msg_ctx->msg->data, data, data_size); 209 msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size);
213 msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size);
214 } else
215 msg_ctx->msg_size = 0;
216 mutex_lock(&daemon->mux); 210 mutex_lock(&daemon->mux);
217 list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue); 211 list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue);
218 daemon->num_queued_msg_ctx++; 212 daemon->num_queued_msg_ctx++;
@@ -418,18 +412,13 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
418 412
419 if (count == 0) 413 if (count == 0)
420 goto out; 414 goto out;
421 data = kmalloc(count, GFP_KERNEL); 415
422 if (!data) { 416 data = memdup_user(buf, count);
423 printk(KERN_ERR "%s: Out of memory whilst attempting to " 417 if (IS_ERR(data)) {
424 "kmalloc([%zd], GFP_KERNEL)\n", __func__, count); 418 printk(KERN_ERR "%s: memdup_user returned error [%ld]\n",
419 __func__, PTR_ERR(data));
425 goto out; 420 goto out;
426 } 421 }
427 rc = copy_from_user(data, buf, count);
428 if (rc) {
429 printk(KERN_ERR "%s: copy_from_user returned error [%d]\n",
430 __func__, rc);
431 goto out_free;
432 }
433 sz = count; 422 sz = count;
434 i = 0; 423 i = 0;
435 switch (data[i++]) { 424 switch (data[i++]) {
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 46cec2b69796..5c6bab9786e3 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -449,6 +449,7 @@ int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode)
449 struct ecryptfs_crypt_stat *crypt_stat; 449 struct ecryptfs_crypt_stat *crypt_stat;
450 450
451 crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; 451 crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
452 BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED));
452 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) 453 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
453 return ecryptfs_write_inode_size_to_xattr(ecryptfs_inode); 454 return ecryptfs_write_inode_size_to_xattr(ecryptfs_inode);
454 else 455 else
@@ -490,6 +491,16 @@ static int ecryptfs_write_end(struct file *file,
490 ecryptfs_printk(KERN_DEBUG, "Not a new file\n"); 491 ecryptfs_printk(KERN_DEBUG, "Not a new file\n");
491 ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" 492 ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
492 "(page w/ index = [0x%.16x], to = [%d])\n", index, to); 493 "(page w/ index = [0x%.16x], to = [%d])\n", index, to);
494 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
495 rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page, 0,
496 to);
497 if (!rc) {
498 rc = copied;
499 fsstack_copy_inode_size(ecryptfs_inode,
500 ecryptfs_inode_to_lower(ecryptfs_inode));
501 }
502 goto out;
503 }
493 /* Fills in zeros if 'to' goes beyond inode size */ 504 /* Fills in zeros if 'to' goes beyond inode size */
494 rc = fill_zeros_to_end_of_page(page, to); 505 rc = fill_zeros_to_end_of_page(page, to);
495 if (rc) { 506 if (rc) {
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 75c2ea9fee35..a137c6ea2fee 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -117,13 +117,15 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
117 size_t size) 117 size_t size)
118{ 118{
119 struct page *ecryptfs_page; 119 struct page *ecryptfs_page;
120 struct ecryptfs_crypt_stat *crypt_stat;
121 struct inode *ecryptfs_inode = ecryptfs_file->f_dentry->d_inode;
120 char *ecryptfs_page_virt; 122 char *ecryptfs_page_virt;
121 loff_t ecryptfs_file_size = 123 loff_t ecryptfs_file_size = i_size_read(ecryptfs_inode);
122 i_size_read(ecryptfs_file->f_dentry->d_inode);
123 loff_t data_offset = 0; 124 loff_t data_offset = 0;
124 loff_t pos; 125 loff_t pos;
125 int rc = 0; 126 int rc = 0;
126 127
128 crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
127 /* 129 /*
128 * if we are writing beyond current size, then start pos 130 * if we are writing beyond current size, then start pos
129 * at the current size - we'll fill in zeros from there. 131 * at the current size - we'll fill in zeros from there.
@@ -184,7 +186,13 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
184 flush_dcache_page(ecryptfs_page); 186 flush_dcache_page(ecryptfs_page);
185 SetPageUptodate(ecryptfs_page); 187 SetPageUptodate(ecryptfs_page);
186 unlock_page(ecryptfs_page); 188 unlock_page(ecryptfs_page);
187 rc = ecryptfs_encrypt_page(ecryptfs_page); 189 if (crypt_stat->flags & ECRYPTFS_ENCRYPTED)
190 rc = ecryptfs_encrypt_page(ecryptfs_page);
191 else
192 rc = ecryptfs_write_lower_page_segment(ecryptfs_inode,
193 ecryptfs_page,
194 start_offset_in_page,
195 data_offset);
188 page_cache_release(ecryptfs_page); 196 page_cache_release(ecryptfs_page);
189 if (rc) { 197 if (rc) {
190 printk(KERN_ERR "%s: Error encrypting " 198 printk(KERN_ERR "%s: Error encrypting "
@@ -194,14 +202,16 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
194 pos += num_bytes; 202 pos += num_bytes;
195 } 203 }
196 if ((offset + size) > ecryptfs_file_size) { 204 if ((offset + size) > ecryptfs_file_size) {
197 i_size_write(ecryptfs_file->f_dentry->d_inode, (offset + size)); 205 i_size_write(ecryptfs_inode, (offset + size));
198 rc = ecryptfs_write_inode_size_to_metadata( 206 if (crypt_stat->flags & ECRYPTFS_ENCRYPTED) {
199 ecryptfs_file->f_dentry->d_inode); 207 rc = ecryptfs_write_inode_size_to_metadata(
200 if (rc) { 208 ecryptfs_inode);
201 printk(KERN_ERR "Problem with " 209 if (rc) {
202 "ecryptfs_write_inode_size_to_metadata; " 210 printk(KERN_ERR "Problem with "
203 "rc = [%d]\n", rc); 211 "ecryptfs_write_inode_size_to_metadata; "
204 goto out; 212 "rc = [%d]\n", rc);
213 goto out;
214 }
205 } 215 }
206 } 216 }
207out: 217out:
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index c27ac2b358a1..fa4c7e7d15d9 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -170,7 +170,10 @@ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
170 list_for_each_entry(walker, 170 list_for_each_entry(walker,
171 &mount_crypt_stat->global_auth_tok_list, 171 &mount_crypt_stat->global_auth_tok_list,
172 mount_crypt_stat_list) { 172 mount_crypt_stat_list) {
173 seq_printf(m, ",ecryptfs_sig=%s", walker->sig); 173 if (walker->flags & ECRYPTFS_AUTH_TOK_FNEK)
174 seq_printf(m, ",ecryptfs_fnek_sig=%s", walker->sig);
175 else
176 seq_printf(m, ",ecryptfs_sig=%s", walker->sig);
174 } 177 }
175 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); 178 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
176 179
@@ -186,6 +189,8 @@ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
186 seq_printf(m, ",ecryptfs_xattr_metadata"); 189 seq_printf(m, ",ecryptfs_xattr_metadata");
187 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) 190 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
188 seq_printf(m, ",ecryptfs_encrypted_view"); 191 seq_printf(m, ",ecryptfs_encrypted_view");
192 if (mount_crypt_stat->flags & ECRYPTFS_UNLINK_SIGS)
193 seq_printf(m, ",ecryptfs_unlink_sigs");
189 194
190 return 0; 195 return 0;
191} 196}
diff --git a/fs/exec.c b/fs/exec.c
index 052a961e41aa..639177b0eeac 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -69,17 +69,18 @@ int suid_dumpable = 0;
69static LIST_HEAD(formats); 69static LIST_HEAD(formats);
70static DEFINE_RWLOCK(binfmt_lock); 70static DEFINE_RWLOCK(binfmt_lock);
71 71
72int register_binfmt(struct linux_binfmt * fmt) 72int __register_binfmt(struct linux_binfmt * fmt, int insert)
73{ 73{
74 if (!fmt) 74 if (!fmt)
75 return -EINVAL; 75 return -EINVAL;
76 write_lock(&binfmt_lock); 76 write_lock(&binfmt_lock);
77 list_add(&fmt->lh, &formats); 77 insert ? list_add(&fmt->lh, &formats) :
78 list_add_tail(&fmt->lh, &formats);
78 write_unlock(&binfmt_lock); 79 write_unlock(&binfmt_lock);
79 return 0; 80 return 0;
80} 81}
81 82
82EXPORT_SYMBOL(register_binfmt); 83EXPORT_SYMBOL(__register_binfmt);
83 84
84void unregister_binfmt(struct linux_binfmt * fmt) 85void unregister_binfmt(struct linux_binfmt * fmt)
85{ 86{
@@ -1060,7 +1061,6 @@ EXPORT_SYMBOL(install_exec_creds);
1060int check_unsafe_exec(struct linux_binprm *bprm) 1061int check_unsafe_exec(struct linux_binprm *bprm)
1061{ 1062{
1062 struct task_struct *p = current, *t; 1063 struct task_struct *p = current, *t;
1063 unsigned long flags;
1064 unsigned n_fs; 1064 unsigned n_fs;
1065 int res = 0; 1065 int res = 0;
1066 1066
@@ -1068,21 +1068,22 @@ int check_unsafe_exec(struct linux_binprm *bprm)
1068 1068
1069 n_fs = 1; 1069 n_fs = 1;
1070 write_lock(&p->fs->lock); 1070 write_lock(&p->fs->lock);
1071 lock_task_sighand(p, &flags); 1071 rcu_read_lock();
1072 for (t = next_thread(p); t != p; t = next_thread(t)) { 1072 for (t = next_thread(p); t != p; t = next_thread(t)) {
1073 if (t->fs == p->fs) 1073 if (t->fs == p->fs)
1074 n_fs++; 1074 n_fs++;
1075 } 1075 }
1076 rcu_read_unlock();
1076 1077
1077 if (p->fs->users > n_fs) { 1078 if (p->fs->users > n_fs) {
1078 bprm->unsafe |= LSM_UNSAFE_SHARE; 1079 bprm->unsafe |= LSM_UNSAFE_SHARE;
1079 } else { 1080 } else {
1080 if (p->fs->in_exec) 1081 res = -EAGAIN;
1081 res = -EAGAIN; 1082 if (!p->fs->in_exec) {
1082 p->fs->in_exec = 1; 1083 p->fs->in_exec = 1;
1084 res = 1;
1085 }
1083 } 1086 }
1084
1085 unlock_task_sighand(p, &flags);
1086 write_unlock(&p->fs->lock); 1087 write_unlock(&p->fs->lock);
1087 1088
1088 return res; 1089 return res;
@@ -1284,6 +1285,7 @@ int do_execve(char * filename,
1284 struct linux_binprm *bprm; 1285 struct linux_binprm *bprm;
1285 struct file *file; 1286 struct file *file;
1286 struct files_struct *displaced; 1287 struct files_struct *displaced;
1288 bool clear_in_exec;
1287 int retval; 1289 int retval;
1288 1290
1289 retval = unshare_files(&displaced); 1291 retval = unshare_files(&displaced);
@@ -1306,8 +1308,9 @@ int do_execve(char * filename,
1306 goto out_unlock; 1308 goto out_unlock;
1307 1309
1308 retval = check_unsafe_exec(bprm); 1310 retval = check_unsafe_exec(bprm);
1309 if (retval) 1311 if (retval < 0)
1310 goto out_unlock; 1312 goto out_unlock;
1313 clear_in_exec = retval;
1311 1314
1312 file = open_exec(filename); 1315 file = open_exec(filename);
1313 retval = PTR_ERR(file); 1316 retval = PTR_ERR(file);
@@ -1355,9 +1358,7 @@ int do_execve(char * filename,
1355 goto out; 1358 goto out;
1356 1359
1357 /* execve succeeded */ 1360 /* execve succeeded */
1358 write_lock(&current->fs->lock);
1359 current->fs->in_exec = 0; 1361 current->fs->in_exec = 0;
1360 write_unlock(&current->fs->lock);
1361 current->in_execve = 0; 1362 current->in_execve = 0;
1362 mutex_unlock(&current->cred_exec_mutex); 1363 mutex_unlock(&current->cred_exec_mutex);
1363 acct_update_integrals(current); 1364 acct_update_integrals(current);
@@ -1377,9 +1378,8 @@ out_file:
1377 } 1378 }
1378 1379
1379out_unmark: 1380out_unmark:
1380 write_lock(&current->fs->lock); 1381 if (clear_in_exec)
1381 current->fs->in_exec = 0; 1382 current->fs->in_exec = 0;
1382 write_unlock(&current->fs->lock);
1383 1383
1384out_unlock: 1384out_unlock:
1385 current->in_execve = 0; 1385 current->in_execve = 0;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index b43b95563663..acf678831103 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -590,9 +590,8 @@ static int ext2_get_blocks(struct inode *inode,
590 590
591 if (depth == 0) 591 if (depth == 0)
592 return (err); 592 return (err);
593reread:
594 partial = ext2_get_branch(inode, depth, offsets, chain, &err);
595 593
594 partial = ext2_get_branch(inode, depth, offsets, chain, &err);
596 /* Simplest case - block found, no allocation needed */ 595 /* Simplest case - block found, no allocation needed */
597 if (!partial) { 596 if (!partial) {
598 first_block = le32_to_cpu(chain[depth - 1].key); 597 first_block = le32_to_cpu(chain[depth - 1].key);
@@ -602,15 +601,16 @@ reread:
602 while (count < maxblocks && count <= blocks_to_boundary) { 601 while (count < maxblocks && count <= blocks_to_boundary) {
603 ext2_fsblk_t blk; 602 ext2_fsblk_t blk;
604 603
605 if (!verify_chain(chain, partial)) { 604 if (!verify_chain(chain, chain + depth - 1)) {
606 /* 605 /*
607 * Indirect block might be removed by 606 * Indirect block might be removed by
608 * truncate while we were reading it. 607 * truncate while we were reading it.
609 * Handling of that case: forget what we've 608 * Handling of that case: forget what we've
610 * got now, go to reread. 609 * got now, go to reread.
611 */ 610 */
611 err = -EAGAIN;
612 count = 0; 612 count = 0;
613 goto changed; 613 break;
614 } 614 }
615 blk = le32_to_cpu(*(chain[depth-1].p + count)); 615 blk = le32_to_cpu(*(chain[depth-1].p + count));
616 if (blk == first_block + count) 616 if (blk == first_block + count)
@@ -618,7 +618,8 @@ reread:
618 else 618 else
619 break; 619 break;
620 } 620 }
621 goto got_it; 621 if (err != -EAGAIN)
622 goto got_it;
622 } 623 }
623 624
624 /* Next simple case - plain lookup or failed read of indirect block */ 625 /* Next simple case - plain lookup or failed read of indirect block */
@@ -626,6 +627,33 @@ reread:
626 goto cleanup; 627 goto cleanup;
627 628
628 mutex_lock(&ei->truncate_mutex); 629 mutex_lock(&ei->truncate_mutex);
630 /*
631 * If the indirect block is missing while we are reading
632 * the chain(ext3_get_branch() returns -EAGAIN err), or
633 * if the chain has been changed after we grab the semaphore,
634 * (either because another process truncated this branch, or
635 * another get_block allocated this branch) re-grab the chain to see if
636 * the request block has been allocated or not.
637 *
638 * Since we already block the truncate/other get_block
639 * at this point, we will have the current copy of the chain when we
640 * splice the branch into the tree.
641 */
642 if (err == -EAGAIN || !verify_chain(chain, partial)) {
643 while (partial > chain) {
644 brelse(partial->bh);
645 partial--;
646 }
647 partial = ext2_get_branch(inode, depth, offsets, chain, &err);
648 if (!partial) {
649 count++;
650 mutex_unlock(&ei->truncate_mutex);
651 if (err)
652 goto cleanup;
653 clear_buffer_new(bh_result);
654 goto got_it;
655 }
656 }
629 657
630 /* 658 /*
631 * Okay, we need to do block allocation. Lazily initialize the block 659 * Okay, we need to do block allocation. Lazily initialize the block
@@ -683,12 +711,6 @@ cleanup:
683 partial--; 711 partial--;
684 } 712 }
685 return err; 713 return err;
686changed:
687 while (partial > chain) {
688 brelse(partial->bh);
689 partial--;
690 }
691 goto reread;
692} 714}
693 715
694int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) 716int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index f983225266dc..5c4afe652245 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1395,8 +1395,10 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
1395 blk++; 1395 blk++;
1396 } 1396 }
1397out: 1397out:
1398 if (len == towrite) 1398 if (len == towrite) {
1399 mutex_unlock(&inode->i_mutex);
1399 return err; 1400 return err;
1401 }
1400 if (inode->i_size < off+len-towrite) 1402 if (inode->i_size < off+len-towrite)
1401 i_size_write(inode, off+len-towrite); 1403 i_size_write(inode, off+len-towrite);
1402 inode->i_version++; 1404 inode->i_version++;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 466a332e0bd1..fcfa24361856 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1521,12 +1521,16 @@ static int ext3_ordered_writepage(struct page *page,
1521 if (!page_has_buffers(page)) { 1521 if (!page_has_buffers(page)) {
1522 create_empty_buffers(page, inode->i_sb->s_blocksize, 1522 create_empty_buffers(page, inode->i_sb->s_blocksize,
1523 (1 << BH_Dirty)|(1 << BH_Uptodate)); 1523 (1 << BH_Dirty)|(1 << BH_Uptodate));
1524 } else if (!walk_page_buffers(NULL, page_buffers(page), 0, PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { 1524 page_bufs = page_buffers(page);
1525 /* Provide NULL instead of get_block so that we catch bugs if buffers weren't really mapped */ 1525 } else {
1526 return block_write_full_page(page, NULL, wbc); 1526 page_bufs = page_buffers(page);
1527 if (!walk_page_buffers(NULL, page_bufs, 0, PAGE_CACHE_SIZE,
1528 NULL, buffer_unmapped)) {
1529 /* Provide NULL get_block() to catch bugs if buffers
1530 * weren't really mapped */
1531 return block_write_full_page(page, NULL, wbc);
1532 }
1527 } 1533 }
1528 page_bufs = page_buffers(page);
1529
1530 handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); 1534 handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
1531 1535
1532 if (IS_ERR(handle)) { 1536 if (IS_ERR(handle)) {
@@ -1581,6 +1585,15 @@ static int ext3_writeback_writepage(struct page *page,
1581 if (ext3_journal_current_handle()) 1585 if (ext3_journal_current_handle())
1582 goto out_fail; 1586 goto out_fail;
1583 1587
1588 if (page_has_buffers(page)) {
1589 if (!walk_page_buffers(NULL, page_buffers(page), 0,
1590 PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
1591 /* Provide NULL get_block() to catch bugs if buffers
1592 * weren't really mapped */
1593 return block_write_full_page(page, NULL, wbc);
1594 }
1595 }
1596
1584 handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); 1597 handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
1585 if (IS_ERR(handle)) { 1598 if (IS_ERR(handle)) {
1586 ret = PTR_ERR(handle); 1599 ret = PTR_ERR(handle);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ac77d8b8251d..e40332158340 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -326,11 +326,14 @@ ext4_ext_max_entries(struct inode *inode, int depth)
326 326
327static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) 327static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
328{ 328{
329 ext4_fsblk_t block = ext_pblock(ext); 329 ext4_fsblk_t block = ext_pblock(ext), valid_block;
330 int len = ext4_ext_get_actual_len(ext); 330 int len = ext4_ext_get_actual_len(ext);
331 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; 331 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
332 if (unlikely(block < le32_to_cpu(es->s_first_data_block) || 332
333 ((block + len) > ext4_blocks_count(es)))) 333 valid_block = le32_to_cpu(es->s_first_data_block) +
334 EXT4_SB(inode->i_sb)->s_gdb_count;
335 if (unlikely(block <= valid_block ||
336 ((block + len) > ext4_blocks_count(es))))
334 return 0; 337 return 0;
335 else 338 else
336 return 1; 339 return 1;
@@ -339,10 +342,13 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
339static int ext4_valid_extent_idx(struct inode *inode, 342static int ext4_valid_extent_idx(struct inode *inode,
340 struct ext4_extent_idx *ext_idx) 343 struct ext4_extent_idx *ext_idx)
341{ 344{
342 ext4_fsblk_t block = idx_pblock(ext_idx); 345 ext4_fsblk_t block = idx_pblock(ext_idx), valid_block;
343 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; 346 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
344 if (unlikely(block < le32_to_cpu(es->s_first_data_block) || 347
345 (block > ext4_blocks_count(es)))) 348 valid_block = le32_to_cpu(es->s_first_data_block) +
349 EXT4_SB(inode->i_sb)->s_gdb_count;
350 if (unlikely(block <= valid_block ||
351 (block >= ext4_blocks_count(es))))
346 return 0; 352 return 0;
347 else 353 else
348 return 1; 354 return 1;
@@ -2416,8 +2422,6 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
2416 len = ee_len; 2422 len = ee_len;
2417 2423
2418 bio = bio_alloc(GFP_NOIO, len); 2424 bio = bio_alloc(GFP_NOIO, len);
2419 if (!bio)
2420 return -ENOMEM;
2421 bio->bi_sector = ee_pblock; 2425 bio->bi_sector = ee_pblock;
2422 bio->bi_bdev = inode->i_sb->s_bdev; 2426 bio->bi_bdev = inode->i_sb->s_bdev;
2423 2427
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 47b84e8df568..f18e0a08a6b5 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -585,6 +585,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
585fallback: 585fallback:
586 ngroups = sbi->s_groups_count; 586 ngroups = sbi->s_groups_count;
587 avefreei = freei / ngroups; 587 avefreei = freei / ngroups;
588fallback_retry:
588 parent_group = EXT4_I(parent)->i_block_group; 589 parent_group = EXT4_I(parent)->i_block_group;
589 for (i = 0; i < ngroups; i++) { 590 for (i = 0; i < ngroups; i++) {
590 grp = (parent_group + i) % ngroups; 591 grp = (parent_group + i) % ngroups;
@@ -602,7 +603,7 @@ fallback:
602 * filesystems the above test can fail to find any blockgroups 603 * filesystems the above test can fail to find any blockgroups
603 */ 604 */
604 avefreei = 0; 605 avefreei = 0;
605 goto fallback; 606 goto fallback_retry;
606 } 607 }
607 608
608 return -1; 609 return -1;
@@ -831,11 +832,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
831 ret2 = find_group_flex(sb, dir, &group); 832 ret2 = find_group_flex(sb, dir, &group);
832 if (ret2 == -1) { 833 if (ret2 == -1) {
833 ret2 = find_group_other(sb, dir, &group, mode); 834 ret2 = find_group_other(sb, dir, &group, mode);
834 if (ret2 == 0 && once) 835 if (ret2 == 0 && once) {
835 once = 0; 836 once = 0;
836 printk(KERN_NOTICE "ext4: find_group_flex " 837 printk(KERN_NOTICE "ext4: find_group_flex "
837 "failed, fallback succeeded dir %lu\n", 838 "failed, fallback succeeded dir %lu\n",
838 dir->i_ino); 839 dir->i_ino);
840 }
839 } 841 }
840 goto got_group; 842 goto got_group;
841 } 843 }
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a2e7952bc5f9..e91f978c7f12 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -372,16 +372,16 @@ static int ext4_block_to_path(struct inode *inode,
372} 372}
373 373
374static int __ext4_check_blockref(const char *function, struct inode *inode, 374static int __ext4_check_blockref(const char *function, struct inode *inode,
375 unsigned int *p, unsigned int max) { 375 __le32 *p, unsigned int max) {
376 376
377 unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es); 377 unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es);
378 unsigned int *bref = p; 378 __le32 *bref = p;
379 while (bref < p+max) { 379 while (bref < p+max) {
380 if (unlikely(*bref >= maxblocks)) { 380 if (unlikely(le32_to_cpu(*bref) >= maxblocks)) {
381 ext4_error(inode->i_sb, function, 381 ext4_error(inode->i_sb, function,
382 "block reference %u >= max (%u) " 382 "block reference %u >= max (%u) "
383 "in inode #%lu, offset=%d", 383 "in inode #%lu, offset=%d",
384 *bref, maxblocks, 384 le32_to_cpu(*bref), maxblocks,
385 inode->i_ino, (int)(bref-p)); 385 inode->i_ino, (int)(bref-p));
386 return -EIO; 386 return -EIO;
387 } 387 }
@@ -4357,11 +4357,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4357 ei->i_flags = le32_to_cpu(raw_inode->i_flags); 4357 ei->i_flags = le32_to_cpu(raw_inode->i_flags);
4358 inode->i_blocks = ext4_inode_blocks(raw_inode, ei); 4358 inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
4359 ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); 4359 ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
4360 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 4360 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT))
4361 cpu_to_le32(EXT4_OS_HURD)) {
4362 ei->i_file_acl |= 4361 ei->i_file_acl |=
4363 ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; 4362 ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
4364 }
4365 inode->i_size = ext4_isize(raw_inode); 4363 inode->i_size = ext4_isize(raw_inode);
4366 ei->i_disksize = inode->i_size; 4364 ei->i_disksize = inode->i_size;
4367 inode->i_generation = le32_to_cpu(raw_inode->i_generation); 4365 inode->i_generation = le32_to_cpu(raw_inode->i_generation);
@@ -4409,9 +4407,23 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4409 (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; 4407 (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
4410 } 4408 }
4411 4409
4412 if (ei->i_flags & EXT4_EXTENTS_FL) { 4410 ret = 0;
4413 /* Validate extent which is part of inode */ 4411 if (ei->i_file_acl &&
4414 ret = ext4_ext_check_inode(inode); 4412 ((ei->i_file_acl <
4413 (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
4414 EXT4_SB(sb)->s_gdb_count)) ||
4415 (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
4416 ext4_error(sb, __func__,
4417 "bad extended attribute block %llu in inode #%lu",
4418 ei->i_file_acl, inode->i_ino);
4419 ret = -EIO;
4420 goto bad_inode;
4421 } else if (ei->i_flags & EXT4_EXTENTS_FL) {
4422 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
4423 (S_ISLNK(inode->i_mode) &&
4424 !ext4_inode_is_fast_symlink(inode)))
4425 /* Validate extent which is part of inode */
4426 ret = ext4_ext_check_inode(inode);
4415 } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 4427 } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
4416 (S_ISLNK(inode->i_mode) && 4428 (S_ISLNK(inode->i_mode) &&
4417 !ext4_inode_is_fast_symlink(inode))) { 4429 !ext4_inode_is_fast_symlink(inode))) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9987bba99db3..2958f4e6f222 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2508,6 +2508,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2508 if (EXT4_BLOCKS_PER_GROUP(sb) == 0) 2508 if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
2509 goto cantfind_ext4; 2509 goto cantfind_ext4;
2510 2510
2511 /* check blocks count against device size */
2512 blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
2513 if (blocks_count && ext4_blocks_count(es) > blocks_count) {
2514 printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu "
2515 "exceeds size of device (%llu blocks)\n",
2516 ext4_blocks_count(es), blocks_count);
2517 goto failed_mount;
2518 }
2519
2511 /* 2520 /*
2512 * It makes no sense for the first data block to be beyond the end 2521 * It makes no sense for the first data block to be beyond the end
2513 * of the filesystem. 2522 * of the filesystem.
diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig
index d0a69ff25375..182f9ffe2b51 100644
--- a/fs/fat/Kconfig
+++ b/fs/fat/Kconfig
@@ -95,3 +95,6 @@ config FAT_DEFAULT_IOCHARSET
95 Note that "utf8" is not recommended for FAT filesystems. 95 Note that "utf8" is not recommended for FAT filesystems.
96 If unsure, you shouldn't set "utf8" here. 96 If unsure, you shouldn't set "utf8" here.
97 See <file:Documentation/filesystems/vfat.txt> for more information. 97 See <file:Documentation/filesystems/vfat.txt> for more information.
98
99 Enable any character sets you need in File Systems/Native Language
100 Support.
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 1aa70260e6d1..a24c58e181db 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -199,7 +199,7 @@ SYSCALL_DEFINE3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2)
199 return retval; 199 return retval;
200} 200}
201 201
202int get_filesystem_list(char * buf) 202int __init get_filesystem_list(char *buf)
203{ 203{
204 int len = 0; 204 int len = 0;
205 struct file_system_type * tmp; 205 struct file_system_type * tmp;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 2b25133524a3..06f30e965676 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -938,9 +938,9 @@ static void fuse_release_user_pages(struct fuse_req *req, int write)
938} 938}
939 939
940static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, 940static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
941 unsigned *nbytesp, int write) 941 size_t *nbytesp, int write)
942{ 942{
943 unsigned nbytes = *nbytesp; 943 size_t nbytes = *nbytesp;
944 unsigned long user_addr = (unsigned long) buf; 944 unsigned long user_addr = (unsigned long) buf;
945 unsigned offset = user_addr & ~PAGE_MASK; 945 unsigned offset = user_addr & ~PAGE_MASK;
946 int npages; 946 int npages;
@@ -955,7 +955,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
955 return 0; 955 return 0;
956 } 956 }
957 957
958 nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); 958 nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
959 npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; 959 npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
960 npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); 960 npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ);
961 down_read(&current->mm->mmap_sem); 961 down_read(&current->mm->mmap_sem);
@@ -1298,6 +1298,8 @@ static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma)
1298 if (vma->vm_flags & VM_MAYSHARE) 1298 if (vma->vm_flags & VM_MAYSHARE)
1299 return -ENODEV; 1299 return -ENODEV;
1300 1300
1301 invalidate_inode_pages2(file->f_mapping);
1302
1301 return generic_file_mmap(file, vma); 1303 return generic_file_mmap(file, vma);
1302} 1304}
1303 1305
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 3984e47d1d33..1afd9f26bcb1 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -597,7 +597,6 @@ __acquires(&gl->gl_spin)
597 597
598 GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)); 598 GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));
599 599
600 down_read(&gfs2_umount_flush_sem);
601 if (test_bit(GLF_DEMOTE, &gl->gl_flags) && 600 if (test_bit(GLF_DEMOTE, &gl->gl_flags) &&
602 gl->gl_demote_state != gl->gl_state) { 601 gl->gl_demote_state != gl->gl_state) {
603 if (find_first_holder(gl)) 602 if (find_first_holder(gl))
@@ -614,15 +613,14 @@ __acquires(&gl->gl_spin)
614 if (ret == 0) 613 if (ret == 0)
615 goto out_unlock; 614 goto out_unlock;
616 if (ret == 2) 615 if (ret == 2)
617 goto out_sem; 616 goto out;
618 gh = find_first_waiter(gl); 617 gh = find_first_waiter(gl);
619 gl->gl_target = gh->gh_state; 618 gl->gl_target = gh->gh_state;
620 if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) 619 if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
621 do_error(gl, 0); /* Fail queued try locks */ 620 do_error(gl, 0); /* Fail queued try locks */
622 } 621 }
623 do_xmote(gl, gh, gl->gl_target); 622 do_xmote(gl, gh, gl->gl_target);
624out_sem: 623out:
625 up_read(&gfs2_umount_flush_sem);
626 return; 624 return;
627 625
628out_sched: 626out_sched:
@@ -631,7 +629,7 @@ out_sched:
631 gfs2_glock_put(gl); 629 gfs2_glock_put(gl);
632out_unlock: 630out_unlock:
633 clear_bit(GLF_LOCK, &gl->gl_flags); 631 clear_bit(GLF_LOCK, &gl->gl_flags);
634 goto out_sem; 632 goto out;
635} 633}
636 634
637static void glock_work_func(struct work_struct *work) 635static void glock_work_func(struct work_struct *work)
@@ -641,6 +639,7 @@ static void glock_work_func(struct work_struct *work)
641 639
642 if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) 640 if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags))
643 finish_xmote(gl, gl->gl_reply); 641 finish_xmote(gl, gl->gl_reply);
642 down_read(&gfs2_umount_flush_sem);
644 spin_lock(&gl->gl_spin); 643 spin_lock(&gl->gl_spin);
645 if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && 644 if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
646 gl->gl_state != LM_ST_UNLOCKED && 645 gl->gl_state != LM_ST_UNLOCKED &&
@@ -653,6 +652,7 @@ static void glock_work_func(struct work_struct *work)
653 } 652 }
654 run_queue(gl, 0); 653 run_queue(gl, 0);
655 spin_unlock(&gl->gl_spin); 654 spin_unlock(&gl->gl_spin);
655 up_read(&gfs2_umount_flush_sem);
656 if (!delay || 656 if (!delay ||
657 queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) 657 queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
658 gfs2_glock_put(gl); 658 gfs2_glock_put(gl);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index bf23a62aa925..70f87f43afa2 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -156,6 +156,12 @@ static void inode_go_sync(struct gfs2_glock *gl)
156 error = filemap_fdatawait(metamapping); 156 error = filemap_fdatawait(metamapping);
157 mapping_set_error(metamapping, error); 157 mapping_set_error(metamapping, error);
158 gfs2_ail_empty_gl(gl); 158 gfs2_ail_empty_gl(gl);
159 /*
160 * Writeback of the data mapping may cause the dirty flag to be set
161 * so we have to clear it again here.
162 */
163 smp_mb__before_clear_bit();
164 clear_bit(GLF_DIRTY, &gl->gl_flags);
159} 165}
160 166
161/** 167/**
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 7b277d449155..5a31d426116f 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -137,15 +137,15 @@ void gfs2_set_iop(struct inode *inode)
137 if (S_ISREG(mode)) { 137 if (S_ISREG(mode)) {
138 inode->i_op = &gfs2_file_iops; 138 inode->i_op = &gfs2_file_iops;
139 if (gfs2_localflocks(sdp)) 139 if (gfs2_localflocks(sdp))
140 inode->i_fop = gfs2_file_fops_nolock; 140 inode->i_fop = &gfs2_file_fops_nolock;
141 else 141 else
142 inode->i_fop = gfs2_file_fops; 142 inode->i_fop = &gfs2_file_fops;
143 } else if (S_ISDIR(mode)) { 143 } else if (S_ISDIR(mode)) {
144 inode->i_op = &gfs2_dir_iops; 144 inode->i_op = &gfs2_dir_iops;
145 if (gfs2_localflocks(sdp)) 145 if (gfs2_localflocks(sdp))
146 inode->i_fop = gfs2_dir_fops_nolock; 146 inode->i_fop = &gfs2_dir_fops_nolock;
147 else 147 else
148 inode->i_fop = gfs2_dir_fops; 148 inode->i_fop = &gfs2_dir_fops;
149 } else if (S_ISLNK(mode)) { 149 } else if (S_ISLNK(mode)) {
150 inode->i_op = &gfs2_symlink_iops; 150 inode->i_op = &gfs2_symlink_iops;
151 } else { 151 } else {
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index dca4fee3078b..c30be2b66580 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -101,21 +101,23 @@ void gfs2_dinode_print(const struct gfs2_inode *ip);
101extern const struct inode_operations gfs2_file_iops; 101extern const struct inode_operations gfs2_file_iops;
102extern const struct inode_operations gfs2_dir_iops; 102extern const struct inode_operations gfs2_dir_iops;
103extern const struct inode_operations gfs2_symlink_iops; 103extern const struct inode_operations gfs2_symlink_iops;
104extern const struct file_operations *gfs2_file_fops_nolock; 104extern const struct file_operations gfs2_file_fops_nolock;
105extern const struct file_operations *gfs2_dir_fops_nolock; 105extern const struct file_operations gfs2_dir_fops_nolock;
106 106
107extern void gfs2_set_inode_flags(struct inode *inode); 107extern void gfs2_set_inode_flags(struct inode *inode);
108 108
109#ifdef CONFIG_GFS2_FS_LOCKING_DLM 109#ifdef CONFIG_GFS2_FS_LOCKING_DLM
110extern const struct file_operations *gfs2_file_fops; 110extern const struct file_operations gfs2_file_fops;
111extern const struct file_operations *gfs2_dir_fops; 111extern const struct file_operations gfs2_dir_fops;
112
112static inline int gfs2_localflocks(const struct gfs2_sbd *sdp) 113static inline int gfs2_localflocks(const struct gfs2_sbd *sdp)
113{ 114{
114 return sdp->sd_args.ar_localflocks; 115 return sdp->sd_args.ar_localflocks;
115} 116}
116#else /* Single node only */ 117#else /* Single node only */
117#define gfs2_file_fops NULL 118#define gfs2_file_fops gfs2_file_fops_nolock
118#define gfs2_dir_fops NULL 119#define gfs2_dir_fops gfs2_dir_fops_nolock
120
119static inline int gfs2_localflocks(const struct gfs2_sbd *sdp) 121static inline int gfs2_localflocks(const struct gfs2_sbd *sdp)
120{ 122{
121 return 1; 123 return 1;
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 70b9b8548945..5d82e91887e3 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -413,7 +413,9 @@ out_unlock:
413 gfs2_glock_dq(&gh); 413 gfs2_glock_dq(&gh);
414out: 414out:
415 gfs2_holder_uninit(&gh); 415 gfs2_holder_uninit(&gh);
416 if (ret) 416 if (ret == -ENOMEM)
417 ret = VM_FAULT_OOM;
418 else if (ret)
417 ret = VM_FAULT_SIGBUS; 419 ret = VM_FAULT_SIGBUS;
418 return ret; 420 return ret;
419} 421}
@@ -705,7 +707,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
705 } 707 }
706} 708}
707 709
708const struct file_operations *gfs2_file_fops = &(const struct file_operations){ 710const struct file_operations gfs2_file_fops = {
709 .llseek = gfs2_llseek, 711 .llseek = gfs2_llseek,
710 .read = do_sync_read, 712 .read = do_sync_read,
711 .aio_read = generic_file_aio_read, 713 .aio_read = generic_file_aio_read,
@@ -723,7 +725,7 @@ const struct file_operations *gfs2_file_fops = &(const struct file_operations){
723 .setlease = gfs2_setlease, 725 .setlease = gfs2_setlease,
724}; 726};
725 727
726const struct file_operations *gfs2_dir_fops = &(const struct file_operations){ 728const struct file_operations gfs2_dir_fops = {
727 .readdir = gfs2_readdir, 729 .readdir = gfs2_readdir,
728 .unlocked_ioctl = gfs2_ioctl, 730 .unlocked_ioctl = gfs2_ioctl,
729 .open = gfs2_open, 731 .open = gfs2_open,
@@ -735,7 +737,7 @@ const struct file_operations *gfs2_dir_fops = &(const struct file_operations){
735 737
736#endif /* CONFIG_GFS2_FS_LOCKING_DLM */ 738#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
737 739
738const struct file_operations *gfs2_file_fops_nolock = &(const struct file_operations){ 740const struct file_operations gfs2_file_fops_nolock = {
739 .llseek = gfs2_llseek, 741 .llseek = gfs2_llseek,
740 .read = do_sync_read, 742 .read = do_sync_read,
741 .aio_read = generic_file_aio_read, 743 .aio_read = generic_file_aio_read,
@@ -751,7 +753,7 @@ const struct file_operations *gfs2_file_fops_nolock = &(const struct file_operat
751 .setlease = generic_setlease, 753 .setlease = generic_setlease,
752}; 754};
753 755
754const struct file_operations *gfs2_dir_fops_nolock = &(const struct file_operations){ 756const struct file_operations gfs2_dir_fops_nolock = {
755 .readdir = gfs2_readdir, 757 .readdir = gfs2_readdir,
756 .unlocked_ioctl = gfs2_ioctl, 758 .unlocked_ioctl = gfs2_ioctl,
757 .open = gfs2_open, 759 .open = gfs2_open,
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 51883b3ad89c..650a730707b7 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -272,11 +272,6 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
272 lock_page(page); 272 lock_page(page);
273 273
274 bio = bio_alloc(GFP_NOFS, 1); 274 bio = bio_alloc(GFP_NOFS, 1);
275 if (unlikely(!bio)) {
276 __free_page(page);
277 return -ENOBUFS;
278 }
279
280 bio->bi_sector = sector * (sb->s_blocksize >> 9); 275 bio->bi_sector = sector * (sb->s_blocksize >> 9);
281 bio->bi_bdev = sb->s_bdev; 276 bio->bi_bdev = sb->s_bdev;
282 bio_add_page(bio, page, PAGE_SIZE, 0); 277 bio_add_page(bio, page, PAGE_SIZE, 0);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index abd5429ae285..1c70fa5168d6 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -371,6 +371,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
371 ip = ghs[1].gh_gl->gl_object; 371 ip = ghs[1].gh_gl->gl_object;
372 372
373 ip->i_disksize = size; 373 ip->i_disksize = size;
374 i_size_write(inode, size);
374 375
375 error = gfs2_meta_inode_buffer(ip, &dibh); 376 error = gfs2_meta_inode_buffer(ip, &dibh);
376 377
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 8d53f66b5bcc..152e6c4a0dca 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -81,7 +81,7 @@ struct gfs2_quota_change_host {
81 81
82static LIST_HEAD(qd_lru_list); 82static LIST_HEAD(qd_lru_list);
83static atomic_t qd_lru_count = ATOMIC_INIT(0); 83static atomic_t qd_lru_count = ATOMIC_INIT(0);
84static spinlock_t qd_lru_lock = SPIN_LOCK_UNLOCKED; 84static DEFINE_SPINLOCK(qd_lru_lock);
85 85
86int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask) 86int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask)
87{ 87{
@@ -1364,7 +1364,7 @@ int gfs2_quotad(void *data)
1364 refrigerator(); 1364 refrigerator();
1365 t = min(quotad_timeo, statfs_timeo); 1365 t = min(quotad_timeo, statfs_timeo);
1366 1366
1367 prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_UNINTERRUPTIBLE); 1367 prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE);
1368 spin_lock(&sdp->sd_trunc_lock); 1368 spin_lock(&sdp->sd_trunc_lock);
1369 empty = list_empty(&sdp->sd_trunc_list); 1369 empty = list_empty(&sdp->sd_trunc_list);
1370 spin_unlock(&sdp->sd_trunc_lock); 1370 spin_unlock(&sdp->sd_trunc_lock);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index f03d024038ea..565038243fa2 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -212,8 +212,7 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
212 if (tmp == 0) 212 if (tmp == 0)
213 return BFITNOENT; 213 return BFITNOENT;
214 ptr--; 214 ptr--;
215 bit = fls64(tmp); 215 bit = __ffs64(tmp);
216 bit--; /* fls64 always adds one to the bit count */
217 bit /= 2; /* two bits per entry in the bitmap */ 216 bit /= 2; /* two bits per entry in the bitmap */
218 return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit; 217 return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
219} 218}
@@ -1445,10 +1444,12 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1445u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) 1444u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n)
1446{ 1445{
1447 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1446 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1447 struct buffer_head *dibh;
1448 struct gfs2_alloc *al = ip->i_alloc; 1448 struct gfs2_alloc *al = ip->i_alloc;
1449 struct gfs2_rgrpd *rgd = al->al_rgd; 1449 struct gfs2_rgrpd *rgd = al->al_rgd;
1450 u32 goal, blk; 1450 u32 goal, blk;
1451 u64 block; 1451 u64 block;
1452 int error;
1452 1453
1453 if (rgrp_contains_block(rgd, ip->i_goal)) 1454 if (rgrp_contains_block(rgd, ip->i_goal))
1454 goal = ip->i_goal - rgd->rd_data0; 1455 goal = ip->i_goal - rgd->rd_data0;
@@ -1461,7 +1462,13 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n)
1461 rgd->rd_last_alloc = blk; 1462 rgd->rd_last_alloc = blk;
1462 block = rgd->rd_data0 + blk; 1463 block = rgd->rd_data0 + blk;
1463 ip->i_goal = block; 1464 ip->i_goal = block;
1464 1465 error = gfs2_meta_inode_buffer(ip, &dibh);
1466 if (error == 0) {
1467 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
1468 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1469 di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal);
1470 brelse(dibh);
1471 }
1465 gfs2_assert_withdraw(sdp, rgd->rd_free >= *n); 1472 gfs2_assert_withdraw(sdp, rgd->rd_free >= *n);
1466 rgd->rd_free -= *n; 1473 rgd->rd_free -= *n;
1467 1474
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 9435dda8f1e0..a1cbff2b4d99 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -70,6 +70,10 @@ static int hfs_releasepage(struct page *page, gfp_t mask)
70 BUG(); 70 BUG();
71 return 0; 71 return 0;
72 } 72 }
73
74 if (!tree)
75 return 0;
76
73 if (tree->node_size >= PAGE_CACHE_SIZE) { 77 if (tree->node_size >= PAGE_CACHE_SIZE) {
74 nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT); 78 nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT);
75 spin_lock(&tree->hash_lock); 79 spin_lock(&tree->hash_lock);
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 36ca2e1a4fa3..7b6165f25fbe 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -349,6 +349,7 @@ void hfs_mdb_put(struct super_block *sb)
349 if (HFS_SB(sb)->nls_disk) 349 if (HFS_SB(sb)->nls_disk)
350 unload_nls(HFS_SB(sb)->nls_disk); 350 unload_nls(HFS_SB(sb)->nls_disk);
351 351
352 free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0);
352 kfree(HFS_SB(sb)); 353 kfree(HFS_SB(sb));
353 sb->s_fs_info = NULL; 354 sb->s_fs_info = NULL;
354} 355}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 23a3c76711e0..153d9681192b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -26,7 +26,6 @@
26#include <linux/pagevec.h> 26#include <linux/pagevec.h>
27#include <linux/parser.h> 27#include <linux/parser.h>
28#include <linux/mman.h> 28#include <linux/mman.h>
29#include <linux/quotaops.h>
30#include <linux/slab.h> 29#include <linux/slab.h>
31#include <linux/dnotify.h> 30#include <linux/dnotify.h>
32#include <linux/statfs.h> 31#include <linux/statfs.h>
@@ -842,7 +841,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
842bad_val: 841bad_val:
843 printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n", 842 printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n",
844 args[0].from, p); 843 args[0].from, p);
845 return 1; 844 return -EINVAL;
846} 845}
847 846
848static int 847static int
diff --git a/fs/inode.c b/fs/inode.c
index d06d6d268de9..6ad14a1cd8c9 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1470,42 +1470,6 @@ static void __wait_on_freeing_inode(struct inode *inode)
1470 spin_lock(&inode_lock); 1470 spin_lock(&inode_lock);
1471} 1471}
1472 1472
1473/*
1474 * We rarely want to lock two inodes that do not have a parent/child
1475 * relationship (such as directory, child inode) simultaneously. The
1476 * vast majority of file systems should be able to get along fine
1477 * without this. Do not use these functions except as a last resort.
1478 */
1479void inode_double_lock(struct inode *inode1, struct inode *inode2)
1480{
1481 if (inode1 == NULL || inode2 == NULL || inode1 == inode2) {
1482 if (inode1)
1483 mutex_lock(&inode1->i_mutex);
1484 else if (inode2)
1485 mutex_lock(&inode2->i_mutex);
1486 return;
1487 }
1488
1489 if (inode1 < inode2) {
1490 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
1491 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
1492 } else {
1493 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
1494 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
1495 }
1496}
1497EXPORT_SYMBOL(inode_double_lock);
1498
1499void inode_double_unlock(struct inode *inode1, struct inode *inode2)
1500{
1501 if (inode1)
1502 mutex_unlock(&inode1->i_mutex);
1503
1504 if (inode2 && inode2 != inode1)
1505 mutex_unlock(&inode2->i_mutex);
1506}
1507EXPORT_SYMBOL(inode_double_unlock);
1508
1509static __initdata unsigned long ihash_entries; 1473static __initdata unsigned long ihash_entries;
1510static int __init set_ihash_entries(char *str) 1474static int __init set_ihash_entries(char *str)
1511{ 1475{
diff --git a/fs/ioctl.c b/fs/ioctl.c
index ac2d47e43926..82d9c42b8bac 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -258,7 +258,7 @@ int __generic_block_fiemap(struct inode *inode,
258 long long length = 0, map_len = 0; 258 long long length = 0, map_len = 0;
259 u64 logical = 0, phys = 0, size = 0; 259 u64 logical = 0, phys = 0, size = 0;
260 u32 flags = FIEMAP_EXTENT_MERGED; 260 u32 flags = FIEMAP_EXTENT_MERGED;
261 int ret = 0; 261 int ret = 0, past_eof = 0, whole_file = 0;
262 262
263 if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC))) 263 if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC)))
264 return ret; 264 return ret;
@@ -266,6 +266,9 @@ int __generic_block_fiemap(struct inode *inode,
266 start_blk = logical_to_blk(inode, start); 266 start_blk = logical_to_blk(inode, start);
267 267
268 length = (long long)min_t(u64, len, i_size_read(inode)); 268 length = (long long)min_t(u64, len, i_size_read(inode));
269 if (length < len)
270 whole_file = 1;
271
269 map_len = length; 272 map_len = length;
270 273
271 do { 274 do {
@@ -282,11 +285,26 @@ int __generic_block_fiemap(struct inode *inode,
282 285
283 /* HOLE */ 286 /* HOLE */
284 if (!buffer_mapped(&tmp)) { 287 if (!buffer_mapped(&tmp)) {
288 length -= blk_to_logical(inode, 1);
289 start_blk++;
290
291 /*
292 * we want to handle the case where there is an
293 * allocated block at the front of the file, and then
294 * nothing but holes up to the end of the file properly,
295 * to make sure that extent at the front gets properly
296 * marked with FIEMAP_EXTENT_LAST
297 */
298 if (!past_eof &&
299 blk_to_logical(inode, start_blk) >=
300 blk_to_logical(inode, 0)+i_size_read(inode))
301 past_eof = 1;
302
285 /* 303 /*
286 * first hole after going past the EOF, this is our 304 * first hole after going past the EOF, this is our
287 * last extent 305 * last extent
288 */ 306 */
289 if (length <= 0) { 307 if (past_eof && size) {
290 flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST; 308 flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST;
291 ret = fiemap_fill_next_extent(fieinfo, logical, 309 ret = fiemap_fill_next_extent(fieinfo, logical,
292 phys, size, 310 phys, size,
@@ -294,15 +312,37 @@ int __generic_block_fiemap(struct inode *inode,
294 break; 312 break;
295 } 313 }
296 314
297 length -= blk_to_logical(inode, 1);
298
299 /* if we have holes up to/past EOF then we're done */ 315 /* if we have holes up to/past EOF then we're done */
300 if (length <= 0) 316 if (length <= 0 || past_eof)
301 break; 317 break;
302
303 start_blk++;
304 } else { 318 } else {
305 if (length <= 0 && size) { 319 /*
320 * we have gone over the length of what we wanted to
321 * map, and it wasn't the entire file, so add the extent
322 * we got last time and exit.
323 *
324 * This is for the case where say we want to map all the
325 * way up to the second to the last block in a file, but
326 * the last block is a hole, making the second to last
327 * block FIEMAP_EXTENT_LAST. In this case we want to
328 * see if there is a hole after the second to last block
329 * so we can mark it properly. If we found data after
330 * we exceeded the length we were requesting, then we
331 * are good to go, just add the extent to the fieinfo
332 * and break
333 */
334 if (length <= 0 && !whole_file) {
335 ret = fiemap_fill_next_extent(fieinfo, logical,
336 phys, size,
337 flags);
338 break;
339 }
340
341 /*
342 * if size != 0 then we know we already have an extent
343 * to add, so add it.
344 */
345 if (size) {
306 ret = fiemap_fill_next_extent(fieinfo, logical, 346 ret = fiemap_fill_next_extent(fieinfo, logical,
307 phys, size, 347 phys, size,
308 flags); 348 flags);
@@ -319,19 +359,14 @@ int __generic_block_fiemap(struct inode *inode,
319 start_blk += logical_to_blk(inode, size); 359 start_blk += logical_to_blk(inode, size);
320 360
321 /* 361 /*
322 * if we are past the EOF we need to loop again to see 362 * If we are past the EOF, then we need to make sure as
323 * if there is a hole so we can mark this extent as the 363 * soon as we find a hole that the last extent we found
324 * last one, and if not keep mapping things until we 364 * is marked with FIEMAP_EXTENT_LAST
325 * find a hole, or we run out of slots in the extent
326 * array
327 */ 365 */
328 if (length <= 0) 366 if (!past_eof &&
329 continue; 367 logical+size >=
330 368 blk_to_logical(inode, 0)+i_size_read(inode))
331 ret = fiemap_fill_next_extent(fieinfo, logical, phys, 369 past_eof = 1;
332 size, flags);
333 if (ret)
334 break;
335 } 370 }
336 cond_resched(); 371 cond_resched();
337 } while (1); 372 } while (1);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index a8e8513a78a9..06560c520f49 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -502,7 +502,7 @@ void journal_commit_transaction(journal_t *journal)
502 err = 0; 502 err = 0;
503 } 503 }
504 504
505 journal_write_revoke_records(journal, commit_transaction); 505 journal_write_revoke_records(journal, commit_transaction, write_op);
506 506
507 /* 507 /*
508 * If we found any dirty or locked buffers, then we should have 508 * If we found any dirty or locked buffers, then we should have
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index c7bd649bbbdc..da6cd9bdaabc 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -55,6 +55,25 @@
55 * need do nothing. 55 * need do nothing.
56 * RevokeValid set, Revoked set: 56 * RevokeValid set, Revoked set:
57 * buffer has been revoked. 57 * buffer has been revoked.
58 *
59 * Locking rules:
60 * We keep two hash tables of revoke records. One hashtable belongs to the
61 * running transaction (is pointed to by journal->j_revoke), the other one
62 * belongs to the committing transaction. Accesses to the second hash table
63 * happen only from the kjournald and no other thread touches this table. Also
64 * journal_switch_revoke_table() which switches which hashtable belongs to the
65 * running and which to the committing transaction is called only from
66 * kjournald. Therefore we need no locks when accessing the hashtable belonging
67 * to the committing transaction.
68 *
69 * All users operating on the hash table belonging to the running transaction
70 * have a handle to the transaction. Therefore they are safe from kjournald
71 * switching hash tables under them. For operations on the lists of entries in
72 * the hash table j_revoke_lock is used.
73 *
74 * Finally, also replay code uses the hash tables but at this moment noone else
75 * can touch them (filesystem isn't mounted yet) and hence no locking is
76 * needed.
58 */ 77 */
59 78
60#ifndef __KERNEL__ 79#ifndef __KERNEL__
@@ -67,6 +86,7 @@
67#include <linux/slab.h> 86#include <linux/slab.h>
68#include <linux/list.h> 87#include <linux/list.h>
69#include <linux/init.h> 88#include <linux/init.h>
89#include <linux/bio.h>
70#endif 90#endif
71#include <linux/log2.h> 91#include <linux/log2.h>
72 92
@@ -99,8 +119,8 @@ struct jbd_revoke_table_s
99#ifdef __KERNEL__ 119#ifdef __KERNEL__
100static void write_one_revoke_record(journal_t *, transaction_t *, 120static void write_one_revoke_record(journal_t *, transaction_t *,
101 struct journal_head **, int *, 121 struct journal_head **, int *,
102 struct jbd_revoke_record_s *); 122 struct jbd_revoke_record_s *, int);
103static void flush_descriptor(journal_t *, struct journal_head *, int); 123static void flush_descriptor(journal_t *, struct journal_head *, int, int);
104#endif 124#endif
105 125
106/* Utility functions to maintain the revoke table */ 126/* Utility functions to maintain the revoke table */
@@ -402,8 +422,6 @@ int journal_revoke(handle_t *handle, unsigned long blocknr,
402 * the second time we would still have a pending revoke to cancel. So, 422 * the second time we would still have a pending revoke to cancel. So,
403 * do not trust the Revoked bit on buffers unless RevokeValid is also 423 * do not trust the Revoked bit on buffers unless RevokeValid is also
404 * set. 424 * set.
405 *
406 * The caller must have the journal locked.
407 */ 425 */
408int journal_cancel_revoke(handle_t *handle, struct journal_head *jh) 426int journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
409{ 427{
@@ -481,12 +499,9 @@ void journal_switch_revoke_table(journal_t *journal)
481/* 499/*
482 * Write revoke records to the journal for all entries in the current 500 * Write revoke records to the journal for all entries in the current
483 * revoke hash, deleting the entries as we go. 501 * revoke hash, deleting the entries as we go.
484 *
485 * Called with the journal lock held.
486 */ 502 */
487
488void journal_write_revoke_records(journal_t *journal, 503void journal_write_revoke_records(journal_t *journal,
489 transaction_t *transaction) 504 transaction_t *transaction, int write_op)
490{ 505{
491 struct journal_head *descriptor; 506 struct journal_head *descriptor;
492 struct jbd_revoke_record_s *record; 507 struct jbd_revoke_record_s *record;
@@ -510,14 +525,14 @@ void journal_write_revoke_records(journal_t *journal,
510 hash_list->next; 525 hash_list->next;
511 write_one_revoke_record(journal, transaction, 526 write_one_revoke_record(journal, transaction,
512 &descriptor, &offset, 527 &descriptor, &offset,
513 record); 528 record, write_op);
514 count++; 529 count++;
515 list_del(&record->hash); 530 list_del(&record->hash);
516 kmem_cache_free(revoke_record_cache, record); 531 kmem_cache_free(revoke_record_cache, record);
517 } 532 }
518 } 533 }
519 if (descriptor) 534 if (descriptor)
520 flush_descriptor(journal, descriptor, offset); 535 flush_descriptor(journal, descriptor, offset, write_op);
521 jbd_debug(1, "Wrote %d revoke records\n", count); 536 jbd_debug(1, "Wrote %d revoke records\n", count);
522} 537}
523 538
@@ -530,7 +545,8 @@ static void write_one_revoke_record(journal_t *journal,
530 transaction_t *transaction, 545 transaction_t *transaction,
531 struct journal_head **descriptorp, 546 struct journal_head **descriptorp,
532 int *offsetp, 547 int *offsetp,
533 struct jbd_revoke_record_s *record) 548 struct jbd_revoke_record_s *record,
549 int write_op)
534{ 550{
535 struct journal_head *descriptor; 551 struct journal_head *descriptor;
536 int offset; 552 int offset;
@@ -549,7 +565,7 @@ static void write_one_revoke_record(journal_t *journal,
549 /* Make sure we have a descriptor with space left for the record */ 565 /* Make sure we have a descriptor with space left for the record */
550 if (descriptor) { 566 if (descriptor) {
551 if (offset == journal->j_blocksize) { 567 if (offset == journal->j_blocksize) {
552 flush_descriptor(journal, descriptor, offset); 568 flush_descriptor(journal, descriptor, offset, write_op);
553 descriptor = NULL; 569 descriptor = NULL;
554 } 570 }
555 } 571 }
@@ -586,7 +602,7 @@ static void write_one_revoke_record(journal_t *journal,
586 602
587static void flush_descriptor(journal_t *journal, 603static void flush_descriptor(journal_t *journal,
588 struct journal_head *descriptor, 604 struct journal_head *descriptor,
589 int offset) 605 int offset, int write_op)
590{ 606{
591 journal_revoke_header_t *header; 607 journal_revoke_header_t *header;
592 struct buffer_head *bh = jh2bh(descriptor); 608 struct buffer_head *bh = jh2bh(descriptor);
@@ -601,7 +617,7 @@ static void flush_descriptor(journal_t *journal,
601 set_buffer_jwrite(bh); 617 set_buffer_jwrite(bh);
602 BUFFER_TRACE(bh, "write"); 618 BUFFER_TRACE(bh, "write");
603 set_buffer_dirty(bh); 619 set_buffer_dirty(bh);
604 ll_rw_block(SWRITE, 1, &bh); 620 ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh);
605} 621}
606#endif 622#endif
607 623
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 073c8c3df7cd..0b7d3b8226fd 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -506,7 +506,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
506 if (err) 506 if (err)
507 jbd2_journal_abort(journal, err); 507 jbd2_journal_abort(journal, err);
508 508
509 jbd2_journal_write_revoke_records(journal, commit_transaction); 509 jbd2_journal_write_revoke_records(journal, commit_transaction,
510 write_op);
510 511
511 jbd_debug(3, "JBD: commit phase 2\n"); 512 jbd_debug(3, "JBD: commit phase 2\n");
512 513
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index bbe6d592d8b3..a360b06af2e3 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -86,6 +86,7 @@
86#include <linux/slab.h> 86#include <linux/slab.h>
87#include <linux/list.h> 87#include <linux/list.h>
88#include <linux/init.h> 88#include <linux/init.h>
89#include <linux/bio.h>
89#endif 90#endif
90#include <linux/log2.h> 91#include <linux/log2.h>
91 92
@@ -118,8 +119,8 @@ struct jbd2_revoke_table_s
118#ifdef __KERNEL__ 119#ifdef __KERNEL__
119static void write_one_revoke_record(journal_t *, transaction_t *, 120static void write_one_revoke_record(journal_t *, transaction_t *,
120 struct journal_head **, int *, 121 struct journal_head **, int *,
121 struct jbd2_revoke_record_s *); 122 struct jbd2_revoke_record_s *, int);
122static void flush_descriptor(journal_t *, struct journal_head *, int); 123static void flush_descriptor(journal_t *, struct journal_head *, int, int);
123#endif 124#endif
124 125
125/* Utility functions to maintain the revoke table */ 126/* Utility functions to maintain the revoke table */
@@ -499,7 +500,8 @@ void jbd2_journal_switch_revoke_table(journal_t *journal)
499 * revoke hash, deleting the entries as we go. 500 * revoke hash, deleting the entries as we go.
500 */ 501 */
501void jbd2_journal_write_revoke_records(journal_t *journal, 502void jbd2_journal_write_revoke_records(journal_t *journal,
502 transaction_t *transaction) 503 transaction_t *transaction,
504 int write_op)
503{ 505{
504 struct journal_head *descriptor; 506 struct journal_head *descriptor;
505 struct jbd2_revoke_record_s *record; 507 struct jbd2_revoke_record_s *record;
@@ -523,14 +525,14 @@ void jbd2_journal_write_revoke_records(journal_t *journal,
523 hash_list->next; 525 hash_list->next;
524 write_one_revoke_record(journal, transaction, 526 write_one_revoke_record(journal, transaction,
525 &descriptor, &offset, 527 &descriptor, &offset,
526 record); 528 record, write_op);
527 count++; 529 count++;
528 list_del(&record->hash); 530 list_del(&record->hash);
529 kmem_cache_free(jbd2_revoke_record_cache, record); 531 kmem_cache_free(jbd2_revoke_record_cache, record);
530 } 532 }
531 } 533 }
532 if (descriptor) 534 if (descriptor)
533 flush_descriptor(journal, descriptor, offset); 535 flush_descriptor(journal, descriptor, offset, write_op);
534 jbd_debug(1, "Wrote %d revoke records\n", count); 536 jbd_debug(1, "Wrote %d revoke records\n", count);
535} 537}
536 538
@@ -543,7 +545,8 @@ static void write_one_revoke_record(journal_t *journal,
543 transaction_t *transaction, 545 transaction_t *transaction,
544 struct journal_head **descriptorp, 546 struct journal_head **descriptorp,
545 int *offsetp, 547 int *offsetp,
546 struct jbd2_revoke_record_s *record) 548 struct jbd2_revoke_record_s *record,
549 int write_op)
547{ 550{
548 struct journal_head *descriptor; 551 struct journal_head *descriptor;
549 int offset; 552 int offset;
@@ -562,7 +565,7 @@ static void write_one_revoke_record(journal_t *journal,
562 /* Make sure we have a descriptor with space left for the record */ 565 /* Make sure we have a descriptor with space left for the record */
563 if (descriptor) { 566 if (descriptor) {
564 if (offset == journal->j_blocksize) { 567 if (offset == journal->j_blocksize) {
565 flush_descriptor(journal, descriptor, offset); 568 flush_descriptor(journal, descriptor, offset, write_op);
566 descriptor = NULL; 569 descriptor = NULL;
567 } 570 }
568 } 571 }
@@ -607,7 +610,7 @@ static void write_one_revoke_record(journal_t *journal,
607 610
608static void flush_descriptor(journal_t *journal, 611static void flush_descriptor(journal_t *journal,
609 struct journal_head *descriptor, 612 struct journal_head *descriptor,
610 int offset) 613 int offset, int write_op)
611{ 614{
612 jbd2_journal_revoke_header_t *header; 615 jbd2_journal_revoke_header_t *header;
613 struct buffer_head *bh = jh2bh(descriptor); 616 struct buffer_head *bh = jh2bh(descriptor);
@@ -622,7 +625,7 @@ static void flush_descriptor(journal_t *journal,
622 set_buffer_jwrite(bh); 625 set_buffer_jwrite(bh);
623 BUFFER_TRACE(bh, "write"); 626 BUFFER_TRACE(bh, "write");
624 set_buffer_dirty(bh); 627 set_buffer_dirty(bh);
625 ll_rw_block(SWRITE, 1, &bh); 628 ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh);
626} 629}
627#endif 630#endif
628 631
diff --git a/fs/namei.c b/fs/namei.c
index b8433ebfae05..78f253cd2d4f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1248,6 +1248,8 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1248 int err; 1248 int err;
1249 struct qstr this; 1249 struct qstr this;
1250 1250
1251 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
1252
1251 err = __lookup_one_len(name, &this, base, len); 1253 err = __lookup_one_len(name, &this, base, len);
1252 if (err) 1254 if (err)
1253 return ERR_PTR(err); 1255 return ERR_PTR(err);
diff --git a/fs/namespace.c b/fs/namespace.c
index c6f54e4c4290..41196209a906 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1377,7 +1377,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1377 if (parent_path) { 1377 if (parent_path) {
1378 detach_mnt(source_mnt, parent_path); 1378 detach_mnt(source_mnt, parent_path);
1379 attach_mnt(source_mnt, path); 1379 attach_mnt(source_mnt, path);
1380 touch_mnt_namespace(current->nsproxy->mnt_ns); 1380 touch_mnt_namespace(parent_path->mnt->mnt_ns);
1381 } else { 1381 } else {
1382 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); 1382 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
1383 commit_tree(source_mnt); 1383 commit_tree(source_mnt);
@@ -1920,8 +1920,9 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1920 if (data_page) 1920 if (data_page)
1921 ((char *)data_page)[PAGE_SIZE - 1] = 0; 1921 ((char *)data_page)[PAGE_SIZE - 1] = 0;
1922 1922
1923 /* Default to relatime */ 1923 /* Default to relatime unless overriden */
1924 mnt_flags |= MNT_RELATIME; 1924 if (!(flags & MS_NOATIME))
1925 mnt_flags |= MNT_RELATIME;
1925 1926
1926 /* Separate the per-mountpoint flags */ 1927 /* Separate the per-mountpoint flags */
1927 if (flags & MS_NOSUID) 1928 if (flags & MS_NOSUID)
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index f54360f50a9c..fa038df63ac8 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -660,13 +660,10 @@ outrel:
660 if (user.object_name_len > NCP_OBJECT_NAME_MAX_LEN) 660 if (user.object_name_len > NCP_OBJECT_NAME_MAX_LEN)
661 return -ENOMEM; 661 return -ENOMEM;
662 if (user.object_name_len) { 662 if (user.object_name_len) {
663 newname = kmalloc(user.object_name_len, GFP_USER); 663 newname = memdup_user(user.object_name,
664 if (!newname) 664 user.object_name_len);
665 return -ENOMEM; 665 if (IS_ERR(newname))
666 if (copy_from_user(newname, user.object_name, user.object_name_len)) { 666 return PTR_ERR(newname);
667 kfree(newname);
668 return -EFAULT;
669 }
670 } else { 667 } else {
671 newname = NULL; 668 newname = NULL;
672 } 669 }
@@ -760,13 +757,9 @@ outrel:
760 if (user.len > NCP_PRIVATE_DATA_MAX_LEN) 757 if (user.len > NCP_PRIVATE_DATA_MAX_LEN)
761 return -ENOMEM; 758 return -ENOMEM;
762 if (user.len) { 759 if (user.len) {
763 new = kmalloc(user.len, GFP_USER); 760 new = memdup_user(user.data, user.len);
764 if (!new) 761 if (IS_ERR(new))
765 return -ENOMEM; 762 return PTR_ERR(new);
766 if (copy_from_user(new, user.data, user.len)) {
767 kfree(new);
768 return -EFAULT;
769 }
770 } else { 763 } else {
771 new = NULL; 764 new = NULL;
772 } 765 }
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 5a97bcfe03e5..ec7e27d00bc6 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -517,10 +517,10 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
517 517
518 ret = nfs_updatepage(filp, page, 0, pagelen); 518 ret = nfs_updatepage(filp, page, 0, pagelen);
519out_unlock: 519out_unlock:
520 if (!ret)
521 return VM_FAULT_LOCKED;
520 unlock_page(page); 522 unlock_page(page);
521 if (ret) 523 return VM_FAULT_SIGBUS;
522 ret = VM_FAULT_SIGBUS;
523 return ret;
524} 524}
525 525
526static struct vm_operations_struct nfs_file_vm_ops = { 526static struct vm_operations_struct nfs_file_vm_ops = {
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index e6a1932c7110..35869a4921f1 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -713,7 +713,8 @@ nfs3_xdr_setaclargs(struct rpc_rqst *req, __be32 *p,
713 if (args->npages != 0) 713 if (args->npages != 0)
714 xdr_encode_pages(buf, args->pages, 0, args->len); 714 xdr_encode_pages(buf, args->pages, 0, args->len);
715 else 715 else
716 req->rq_slen += args->len; 716 req->rq_slen = xdr_adjust_iovec(req->rq_svec,
717 p + XDR_QUADLEN(args->len));
717 718
718 err = nfsacl_encode(buf, base, args->inode, 719 err = nfsacl_encode(buf, base, args->inode,
719 (args->mask & NFS_ACL) ? 720 (args->mask & NFS_ACL) ?
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 3444c0052a87..5275097a7565 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -229,21 +229,23 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
229 goto out; 229 goto out;
230 status = vfs_readdir(filp, nfsd4_build_namelist, &names); 230 status = vfs_readdir(filp, nfsd4_build_namelist, &names);
231 fput(filp); 231 fput(filp);
232 mutex_lock(&dir->d_inode->i_mutex);
232 while (!list_empty(&names)) { 233 while (!list_empty(&names)) {
233 entry = list_entry(names.next, struct name_list, list); 234 entry = list_entry(names.next, struct name_list, list);
234 235
235 dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); 236 dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
236 if (IS_ERR(dentry)) { 237 if (IS_ERR(dentry)) {
237 status = PTR_ERR(dentry); 238 status = PTR_ERR(dentry);
238 goto out; 239 break;
239 } 240 }
240 status = f(dir, dentry); 241 status = f(dir, dentry);
241 dput(dentry); 242 dput(dentry);
242 if (status) 243 if (status)
243 goto out; 244 break;
244 list_del(&entry->list); 245 list_del(&entry->list);
245 kfree(entry); 246 kfree(entry);
246 } 247 }
248 mutex_unlock(&dir->d_inode->i_mutex);
247out: 249out:
248 while (!list_empty(&names)) { 250 while (!list_empty(&names)) {
249 entry = list_entry(names.next, struct name_list, list); 251 entry = list_entry(names.next, struct name_list, list);
@@ -255,36 +257,6 @@ out:
255} 257}
256 258
257static int 259static int
258nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry)
259{
260 int status;
261
262 if (!S_ISREG(dir->d_inode->i_mode)) {
263 printk("nfsd4: non-file found in client recovery directory\n");
264 return -EINVAL;
265 }
266 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
267 status = vfs_unlink(dir->d_inode, dentry);
268 mutex_unlock(&dir->d_inode->i_mutex);
269 return status;
270}
271
272static int
273nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
274{
275 int status;
276
277 /* For now this directory should already be empty, but we empty it of
278 * any regular files anyway, just in case the directory was created by
279 * a kernel from the future.... */
280 nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
281 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
282 status = vfs_rmdir(dir->d_inode, dentry);
283 mutex_unlock(&dir->d_inode->i_mutex);
284 return status;
285}
286
287static int
288nfsd4_unlink_clid_dir(char *name, int namlen) 260nfsd4_unlink_clid_dir(char *name, int namlen)
289{ 261{
290 struct dentry *dentry; 262 struct dentry *dentry;
@@ -294,18 +266,18 @@ nfsd4_unlink_clid_dir(char *name, int namlen)
294 266
295 mutex_lock(&rec_dir.dentry->d_inode->i_mutex); 267 mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
296 dentry = lookup_one_len(name, rec_dir.dentry, namlen); 268 dentry = lookup_one_len(name, rec_dir.dentry, namlen);
297 mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
298 if (IS_ERR(dentry)) { 269 if (IS_ERR(dentry)) {
299 status = PTR_ERR(dentry); 270 status = PTR_ERR(dentry);
300 return status; 271 goto out_unlock;
301 } 272 }
302 status = -ENOENT; 273 status = -ENOENT;
303 if (!dentry->d_inode) 274 if (!dentry->d_inode)
304 goto out; 275 goto out;
305 276 status = vfs_rmdir(rec_dir.dentry->d_inode, dentry);
306 status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry);
307out: 277out:
308 dput(dentry); 278 dput(dentry);
279out_unlock:
280 mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
309 return status; 281 return status;
310} 282}
311 283
@@ -348,7 +320,7 @@ purge_old(struct dentry *parent, struct dentry *child)
348 if (nfs4_has_reclaimed_state(child->d_name.name, false)) 320 if (nfs4_has_reclaimed_state(child->d_name.name, false))
349 return 0; 321 return 0;
350 322
351 status = nfsd4_clear_clid_dir(parent, child); 323 status = vfs_rmdir(parent->d_inode, child);
352 if (status) 324 if (status)
353 printk("failed to remove client recovery directory %s\n", 325 printk("failed to remove client recovery directory %s\n",
354 child->d_name.name); 326 child->d_name.name);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ab93fcfef254..6c68ffd6b4bb 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -116,10 +116,15 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
116 } 116 }
117 if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { 117 if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
118 /* successfully crossed mount point */ 118 /* successfully crossed mount point */
119 exp_put(exp); 119 /*
120 *expp = exp2; 120 * This is subtle: dentry is *not* under mnt at this point.
121 * The only reason we are safe is that original mnt is pinned
122 * down by exp, so we should dput before putting exp.
123 */
121 dput(dentry); 124 dput(dentry);
122 *dpp = mounts; 125 *dpp = mounts;
126 exp_put(exp);
127 *expp = exp2;
123 } else { 128 } else {
124 exp_put(exp2); 129 exp_put(exp2);
125 dput(mounts); 130 dput(mounts);
@@ -1885,8 +1890,8 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen,
1885 return 0; 1890 return 0;
1886} 1891}
1887 1892
1888static int nfsd_buffered_readdir(struct file *file, filldir_t func, 1893static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func,
1889 struct readdir_cd *cdp, loff_t *offsetp) 1894 struct readdir_cd *cdp, loff_t *offsetp)
1890{ 1895{
1891 struct readdir_data buf; 1896 struct readdir_data buf;
1892 struct buffered_dirent *de; 1897 struct buffered_dirent *de;
@@ -1896,11 +1901,12 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func,
1896 1901
1897 buf.dirent = (void *)__get_free_page(GFP_KERNEL); 1902 buf.dirent = (void *)__get_free_page(GFP_KERNEL);
1898 if (!buf.dirent) 1903 if (!buf.dirent)
1899 return -ENOMEM; 1904 return nfserrno(-ENOMEM);
1900 1905
1901 offset = *offsetp; 1906 offset = *offsetp;
1902 1907
1903 while (1) { 1908 while (1) {
1909 struct inode *dir_inode = file->f_path.dentry->d_inode;
1904 unsigned int reclen; 1910 unsigned int reclen;
1905 1911
1906 cdp->err = nfserr_eof; /* will be cleared on successful read */ 1912 cdp->err = nfserr_eof; /* will be cleared on successful read */
@@ -1919,26 +1925,38 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func,
1919 if (!size) 1925 if (!size)
1920 break; 1926 break;
1921 1927
1928 /*
1929 * Various filldir functions may end up calling back into
1930 * lookup_one_len() and the file system's ->lookup() method.
1931 * These expect i_mutex to be held, as it would within readdir.
1932 */
1933 host_err = mutex_lock_killable(&dir_inode->i_mutex);
1934 if (host_err)
1935 break;
1936
1922 de = (struct buffered_dirent *)buf.dirent; 1937 de = (struct buffered_dirent *)buf.dirent;
1923 while (size > 0) { 1938 while (size > 0) {
1924 offset = de->offset; 1939 offset = de->offset;
1925 1940
1926 if (func(cdp, de->name, de->namlen, de->offset, 1941 if (func(cdp, de->name, de->namlen, de->offset,
1927 de->ino, de->d_type)) 1942 de->ino, de->d_type))
1928 goto done; 1943 break;
1929 1944
1930 if (cdp->err != nfs_ok) 1945 if (cdp->err != nfs_ok)
1931 goto done; 1946 break;
1932 1947
1933 reclen = ALIGN(sizeof(*de) + de->namlen, 1948 reclen = ALIGN(sizeof(*de) + de->namlen,
1934 sizeof(u64)); 1949 sizeof(u64));
1935 size -= reclen; 1950 size -= reclen;
1936 de = (struct buffered_dirent *)((char *)de + reclen); 1951 de = (struct buffered_dirent *)((char *)de + reclen);
1937 } 1952 }
1953 mutex_unlock(&dir_inode->i_mutex);
1954 if (size > 0) /* We bailed out early */
1955 break;
1956
1938 offset = vfs_llseek(file, 0, SEEK_CUR); 1957 offset = vfs_llseek(file, 0, SEEK_CUR);
1939 } 1958 }
1940 1959
1941 done:
1942 free_page((unsigned long)(buf.dirent)); 1960 free_page((unsigned long)(buf.dirent));
1943 1961
1944 if (host_err) 1962 if (host_err)
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 24638e059bf3..064279e33bbb 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -688,6 +688,8 @@ static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_gc = {
688 .bpop_translate = NULL, 688 .bpop_translate = NULL,
689}; 689};
690 690
691static struct lock_class_key nilfs_bmap_dat_lock_key;
692
691/** 693/**
692 * nilfs_bmap_read - read a bmap from an inode 694 * nilfs_bmap_read - read a bmap from an inode
693 * @bmap: bmap 695 * @bmap: bmap
@@ -715,6 +717,7 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
715 bmap->b_pops = &nilfs_bmap_ptr_ops_p; 717 bmap->b_pops = &nilfs_bmap_ptr_ops_p;
716 bmap->b_last_allocated_key = 0; /* XXX: use macro */ 718 bmap->b_last_allocated_key = 0; /* XXX: use macro */
717 bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; 719 bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT;
720 lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key);
718 break; 721 break;
719 case NILFS_CPFILE_INO: 722 case NILFS_CPFILE_INO:
720 case NILFS_SUFILE_INO: 723 case NILFS_SUFILE_INO:
@@ -772,6 +775,7 @@ void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap)
772{ 775{
773 memcpy(gcbmap, bmap, sizeof(union nilfs_bmap_union)); 776 memcpy(gcbmap, bmap, sizeof(union nilfs_bmap_union));
774 init_rwsem(&gcbmap->b_sem); 777 init_rwsem(&gcbmap->b_sem);
778 lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key);
775 gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode; 779 gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode;
776} 780}
777 781
@@ -779,5 +783,6 @@ void nilfs_bmap_commit_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap)
779{ 783{
780 memcpy(bmap, gcbmap, sizeof(union nilfs_bmap_union)); 784 memcpy(bmap, gcbmap, sizeof(union nilfs_bmap_union));
781 init_rwsem(&bmap->b_sem); 785 init_rwsem(&bmap->b_sem);
786 lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key);
782 bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; 787 bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode;
783} 788}
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 7558c977db02..3d0c18a16db1 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -35,11 +35,6 @@
35#include "bmap_union.h" 35#include "bmap_union.h"
36 36
37/* 37/*
38 * NILFS filesystem version
39 */
40#define NILFS_VERSION "2.0.5"
41
42/*
43 * nilfs inode data in memory 38 * nilfs inode data in memory
44 */ 39 */
45struct nilfs_inode_info { 40struct nilfs_inode_info {
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 6ade0963fc1d..4fc081e47d70 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -413,7 +413,6 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
413 struct nilfs_segment_entry *ent, *n; 413 struct nilfs_segment_entry *ent, *n;
414 struct inode *sufile = nilfs->ns_sufile; 414 struct inode *sufile = nilfs->ns_sufile;
415 __u64 segnum[4]; 415 __u64 segnum[4];
416 time_t mtime;
417 int err; 416 int err;
418 int i; 417 int i;
419 418
@@ -442,24 +441,13 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
442 * Collecting segments written after the latest super root. 441 * Collecting segments written after the latest super root.
443 * These are marked dirty to avoid being reallocated in the next write. 442 * These are marked dirty to avoid being reallocated in the next write.
444 */ 443 */
445 mtime = get_seconds();
446 list_for_each_entry_safe(ent, n, head, list) { 444 list_for_each_entry_safe(ent, n, head, list) {
447 if (ent->segnum == segnum[0]) { 445 if (ent->segnum != segnum[0]) {
448 list_del(&ent->list); 446 err = nilfs_sufile_scrap(sufile, ent->segnum);
449 nilfs_free_segment_entry(ent); 447 if (unlikely(err))
450 continue; 448 goto failed;
451 }
452 err = nilfs_open_segment_entry(ent, sufile);
453 if (unlikely(err))
454 goto failed;
455 if (!nilfs_segment_usage_dirty(ent->raw_su)) {
456 /* make the segment garbage */
457 ent->raw_su->su_nblocks = cpu_to_le32(0);
458 ent->raw_su->su_lastmod = cpu_to_le32(mtime);
459 nilfs_segment_usage_set_dirty(ent->raw_su);
460 } 449 }
461 list_del(&ent->list); 450 list_del(&ent->list);
462 nilfs_close_segment_entry(ent, sufile);
463 nilfs_free_segment_entry(ent); 451 nilfs_free_segment_entry(ent);
464 } 452 }
465 453
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index c774cf397e2f..98e68677f045 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -93,6 +93,52 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum,
93 create, NULL, bhp); 93 create, NULL, bhp);
94} 94}
95 95
96static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
97 u64 ncleanadd, u64 ndirtyadd)
98{
99 struct nilfs_sufile_header *header;
100 void *kaddr;
101
102 kaddr = kmap_atomic(header_bh->b_page, KM_USER0);
103 header = kaddr + bh_offset(header_bh);
104 le64_add_cpu(&header->sh_ncleansegs, ncleanadd);
105 le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd);
106 kunmap_atomic(kaddr, KM_USER0);
107
108 nilfs_mdt_mark_buffer_dirty(header_bh);
109}
110
111int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create,
112 void (*dofunc)(struct inode *, __u64,
113 struct buffer_head *,
114 struct buffer_head *))
115{
116 struct buffer_head *header_bh, *bh;
117 int ret;
118
119 if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) {
120 printk(KERN_WARNING "%s: invalid segment number: %llu\n",
121 __func__, (unsigned long long)segnum);
122 return -EINVAL;
123 }
124 down_write(&NILFS_MDT(sufile)->mi_sem);
125
126 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
127 if (ret < 0)
128 goto out_sem;
129
130 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, create, &bh);
131 if (!ret) {
132 dofunc(sufile, segnum, header_bh, bh);
133 brelse(bh);
134 }
135 brelse(header_bh);
136
137 out_sem:
138 up_write(&NILFS_MDT(sufile)->mi_sem);
139 return ret;
140}
141
96/** 142/**
97 * nilfs_sufile_alloc - allocate a segment 143 * nilfs_sufile_alloc - allocate a segment
98 * @sufile: inode of segment usage file 144 * @sufile: inode of segment usage file
@@ -113,7 +159,6 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum,
113int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) 159int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
114{ 160{
115 struct buffer_head *header_bh, *su_bh; 161 struct buffer_head *header_bh, *su_bh;
116 struct the_nilfs *nilfs;
117 struct nilfs_sufile_header *header; 162 struct nilfs_sufile_header *header;
118 struct nilfs_segment_usage *su; 163 struct nilfs_segment_usage *su;
119 size_t susz = NILFS_MDT(sufile)->mi_entry_size; 164 size_t susz = NILFS_MDT(sufile)->mi_entry_size;
@@ -124,8 +169,6 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
124 169
125 down_write(&NILFS_MDT(sufile)->mi_sem); 170 down_write(&NILFS_MDT(sufile)->mi_sem);
126 171
127 nilfs = NILFS_MDT(sufile)->mi_nilfs;
128
129 ret = nilfs_sufile_get_header_block(sufile, &header_bh); 172 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
130 if (ret < 0) 173 if (ret < 0)
131 goto out_sem; 174 goto out_sem;
@@ -192,165 +235,84 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump)
192 return ret; 235 return ret;
193} 236}
194 237
195/** 238void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum,
196 * nilfs_sufile_cancel_free - 239 struct buffer_head *header_bh,
197 * @sufile: inode of segment usage file 240 struct buffer_head *su_bh)
198 * @segnum: segment number
199 *
200 * Description:
201 *
202 * Return Value: On success, 0 is returned. On error, one of the following
203 * negative error codes is returned.
204 *
205 * %-EIO - I/O error.
206 *
207 * %-ENOMEM - Insufficient amount of memory available.
208 */
209int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum)
210{ 241{
211 struct buffer_head *header_bh, *su_bh;
212 struct the_nilfs *nilfs;
213 struct nilfs_sufile_header *header;
214 struct nilfs_segment_usage *su; 242 struct nilfs_segment_usage *su;
215 void *kaddr; 243 void *kaddr;
216 int ret;
217
218 down_write(&NILFS_MDT(sufile)->mi_sem);
219
220 nilfs = NILFS_MDT(sufile)->mi_nilfs;
221
222 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
223 if (ret < 0)
224 goto out_sem;
225
226 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh);
227 if (ret < 0)
228 goto out_header;
229 244
230 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 245 kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
231 su = nilfs_sufile_block_get_segment_usage( 246 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
232 sufile, segnum, su_bh, kaddr);
233 if (unlikely(!nilfs_segment_usage_clean(su))) { 247 if (unlikely(!nilfs_segment_usage_clean(su))) {
234 printk(KERN_WARNING "%s: segment %llu must be clean\n", 248 printk(KERN_WARNING "%s: segment %llu must be clean\n",
235 __func__, (unsigned long long)segnum); 249 __func__, (unsigned long long)segnum);
236 kunmap_atomic(kaddr, KM_USER0); 250 kunmap_atomic(kaddr, KM_USER0);
237 goto out_su_bh; 251 return;
238 } 252 }
239 nilfs_segment_usage_set_dirty(su); 253 nilfs_segment_usage_set_dirty(su);
240 kunmap_atomic(kaddr, KM_USER0); 254 kunmap_atomic(kaddr, KM_USER0);
241 255
242 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 256 nilfs_sufile_mod_counter(header_bh, -1, 1);
243 header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr);
244 le64_add_cpu(&header->sh_ncleansegs, -1);
245 le64_add_cpu(&header->sh_ndirtysegs, 1);
246 kunmap_atomic(kaddr, KM_USER0);
247
248 nilfs_mdt_mark_buffer_dirty(header_bh);
249 nilfs_mdt_mark_buffer_dirty(su_bh); 257 nilfs_mdt_mark_buffer_dirty(su_bh);
250 nilfs_mdt_mark_dirty(sufile); 258 nilfs_mdt_mark_dirty(sufile);
251
252 out_su_bh:
253 brelse(su_bh);
254 out_header:
255 brelse(header_bh);
256 out_sem:
257 up_write(&NILFS_MDT(sufile)->mi_sem);
258 return ret;
259} 259}
260 260
261/** 261void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum,
262 * nilfs_sufile_freev - free segments 262 struct buffer_head *header_bh,
263 * @sufile: inode of segment usage file 263 struct buffer_head *su_bh)
264 * @segnum: array of segment numbers
265 * @nsegs: number of segments
266 *
267 * Description: nilfs_sufile_freev() frees segments specified by @segnum and
268 * @nsegs, which must have been returned by a previous call to
269 * nilfs_sufile_alloc().
270 *
271 * Return Value: On success, 0 is returned. On error, one of the following
272 * negative error codes is returned.
273 *
274 * %-EIO - I/O error.
275 *
276 * %-ENOMEM - Insufficient amount of memory available.
277 */
278#define NILFS_SUFILE_FREEV_PREALLOC 16
279int nilfs_sufile_freev(struct inode *sufile, __u64 *segnum, size_t nsegs)
280{ 264{
281 struct buffer_head *header_bh, **su_bh,
282 *su_bh_prealloc[NILFS_SUFILE_FREEV_PREALLOC];
283 struct the_nilfs *nilfs;
284 struct nilfs_sufile_header *header;
285 struct nilfs_segment_usage *su; 265 struct nilfs_segment_usage *su;
286 void *kaddr; 266 void *kaddr;
287 int ret, i; 267 int clean, dirty;
288 268
289 down_write(&NILFS_MDT(sufile)->mi_sem); 269 kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
290 270 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
291 nilfs = NILFS_MDT(sufile)->mi_nilfs; 271 if (su->su_flags == cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY) &&
292 272 su->su_nblocks == cpu_to_le32(0)) {
293 /* prepare resources */
294 if (nsegs <= NILFS_SUFILE_FREEV_PREALLOC)
295 su_bh = su_bh_prealloc;
296 else {
297 su_bh = kmalloc(sizeof(*su_bh) * nsegs, GFP_NOFS);
298 if (su_bh == NULL) {
299 ret = -ENOMEM;
300 goto out_sem;
301 }
302 }
303
304 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
305 if (ret < 0)
306 goto out_su_bh;
307 for (i = 0; i < nsegs; i++) {
308 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum[i],
309 0, &su_bh[i]);
310 if (ret < 0)
311 goto out_bh;
312 }
313
314 /* free segments */
315 for (i = 0; i < nsegs; i++) {
316 kaddr = kmap_atomic(su_bh[i]->b_page, KM_USER0);
317 su = nilfs_sufile_block_get_segment_usage(
318 sufile, segnum[i], su_bh[i], kaddr);
319 WARN_ON(nilfs_segment_usage_error(su));
320 nilfs_segment_usage_set_clean(su);
321 kunmap_atomic(kaddr, KM_USER0); 273 kunmap_atomic(kaddr, KM_USER0);
322 nilfs_mdt_mark_buffer_dirty(su_bh[i]); 274 return;
323 } 275 }
324 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 276 clean = nilfs_segment_usage_clean(su);
325 header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); 277 dirty = nilfs_segment_usage_dirty(su);
326 le64_add_cpu(&header->sh_ncleansegs, nsegs); 278
327 le64_add_cpu(&header->sh_ndirtysegs, -(u64)nsegs); 279 /* make the segment garbage */
280 su->su_lastmod = cpu_to_le64(0);
281 su->su_nblocks = cpu_to_le32(0);
282 su->su_flags = cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY);
328 kunmap_atomic(kaddr, KM_USER0); 283 kunmap_atomic(kaddr, KM_USER0);
329 nilfs_mdt_mark_buffer_dirty(header_bh); 284
285 nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1);
286 nilfs_mdt_mark_buffer_dirty(su_bh);
330 nilfs_mdt_mark_dirty(sufile); 287 nilfs_mdt_mark_dirty(sufile);
288}
331 289
332 out_bh: 290void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum,
333 for (i--; i >= 0; i--) 291 struct buffer_head *header_bh,
334 brelse(su_bh[i]); 292 struct buffer_head *su_bh)
335 brelse(header_bh); 293{
294 struct nilfs_segment_usage *su;
295 void *kaddr;
296 int sudirty;
336 297
337 out_su_bh: 298 kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
338 if (su_bh != su_bh_prealloc) 299 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
339 kfree(su_bh); 300 if (nilfs_segment_usage_clean(su)) {
301 printk(KERN_WARNING "%s: segment %llu is already clean\n",
302 __func__, (unsigned long long)segnum);
303 kunmap_atomic(kaddr, KM_USER0);
304 return;
305 }
306 WARN_ON(nilfs_segment_usage_error(su));
307 WARN_ON(!nilfs_segment_usage_dirty(su));
340 308
341 out_sem: 309 sudirty = nilfs_segment_usage_dirty(su);
342 up_write(&NILFS_MDT(sufile)->mi_sem); 310 nilfs_segment_usage_set_clean(su);
343 return ret; 311 kunmap_atomic(kaddr, KM_USER0);
344} 312 nilfs_mdt_mark_buffer_dirty(su_bh);
345 313
346/** 314 nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0);
347 * nilfs_sufile_free - 315 nilfs_mdt_mark_dirty(sufile);
348 * @sufile:
349 * @segnum:
350 */
351int nilfs_sufile_free(struct inode *sufile, __u64 segnum)
352{
353 return nilfs_sufile_freev(sufile, &segnum, 1);
354} 316}
355 317
356/** 318/**
@@ -500,72 +462,28 @@ int nilfs_sufile_get_ncleansegs(struct inode *sufile, unsigned long *nsegsp)
500 return ret; 462 return ret;
501} 463}
502 464
503/** 465void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
504 * nilfs_sufile_set_error - mark a segment as erroneous 466 struct buffer_head *header_bh,
505 * @sufile: inode of segment usage file 467 struct buffer_head *su_bh)
506 * @segnum: segment number
507 *
508 * Description: nilfs_sufile_set_error() marks the segment specified by
509 * @segnum as erroneous. The error segment will never be used again.
510 *
511 * Return Value: On success, 0 is returned. On error, one of the following
512 * negative error codes is returned.
513 *
514 * %-EIO - I/O error.
515 *
516 * %-ENOMEM - Insufficient amount of memory available.
517 *
518 * %-EINVAL - Invalid segment usage number.
519 */
520int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum)
521{ 468{
522 struct buffer_head *header_bh, *su_bh;
523 struct nilfs_segment_usage *su; 469 struct nilfs_segment_usage *su;
524 struct nilfs_sufile_header *header;
525 void *kaddr; 470 void *kaddr;
526 int ret; 471 int suclean;
527
528 if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) {
529 printk(KERN_WARNING "%s: invalid segment number: %llu\n",
530 __func__, (unsigned long long)segnum);
531 return -EINVAL;
532 }
533 down_write(&NILFS_MDT(sufile)->mi_sem);
534
535 ret = nilfs_sufile_get_header_block(sufile, &header_bh);
536 if (ret < 0)
537 goto out_sem;
538 ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh);
539 if (ret < 0)
540 goto out_header;
541 472
542 kaddr = kmap_atomic(su_bh->b_page, KM_USER0); 473 kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
543 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); 474 su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr);
544 if (nilfs_segment_usage_error(su)) { 475 if (nilfs_segment_usage_error(su)) {
545 kunmap_atomic(kaddr, KM_USER0); 476 kunmap_atomic(kaddr, KM_USER0);
546 brelse(su_bh); 477 return;
547 goto out_header;
548 } 478 }
549 479 suclean = nilfs_segment_usage_clean(su);
550 nilfs_segment_usage_set_error(su); 480 nilfs_segment_usage_set_error(su);
551 kunmap_atomic(kaddr, KM_USER0); 481 kunmap_atomic(kaddr, KM_USER0);
552 brelse(su_bh);
553 482
554 kaddr = kmap_atomic(header_bh->b_page, KM_USER0); 483 if (suclean)
555 header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); 484 nilfs_sufile_mod_counter(header_bh, -1, 0);
556 le64_add_cpu(&header->sh_ndirtysegs, -1);
557 kunmap_atomic(kaddr, KM_USER0);
558 nilfs_mdt_mark_buffer_dirty(header_bh);
559 nilfs_mdt_mark_buffer_dirty(su_bh); 485 nilfs_mdt_mark_buffer_dirty(su_bh);
560 nilfs_mdt_mark_dirty(sufile); 486 nilfs_mdt_mark_dirty(sufile);
561 brelse(su_bh);
562
563 out_header:
564 brelse(header_bh);
565
566 out_sem:
567 up_write(&NILFS_MDT(sufile)->mi_sem);
568 return ret;
569} 487}
570 488
571/** 489/**
@@ -625,7 +543,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum,
625 si[i + j].sui_nblocks = le32_to_cpu(su->su_nblocks); 543 si[i + j].sui_nblocks = le32_to_cpu(su->su_nblocks);
626 si[i + j].sui_flags = le32_to_cpu(su->su_flags) & 544 si[i + j].sui_flags = le32_to_cpu(su->su_flags) &
627 ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE); 545 ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE);
628 if (nilfs_segment_is_active(nilfs, segnum + i + j)) 546 if (nilfs_segment_is_active(nilfs, segnum + j))
629 si[i + j].sui_flags |= 547 si[i + j].sui_flags |=
630 (1UL << NILFS_SEGMENT_USAGE_ACTIVE); 548 (1UL << NILFS_SEGMENT_USAGE_ACTIVE);
631 } 549 }
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index d595f33a768d..a2e2efd4ade1 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -36,9 +36,6 @@ static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
36} 36}
37 37
38int nilfs_sufile_alloc(struct inode *, __u64 *); 38int nilfs_sufile_alloc(struct inode *, __u64 *);
39int nilfs_sufile_cancel_free(struct inode *, __u64);
40int nilfs_sufile_freev(struct inode *, __u64 *, size_t);
41int nilfs_sufile_free(struct inode *, __u64);
42int nilfs_sufile_get_segment_usage(struct inode *, __u64, 39int nilfs_sufile_get_segment_usage(struct inode *, __u64,
43 struct nilfs_segment_usage **, 40 struct nilfs_segment_usage **,
44 struct buffer_head **); 41 struct buffer_head **);
@@ -46,9 +43,83 @@ void nilfs_sufile_put_segment_usage(struct inode *, __u64,
46 struct buffer_head *); 43 struct buffer_head *);
47int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); 44int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *);
48int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *); 45int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *);
49int nilfs_sufile_set_error(struct inode *, __u64);
50ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, struct nilfs_suinfo *, 46ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, struct nilfs_suinfo *,
51 size_t); 47 size_t);
52 48
49int nilfs_sufile_update(struct inode *, __u64, int,
50 void (*dofunc)(struct inode *, __u64,
51 struct buffer_head *,
52 struct buffer_head *));
53void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *,
54 struct buffer_head *);
55void nilfs_sufile_do_scrap(struct inode *, __u64, struct buffer_head *,
56 struct buffer_head *);
57void nilfs_sufile_do_free(struct inode *, __u64, struct buffer_head *,
58 struct buffer_head *);
59void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *,
60 struct buffer_head *);
61
62/**
63 * nilfs_sufile_cancel_free -
64 * @sufile: inode of segment usage file
65 * @segnum: segment number
66 *
67 * Description:
68 *
69 * Return Value: On success, 0 is returned. On error, one of the following
70 * negative error codes is returned.
71 *
72 * %-EIO - I/O error.
73 *
74 * %-ENOMEM - Insufficient amount of memory available.
75 */
76static inline int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum)
77{
78 return nilfs_sufile_update(sufile, segnum, 0,
79 nilfs_sufile_do_cancel_free);
80}
81
82/**
83 * nilfs_sufile_scrap - make a segment garbage
84 * @sufile: inode of segment usage file
85 * @segnum: segment number to be freed
86 */
87static inline int nilfs_sufile_scrap(struct inode *sufile, __u64 segnum)
88{
89 return nilfs_sufile_update(sufile, segnum, 1, nilfs_sufile_do_scrap);
90}
91
92/**
93 * nilfs_sufile_free - free segment
94 * @sufile: inode of segment usage file
95 * @segnum: segment number to be freed
96 */
97static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum)
98{
99 return nilfs_sufile_update(sufile, segnum, 0, nilfs_sufile_do_free);
100}
101
102/**
103 * nilfs_sufile_set_error - mark a segment as erroneous
104 * @sufile: inode of segment usage file
105 * @segnum: segment number
106 *
107 * Description: nilfs_sufile_set_error() marks the segment specified by
108 * @segnum as erroneous. The error segment will never be used again.
109 *
110 * Return Value: On success, 0 is returned. On error, one of the following
111 * negative error codes is returned.
112 *
113 * %-EIO - I/O error.
114 *
115 * %-ENOMEM - Insufficient amount of memory available.
116 *
117 * %-EINVAL - Invalid segment usage number.
118 */
119static inline int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum)
120{
121 return nilfs_sufile_update(sufile, segnum, 0,
122 nilfs_sufile_do_set_error);
123}
53 124
54#endif /* _NILFS_SUFILE_H */ 125#endif /* _NILFS_SUFILE_H */
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index e117e1ea9bff..6989b03e97ab 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -63,7 +63,6 @@
63MODULE_AUTHOR("NTT Corp."); 63MODULE_AUTHOR("NTT Corp.");
64MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " 64MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem "
65 "(NILFS)"); 65 "(NILFS)");
66MODULE_VERSION(NILFS_VERSION);
67MODULE_LICENSE("GPL"); 66MODULE_LICENSE("GPL");
68 67
69static int nilfs_remount(struct super_block *sb, int *flags, char *data); 68static int nilfs_remount(struct super_block *sb, int *flags, char *data);
@@ -476,11 +475,12 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
476{ 475{
477 struct super_block *sb = dentry->d_sb; 476 struct super_block *sb = dentry->d_sb;
478 struct nilfs_sb_info *sbi = NILFS_SB(sb); 477 struct nilfs_sb_info *sbi = NILFS_SB(sb);
478 struct the_nilfs *nilfs = sbi->s_nilfs;
479 u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
479 unsigned long long blocks; 480 unsigned long long blocks;
480 unsigned long overhead; 481 unsigned long overhead;
481 unsigned long nrsvblocks; 482 unsigned long nrsvblocks;
482 sector_t nfreeblocks; 483 sector_t nfreeblocks;
483 struct the_nilfs *nilfs = sbi->s_nilfs;
484 int err; 484 int err;
485 485
486 /* 486 /*
@@ -514,6 +514,9 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
514 buf->f_files = atomic_read(&sbi->s_inodes_count); 514 buf->f_files = atomic_read(&sbi->s_inodes_count);
515 buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */ 515 buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */
516 buf->f_namelen = NILFS_NAME_LEN; 516 buf->f_namelen = NILFS_NAME_LEN;
517 buf->f_fsid.val[0] = (u32)id;
518 buf->f_fsid.val[1] = (u32)(id >> 32);
519
517 return 0; 520 return 0;
518} 521}
519 522
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 33400cf0bbe2..7f65b3be4aa9 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -115,6 +115,7 @@ void put_nilfs(struct the_nilfs *nilfs)
115static int nilfs_load_super_root(struct the_nilfs *nilfs, 115static int nilfs_load_super_root(struct the_nilfs *nilfs,
116 struct nilfs_sb_info *sbi, sector_t sr_block) 116 struct nilfs_sb_info *sbi, sector_t sr_block)
117{ 117{
118 static struct lock_class_key dat_lock_key;
118 struct buffer_head *bh_sr; 119 struct buffer_head *bh_sr;
119 struct nilfs_super_root *raw_sr; 120 struct nilfs_super_root *raw_sr;
120 struct nilfs_super_block **sbp = nilfs->ns_sbp; 121 struct nilfs_super_block **sbp = nilfs->ns_sbp;
@@ -163,6 +164,9 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs,
163 if (unlikely(err)) 164 if (unlikely(err))
164 goto failed_sufile; 165 goto failed_sufile;
165 166
167 lockdep_set_class(&NILFS_MDT(nilfs->ns_dat)->mi_sem, &dat_lock_key);
168 lockdep_set_class(&NILFS_MDT(nilfs->ns_gc_dat)->mi_sem, &dat_lock_key);
169
166 nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat); 170 nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat);
167 nilfs_mdt_set_entry_size(nilfs->ns_cpfile, checkpoint_size, 171 nilfs_mdt_set_entry_size(nilfs->ns_cpfile, checkpoint_size,
168 sizeof(struct nilfs_cpfile_header)); 172 sizeof(struct nilfs_cpfile_header));
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index bed766e435b5..1634319e2404 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -220,7 +220,7 @@ static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
220 rem = 0; 220 rem = 0;
221 } 221 }
222 222
223 kevent->name = kmalloc(len + rem, GFP_KERNEL); 223 kevent->name = kmalloc(len + rem, GFP_NOFS);
224 if (unlikely(!kevent->name)) { 224 if (unlikely(!kevent->name)) {
225 kmem_cache_free(event_cachep, kevent); 225 kmem_cache_free(event_cachep, kevent);
226 return NULL; 226 return NULL;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 7d604480557a..b574431a031d 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -290,6 +290,21 @@ out_attach:
290 else 290 else
291 mlog_errno(ret); 291 mlog_errno(ret);
292 292
293 /*
294 * In case of error, manually free the allocation and do the iput().
295 * We need to do this because error here means no d_instantiate(),
296 * which means iput() will not be called during dput(dentry).
297 */
298 if (ret < 0 && !alias) {
299 ocfs2_lock_res_free(&dl->dl_lockres);
300 BUG_ON(dl->dl_count != 1);
301 spin_lock(&dentry_attach_lock);
302 dentry->d_fsdata = NULL;
303 spin_unlock(&dentry_attach_lock);
304 kfree(dl);
305 iput(inode);
306 }
307
293 dput(alias); 308 dput(alias);
294 309
295 return ret; 310 return ret;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index e71160cda110..c5752305627c 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2697,7 +2697,7 @@ static int ocfs2_dx_dir_index_block(struct inode *dir,
2697 u32 *num_dx_entries, 2697 u32 *num_dx_entries,
2698 struct buffer_head *dirent_bh) 2698 struct buffer_head *dirent_bh)
2699{ 2699{
2700 int ret, namelen, i; 2700 int ret = 0, namelen, i;
2701 char *de_buf, *limit; 2701 char *de_buf, *limit;
2702 struct ocfs2_dir_entry *de; 2702 struct ocfs2_dir_entry *de;
2703 struct buffer_head *dx_leaf_bh; 2703 struct buffer_head *dx_leaf_bh;
@@ -2934,7 +2934,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
2934 */ 2934 */
2935 BUG_ON(alloc > 2); 2935 BUG_ON(alloc > 2);
2936 2936
2937 ret = ocfs2_reserve_clusters(osb, alloc, &data_ac); 2937 ret = ocfs2_reserve_clusters(osb, alloc + dx_alloc, &data_ac);
2938 if (ret) { 2938 if (ret) {
2939 mlog_errno(ret); 2939 mlog_errno(ret);
2940 goto out; 2940 goto out;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index de3da8eb558c..15713cbb865c 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -100,7 +100,8 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
100 100
101 /* If the inode allocator bit is clear, this inode must be stale */ 101 /* If the inode allocator bit is clear, this inode must be stale */
102 if (!set) { 102 if (!set) {
103 mlog(0, "inode %llu suballoc bit is clear\n", blkno); 103 mlog(0, "inode %llu suballoc bit is clear\n",
104 (unsigned long long)blkno);
104 status = -ESTALE; 105 status = -ESTALE;
105 goto unlock_nfs_sync; 106 goto unlock_nfs_sync;
106 } 107 }
@@ -114,7 +115,7 @@ check_err:
114 if (status < 0) { 115 if (status < 0) {
115 if (status == -ESTALE) { 116 if (status == -ESTALE) {
116 mlog(0, "stale inode ino: %llu generation: %u\n", 117 mlog(0, "stale inode ino: %llu generation: %u\n",
117 blkno, handle->ih_generation); 118 (unsigned long long)blkno, handle->ih_generation);
118 } 119 }
119 result = ERR_PTR(status); 120 result = ERR_PTR(status);
120 goto bail; 121 goto bail;
@@ -129,8 +130,8 @@ check_err:
129check_gen: 130check_gen:
130 if (handle->ih_generation != inode->i_generation) { 131 if (handle->ih_generation != inode->i_generation) {
131 iput(inode); 132 iput(inode);
132 mlog(0, "stale inode ino: %llu generation: %u\n", blkno, 133 mlog(0, "stale inode ino: %llu generation: %u\n",
133 handle->ih_generation); 134 (unsigned long long)blkno, handle->ih_generation);
134 result = ERR_PTR(-ESTALE); 135 result = ERR_PTR(-ESTALE);
135 goto bail; 136 goto bail;
136 } 137 }
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 8672b9536039..c2a87c885b73 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1912,6 +1912,22 @@ out_sems:
1912 return written ? written : ret; 1912 return written ? written : ret;
1913} 1913}
1914 1914
1915static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
1916 struct file *out,
1917 struct splice_desc *sd)
1918{
1919 int ret;
1920
1921 ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos,
1922 sd->total_len, 0, NULL);
1923 if (ret < 0) {
1924 mlog_errno(ret);
1925 return ret;
1926 }
1927
1928 return splice_from_pipe_feed(pipe, sd, pipe_to_file);
1929}
1930
1915static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, 1931static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
1916 struct file *out, 1932 struct file *out,
1917 loff_t *ppos, 1933 loff_t *ppos,
@@ -1919,38 +1935,76 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
1919 unsigned int flags) 1935 unsigned int flags)
1920{ 1936{
1921 int ret; 1937 int ret;
1922 struct inode *inode = out->f_path.dentry->d_inode; 1938 struct address_space *mapping = out->f_mapping;
1939 struct inode *inode = mapping->host;
1940 struct splice_desc sd = {
1941 .total_len = len,
1942 .flags = flags,
1943 .pos = *ppos,
1944 .u.file = out,
1945 };
1923 1946
1924 mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe, 1947 mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe,
1925 (unsigned int)len, 1948 (unsigned int)len,
1926 out->f_path.dentry->d_name.len, 1949 out->f_path.dentry->d_name.len,
1927 out->f_path.dentry->d_name.name); 1950 out->f_path.dentry->d_name.name);
1928 1951
1929 mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); 1952 if (pipe->inode)
1953 mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT);
1930 1954
1931 ret = ocfs2_rw_lock(inode, 1); 1955 splice_from_pipe_begin(&sd);
1932 if (ret < 0) { 1956 do {
1933 mlog_errno(ret); 1957 ret = splice_from_pipe_next(pipe, &sd);
1934 goto out; 1958 if (ret <= 0)
1935 } 1959 break;
1936 1960
1937 ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0, 1961 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
1938 NULL); 1962 ret = ocfs2_rw_lock(inode, 1);
1939 if (ret < 0) { 1963 if (ret < 0)
1940 mlog_errno(ret); 1964 mlog_errno(ret);
1941 goto out_unlock; 1965 else {
1942 } 1966 ret = ocfs2_splice_to_file(pipe, out, &sd);
1967 ocfs2_rw_unlock(inode, 1);
1968 }
1969 mutex_unlock(&inode->i_mutex);
1970 } while (ret > 0);
1971 splice_from_pipe_end(pipe, &sd);
1943 1972
1944 if (pipe->inode) 1973 if (pipe->inode)
1945 mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
1946 ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags);
1947 if (pipe->inode)
1948 mutex_unlock(&pipe->inode->i_mutex); 1974 mutex_unlock(&pipe->inode->i_mutex);
1949 1975
1950out_unlock: 1976 if (sd.num_spliced)
1951 ocfs2_rw_unlock(inode, 1); 1977 ret = sd.num_spliced;
1952out: 1978
1953 mutex_unlock(&inode->i_mutex); 1979 if (ret > 0) {
1980 unsigned long nr_pages;
1981
1982 *ppos += ret;
1983 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1984
1985 /*
1986 * If file or inode is SYNC and we actually wrote some data,
1987 * sync it.
1988 */
1989 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
1990 int err;
1991
1992 mutex_lock(&inode->i_mutex);
1993 err = ocfs2_rw_lock(inode, 1);
1994 if (err < 0) {
1995 mlog_errno(err);
1996 } else {
1997 err = generic_osync_inode(inode, mapping,
1998 OSYNC_METADATA|OSYNC_DATA);
1999 ocfs2_rw_unlock(inode, 1);
2000 }
2001 mutex_unlock(&inode->i_mutex);
2002
2003 if (err)
2004 ret = err;
2005 }
2006 balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
2007 }
1954 2008
1955 mlog_exit(ret); 2009 mlog_exit(ret);
1956 return ret; 2010 return ret;
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 619dd7f6c053..eb7b76331eb7 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -437,8 +437,9 @@ static inline int ocfs2_unlink_credits(struct super_block *sb)
437} 437}
438 438
439/* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry + 439/* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry +
440 * inode alloc group descriptor + orphan dir index leaf */ 440 * inode alloc group descriptor + orphan dir index root +
441#define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3) 441 * orphan dir index leaf */
442#define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 4)
442 443
443/* dinode update, old dir dinode update, new dir dinode update, old 444/* dinode update, old dir dinode update, new dir dinode update, old
444 * dir dir entry, new dir dir entry, dir entry update for renaming 445 * dir dir entry, new dir dir entry, dir entry update for renaming
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 2220f93f668b..33464c6b60a2 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1025,10 +1025,8 @@ static int ocfs2_rename(struct inode *old_dir,
1025 struct inode *orphan_dir = NULL; 1025 struct inode *orphan_dir = NULL;
1026 struct ocfs2_dinode *newfe = NULL; 1026 struct ocfs2_dinode *newfe = NULL;
1027 char orphan_name[OCFS2_ORPHAN_NAMELEN + 1]; 1027 char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
1028 struct buffer_head *orphan_entry_bh = NULL;
1029 struct buffer_head *newfe_bh = NULL; 1028 struct buffer_head *newfe_bh = NULL;
1030 struct buffer_head *old_inode_bh = NULL; 1029 struct buffer_head *old_inode_bh = NULL;
1031 struct buffer_head *insert_entry_bh = NULL;
1032 struct ocfs2_super *osb = NULL; 1030 struct ocfs2_super *osb = NULL;
1033 u64 newfe_blkno, old_de_ino; 1031 u64 newfe_blkno, old_de_ino;
1034 handle_t *handle = NULL; 1032 handle_t *handle = NULL;
@@ -1455,8 +1453,6 @@ bail:
1455 brelse(old_inode_bh); 1453 brelse(old_inode_bh);
1456 brelse(old_dir_bh); 1454 brelse(old_dir_bh);
1457 brelse(new_dir_bh); 1455 brelse(new_dir_bh);
1458 brelse(orphan_entry_bh);
1459 brelse(insert_entry_bh);
1460 1456
1461 mlog_exit(status); 1457 mlog_exit(status);
1462 1458
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index b4ca5911caaf..8439f6b324b9 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -2197,26 +2197,29 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
2197 struct buffer_head *inode_bh = NULL; 2197 struct buffer_head *inode_bh = NULL;
2198 struct ocfs2_dinode *inode_fe; 2198 struct ocfs2_dinode *inode_fe;
2199 2199
2200 mlog_entry("blkno: %llu\n", blkno); 2200 mlog_entry("blkno: %llu\n", (unsigned long long)blkno);
2201 2201
2202 /* dirty read disk */ 2202 /* dirty read disk */
2203 status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh); 2203 status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh);
2204 if (status < 0) { 2204 if (status < 0) {
2205 mlog(ML_ERROR, "read block %llu failed %d\n", blkno, status); 2205 mlog(ML_ERROR, "read block %llu failed %d\n",
2206 (unsigned long long)blkno, status);
2206 goto bail; 2207 goto bail;
2207 } 2208 }
2208 2209
2209 inode_fe = (struct ocfs2_dinode *) inode_bh->b_data; 2210 inode_fe = (struct ocfs2_dinode *) inode_bh->b_data;
2210 if (!OCFS2_IS_VALID_DINODE(inode_fe)) { 2211 if (!OCFS2_IS_VALID_DINODE(inode_fe)) {
2211 mlog(ML_ERROR, "invalid inode %llu requested\n", blkno); 2212 mlog(ML_ERROR, "invalid inode %llu requested\n",
2213 (unsigned long long)blkno);
2212 status = -EINVAL; 2214 status = -EINVAL;
2213 goto bail; 2215 goto bail;
2214 } 2216 }
2215 2217
2216 if (le16_to_cpu(inode_fe->i_suballoc_slot) != OCFS2_INVALID_SLOT && 2218 if (le16_to_cpu(inode_fe->i_suballoc_slot) != (u16)OCFS2_INVALID_SLOT &&
2217 (u32)le16_to_cpu(inode_fe->i_suballoc_slot) > osb->max_slots - 1) { 2219 (u32)le16_to_cpu(inode_fe->i_suballoc_slot) > osb->max_slots - 1) {
2218 mlog(ML_ERROR, "inode %llu has invalid suballoc slot %u\n", 2220 mlog(ML_ERROR, "inode %llu has invalid suballoc slot %u\n",
2219 blkno, (u32)le16_to_cpu(inode_fe->i_suballoc_slot)); 2221 (unsigned long long)blkno,
2222 (u32)le16_to_cpu(inode_fe->i_suballoc_slot));
2220 status = -EINVAL; 2223 status = -EINVAL;
2221 goto bail; 2224 goto bail;
2222 } 2225 }
@@ -2251,7 +2254,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2251 u64 bg_blkno; 2254 u64 bg_blkno;
2252 int status; 2255 int status;
2253 2256
2254 mlog_entry("blkno: %llu bit: %u\n", blkno, (unsigned int)bit); 2257 mlog_entry("blkno: %llu bit: %u\n", (unsigned long long)blkno,
2258 (unsigned int)bit);
2255 2259
2256 alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data; 2260 alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data;
2257 if ((bit + 1) > ocfs2_bits_per_group(&alloc_fe->id2.i_chain)) { 2261 if ((bit + 1) > ocfs2_bits_per_group(&alloc_fe->id2.i_chain)) {
@@ -2266,7 +2270,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2266 status = ocfs2_read_group_descriptor(suballoc, alloc_fe, bg_blkno, 2270 status = ocfs2_read_group_descriptor(suballoc, alloc_fe, bg_blkno,
2267 &group_bh); 2271 &group_bh);
2268 if (status < 0) { 2272 if (status < 0) {
2269 mlog(ML_ERROR, "read group %llu failed %d\n", bg_blkno, status); 2273 mlog(ML_ERROR, "read group %llu failed %d\n",
2274 (unsigned long long)bg_blkno, status);
2270 goto bail; 2275 goto bail;
2271 } 2276 }
2272 2277
@@ -2300,7 +2305,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
2300 struct inode *inode_alloc_inode; 2305 struct inode *inode_alloc_inode;
2301 struct buffer_head *alloc_bh = NULL; 2306 struct buffer_head *alloc_bh = NULL;
2302 2307
2303 mlog_entry("blkno: %llu", blkno); 2308 mlog_entry("blkno: %llu", (unsigned long long)blkno);
2304 2309
2305 status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, 2310 status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot,
2306 &suballoc_bit); 2311 &suballoc_bit);
diff --git a/fs/pipe.c b/fs/pipe.c
index 4af7aa521813..13414ec45b8d 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -37,6 +37,42 @@
37 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09 37 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
38 */ 38 */
39 39
40static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
41{
42 if (pipe->inode)
43 mutex_lock_nested(&pipe->inode->i_mutex, subclass);
44}
45
46void pipe_lock(struct pipe_inode_info *pipe)
47{
48 /*
49 * pipe_lock() nests non-pipe inode locks (for writing to a file)
50 */
51 pipe_lock_nested(pipe, I_MUTEX_PARENT);
52}
53EXPORT_SYMBOL(pipe_lock);
54
55void pipe_unlock(struct pipe_inode_info *pipe)
56{
57 if (pipe->inode)
58 mutex_unlock(&pipe->inode->i_mutex);
59}
60EXPORT_SYMBOL(pipe_unlock);
61
62void pipe_double_lock(struct pipe_inode_info *pipe1,
63 struct pipe_inode_info *pipe2)
64{
65 BUG_ON(pipe1 == pipe2);
66
67 if (pipe1 < pipe2) {
68 pipe_lock_nested(pipe1, I_MUTEX_PARENT);
69 pipe_lock_nested(pipe2, I_MUTEX_CHILD);
70 } else {
71 pipe_lock_nested(pipe2, I_MUTEX_CHILD);
72 pipe_lock_nested(pipe1, I_MUTEX_PARENT);
73 }
74}
75
40/* Drop the inode semaphore and wait for a pipe event, atomically */ 76/* Drop the inode semaphore and wait for a pipe event, atomically */
41void pipe_wait(struct pipe_inode_info *pipe) 77void pipe_wait(struct pipe_inode_info *pipe)
42{ 78{
@@ -47,12 +83,10 @@ void pipe_wait(struct pipe_inode_info *pipe)
47 * is considered a noninteractive wait: 83 * is considered a noninteractive wait:
48 */ 84 */
49 prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE); 85 prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
50 if (pipe->inode) 86 pipe_unlock(pipe);
51 mutex_unlock(&pipe->inode->i_mutex);
52 schedule(); 87 schedule();
53 finish_wait(&pipe->wait, &wait); 88 finish_wait(&pipe->wait, &wait);
54 if (pipe->inode) 89 pipe_lock(pipe);
55 mutex_lock(&pipe->inode->i_mutex);
56} 90}
57 91
58static int 92static int
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 7e4877d9dcb5..725a650bbbb8 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -80,6 +80,7 @@
80#include <linux/delayacct.h> 80#include <linux/delayacct.h>
81#include <linux/seq_file.h> 81#include <linux/seq_file.h>
82#include <linux/pid_namespace.h> 82#include <linux/pid_namespace.h>
83#include <linux/ptrace.h>
83#include <linux/tracehook.h> 84#include <linux/tracehook.h>
84 85
85#include <asm/pgtable.h> 86#include <asm/pgtable.h>
@@ -352,6 +353,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
352 char state; 353 char state;
353 pid_t ppid = 0, pgid = -1, sid = -1; 354 pid_t ppid = 0, pgid = -1, sid = -1;
354 int num_threads = 0; 355 int num_threads = 0;
356 int permitted;
355 struct mm_struct *mm; 357 struct mm_struct *mm;
356 unsigned long long start_time; 358 unsigned long long start_time;
357 unsigned long cmin_flt = 0, cmaj_flt = 0; 359 unsigned long cmin_flt = 0, cmaj_flt = 0;
@@ -364,11 +366,14 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
364 366
365 state = *get_task_state(task); 367 state = *get_task_state(task);
366 vsize = eip = esp = 0; 368 vsize = eip = esp = 0;
369 permitted = ptrace_may_access(task, PTRACE_MODE_READ);
367 mm = get_task_mm(task); 370 mm = get_task_mm(task);
368 if (mm) { 371 if (mm) {
369 vsize = task_vsize(mm); 372 vsize = task_vsize(mm);
370 eip = KSTK_EIP(task); 373 if (permitted) {
371 esp = KSTK_ESP(task); 374 eip = KSTK_EIP(task);
375 esp = KSTK_ESP(task);
376 }
372 } 377 }
373 378
374 get_task_comm(tcomm, task); 379 get_task_comm(tcomm, task);
@@ -424,7 +429,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
424 unlock_task_sighand(task, &flags); 429 unlock_task_sighand(task, &flags);
425 } 430 }
426 431
427 if (!whole || num_threads < 2) 432 if (permitted && (!whole || num_threads < 2))
428 wchan = get_wchan(task); 433 wchan = get_wchan(task);
429 if (!whole) { 434 if (!whole) {
430 min_flt = task->min_flt; 435 min_flt = task->min_flt;
@@ -476,7 +481,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
476 rsslim, 481 rsslim,
477 mm ? mm->start_code : 0, 482 mm ? mm->start_code : 0,
478 mm ? mm->end_code : 0, 483 mm ? mm->end_code : 0,
479 mm ? mm->start_stack : 0, 484 (permitted && mm) ? mm->start_stack : 0,
480 esp, 485 esp,
481 eip, 486 eip,
482 /* The signal information here is obsolete. 487 /* The signal information here is obsolete.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index f71559784bfb..fb45615943c2 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -322,7 +322,10 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
322 wchan = get_wchan(task); 322 wchan = get_wchan(task);
323 323
324 if (lookup_symbol_name(wchan, symname) < 0) 324 if (lookup_symbol_name(wchan, symname) < 0)
325 return sprintf(buffer, "%lu", wchan); 325 if (!ptrace_may_access(task, PTRACE_MODE_READ))
326 return 0;
327 else
328 return sprintf(buffer, "%lu", wchan);
326 else 329 else
327 return sprintf(buffer, "%s", symname); 330 return sprintf(buffer, "%s", symname);
328} 331}
@@ -648,14 +651,14 @@ static unsigned mounts_poll(struct file *file, poll_table *wait)
648{ 651{
649 struct proc_mounts *p = file->private_data; 652 struct proc_mounts *p = file->private_data;
650 struct mnt_namespace *ns = p->ns; 653 struct mnt_namespace *ns = p->ns;
651 unsigned res = 0; 654 unsigned res = POLLIN | POLLRDNORM;
652 655
653 poll_wait(file, &ns->poll, wait); 656 poll_wait(file, &ns->poll, wait);
654 657
655 spin_lock(&vfsmount_lock); 658 spin_lock(&vfsmount_lock);
656 if (p->event != ns->event) { 659 if (p->event != ns->event) {
657 p->event = ns->event; 660 p->event = ns->event;
658 res = POLLERR; 661 res |= POLLERR | POLLPRI;
659 } 662 }
660 spin_unlock(&vfsmount_lock); 663 spin_unlock(&vfsmount_lock);
661 664
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 74ea974f5ca6..c6b0302af4c4 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -35,7 +35,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
35#define K(x) ((x) << (PAGE_SHIFT - 10)) 35#define K(x) ((x) << (PAGE_SHIFT - 10))
36 si_meminfo(&i); 36 si_meminfo(&i);
37 si_swapinfo(&i); 37 si_swapinfo(&i);
38 committed = atomic_long_read(&vm_committed_space); 38 committed = percpu_counter_read_positive(&vm_committed_as);
39 allowed = ((totalram_pages - hugetlb_total_pages()) 39 allowed = ((totalram_pages - hugetlb_total_pages())
40 * sysctl_overcommit_ratio / 100) + total_swap_pages; 40 * sysctl_overcommit_ratio / 100) + total_swap_pages;
41 41
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index f75efa22df5e..81e4eb60972e 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -18,6 +18,9 @@
18#ifndef arch_irq_stat 18#ifndef arch_irq_stat
19#define arch_irq_stat() 0 19#define arch_irq_stat() 0
20#endif 20#endif
21#ifndef arch_idle_time
22#define arch_idle_time(cpu) 0
23#endif
21 24
22static int show_stat(struct seq_file *p, void *v) 25static int show_stat(struct seq_file *p, void *v)
23{ 26{
@@ -40,6 +43,7 @@ static int show_stat(struct seq_file *p, void *v)
40 nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); 43 nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice);
41 system = cputime64_add(system, kstat_cpu(i).cpustat.system); 44 system = cputime64_add(system, kstat_cpu(i).cpustat.system);
42 idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle); 45 idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle);
46 idle = cputime64_add(idle, arch_idle_time(i));
43 iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait); 47 iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait);
44 irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); 48 irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq);
45 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); 49 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
@@ -69,6 +73,7 @@ static int show_stat(struct seq_file *p, void *v)
69 nice = kstat_cpu(i).cpustat.nice; 73 nice = kstat_cpu(i).cpustat.nice;
70 system = kstat_cpu(i).cpustat.system; 74 system = kstat_cpu(i).cpustat.system;
71 idle = kstat_cpu(i).cpustat.idle; 75 idle = kstat_cpu(i).cpustat.idle;
76 idle = cputime64_add(idle, arch_idle_time(i));
72 iowait = kstat_cpu(i).cpustat.iowait; 77 iowait = kstat_cpu(i).cpustat.iowait;
73 irq = kstat_cpu(i).cpustat.irq; 78 irq = kstat_cpu(i).cpustat.irq;
74 softirq = kstat_cpu(i).cpustat.softirq; 79 softirq = kstat_cpu(i).cpustat.softirq;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 39e4ad4f59f4..6f61b7cc32e0 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -665,6 +665,10 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
665 goto out_task; 665 goto out_task;
666 666
667 ret = 0; 667 ret = 0;
668
669 if (!count)
670 goto out_task;
671
668 mm = get_task_mm(task); 672 mm = get_task_mm(task);
669 if (!mm) 673 if (!mm)
670 goto out_task; 674 goto out_task;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 12c20377772d..64a72e2e7650 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -135,7 +135,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
135 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 135 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
136 dev = inode->i_sb->s_dev; 136 dev = inode->i_sb->s_dev;
137 ino = inode->i_ino; 137 ino = inode->i_ino;
138 pgoff = (loff_t)vma->pg_off << PAGE_SHIFT; 138 pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
139 } 139 }
140 140
141 seq_printf(m, 141 seq_printf(m,
diff --git a/fs/quota/Makefile b/fs/quota/Makefile
index 385a0831cc99..68d4f6dc0578 100644
--- a/fs/quota/Makefile
+++ b/fs/quota/Makefile
@@ -1,12 +1,3 @@
1#
2# Makefile for the Linux filesystems.
3#
4# 14 Sep 2000, Christoph Hellwig <hch@infradead.org>
5# Rewritten to use lists instead of if-statements.
6#
7
8obj-y :=
9
10obj-$(CONFIG_QUOTA) += dquot.o 1obj-$(CONFIG_QUOTA) += dquot.o
11obj-$(CONFIG_QFMT_V1) += quota_v1.o 2obj-$(CONFIG_QFMT_V1) += quota_v1.o
12obj-$(CONFIG_QFMT_V2) += quota_v2.o 3obj-$(CONFIG_QFMT_V2) += quota_v2.o
diff --git a/fs/romfs/internal.h b/fs/romfs/internal.h
index 06044a9dc62d..95217b830118 100644
--- a/fs/romfs/internal.h
+++ b/fs/romfs/internal.h
@@ -43,5 +43,5 @@ extern int romfs_dev_read(struct super_block *sb, unsigned long pos,
43 void *buf, size_t buflen); 43 void *buf, size_t buflen);
44extern ssize_t romfs_dev_strnlen(struct super_block *sb, 44extern ssize_t romfs_dev_strnlen(struct super_block *sb,
45 unsigned long pos, size_t maxlen); 45 unsigned long pos, size_t maxlen);
46extern int romfs_dev_strncmp(struct super_block *sb, unsigned long pos, 46extern int romfs_dev_strcmp(struct super_block *sb, unsigned long pos,
47 const char *str, size_t size); 47 const char *str, size_t size);
diff --git a/fs/romfs/storage.c b/fs/romfs/storage.c
index 7e3e1e12a081..b3208adf8e71 100644
--- a/fs/romfs/storage.c
+++ b/fs/romfs/storage.c
@@ -67,26 +67,35 @@ static ssize_t romfs_mtd_strnlen(struct super_block *sb,
67 * compare a string to one in a romfs image on MTD 67 * compare a string to one in a romfs image on MTD
68 * - return 1 if matched, 0 if differ, -ve if error 68 * - return 1 if matched, 0 if differ, -ve if error
69 */ 69 */
70static int romfs_mtd_strncmp(struct super_block *sb, unsigned long pos, 70static int romfs_mtd_strcmp(struct super_block *sb, unsigned long pos,
71 const char *str, size_t size) 71 const char *str, size_t size)
72{ 72{
73 u_char buf[16]; 73 u_char buf[17];
74 size_t len, segment; 74 size_t len, segment;
75 int ret; 75 int ret;
76 76
77 /* scan the string up to 16 bytes at a time */ 77 /* scan the string up to 16 bytes at a time, and attempt to grab the
78 * trailing NUL whilst we're at it */
79 buf[0] = 0xff;
80
78 while (size > 0) { 81 while (size > 0) {
79 segment = min_t(size_t, size, 16); 82 segment = min_t(size_t, size + 1, 17);
80 ret = ROMFS_MTD_READ(sb, pos, segment, &len, buf); 83 ret = ROMFS_MTD_READ(sb, pos, segment, &len, buf);
81 if (ret < 0) 84 if (ret < 0)
82 return ret; 85 return ret;
86 len--;
83 if (memcmp(buf, str, len) != 0) 87 if (memcmp(buf, str, len) != 0)
84 return 0; 88 return 0;
89 buf[0] = buf[len];
85 size -= len; 90 size -= len;
86 pos += len; 91 pos += len;
87 str += len; 92 str += len;
88 } 93 }
89 94
95 /* check the trailing NUL was */
96 if (buf[0])
97 return 0;
98
90 return 1; 99 return 1;
91} 100}
92#endif /* CONFIG_ROMFS_ON_MTD */ 101#endif /* CONFIG_ROMFS_ON_MTD */
@@ -111,6 +120,7 @@ static int romfs_blk_read(struct super_block *sb, unsigned long pos,
111 return -EIO; 120 return -EIO;
112 memcpy(buf, bh->b_data + offset, segment); 121 memcpy(buf, bh->b_data + offset, segment);
113 brelse(bh); 122 brelse(bh);
123 buf += segment;
114 buflen -= segment; 124 buflen -= segment;
115 pos += segment; 125 pos += segment;
116 } 126 }
@@ -154,28 +164,48 @@ static ssize_t romfs_blk_strnlen(struct super_block *sb,
154 * compare a string to one in a romfs image on a block device 164 * compare a string to one in a romfs image on a block device
155 * - return 1 if matched, 0 if differ, -ve if error 165 * - return 1 if matched, 0 if differ, -ve if error
156 */ 166 */
157static int romfs_blk_strncmp(struct super_block *sb, unsigned long pos, 167static int romfs_blk_strcmp(struct super_block *sb, unsigned long pos,
158 const char *str, size_t size) 168 const char *str, size_t size)
159{ 169{
160 struct buffer_head *bh; 170 struct buffer_head *bh;
161 unsigned long offset; 171 unsigned long offset;
162 size_t segment; 172 size_t segment;
163 bool x; 173 bool matched, terminated = false;
164 174
165 /* scan the string up to 16 bytes at a time */ 175 /* compare string up to a block at a time */
166 while (size > 0) { 176 while (size > 0) {
167 offset = pos & (ROMBSIZE - 1); 177 offset = pos & (ROMBSIZE - 1);
168 segment = min_t(size_t, size, ROMBSIZE - offset); 178 segment = min_t(size_t, size, ROMBSIZE - offset);
169 bh = sb_bread(sb, pos >> ROMBSBITS); 179 bh = sb_bread(sb, pos >> ROMBSBITS);
170 if (!bh) 180 if (!bh)
171 return -EIO; 181 return -EIO;
172 x = (memcmp(bh->b_data + offset, str, segment) != 0); 182 matched = (memcmp(bh->b_data + offset, str, segment) == 0);
173 brelse(bh); 183
174 if (x)
175 return 0;
176 size -= segment; 184 size -= segment;
177 pos += segment; 185 pos += segment;
178 str += segment; 186 str += segment;
187 if (matched && size == 0 && offset + segment < ROMBSIZE) {
188 if (!bh->b_data[offset + segment])
189 terminated = true;
190 else
191 matched = false;
192 }
193 brelse(bh);
194 if (!matched)
195 return 0;
196 }
197
198 if (!terminated) {
199 /* the terminating NUL must be on the first byte of the next
200 * block */
201 BUG_ON((pos & (ROMBSIZE - 1)) != 0);
202 bh = sb_bread(sb, pos >> ROMBSBITS);
203 if (!bh)
204 return -EIO;
205 matched = !bh->b_data[0];
206 brelse(bh);
207 if (!matched)
208 return 0;
179 } 209 }
180 210
181 return 1; 211 return 1;
@@ -234,10 +264,12 @@ ssize_t romfs_dev_strnlen(struct super_block *sb,
234 264
235/* 265/*
236 * compare a string to one in romfs 266 * compare a string to one in romfs
267 * - the string to be compared to, str, may not be NUL-terminated; instead the
268 * string is of the specified size
237 * - return 1 if matched, 0 if differ, -ve if error 269 * - return 1 if matched, 0 if differ, -ve if error
238 */ 270 */
239int romfs_dev_strncmp(struct super_block *sb, unsigned long pos, 271int romfs_dev_strcmp(struct super_block *sb, unsigned long pos,
240 const char *str, size_t size) 272 const char *str, size_t size)
241{ 273{
242 size_t limit; 274 size_t limit;
243 275
@@ -246,16 +278,16 @@ int romfs_dev_strncmp(struct super_block *sb, unsigned long pos,
246 return -EIO; 278 return -EIO;
247 if (size > ROMFS_MAXFN) 279 if (size > ROMFS_MAXFN)
248 return -ENAMETOOLONG; 280 return -ENAMETOOLONG;
249 if (size > limit - pos) 281 if (size + 1 > limit - pos)
250 return -EIO; 282 return -EIO;
251 283
252#ifdef CONFIG_ROMFS_ON_MTD 284#ifdef CONFIG_ROMFS_ON_MTD
253 if (sb->s_mtd) 285 if (sb->s_mtd)
254 return romfs_mtd_strncmp(sb, pos, str, size); 286 return romfs_mtd_strcmp(sb, pos, str, size);
255#endif 287#endif
256#ifdef CONFIG_ROMFS_ON_BLOCK 288#ifdef CONFIG_ROMFS_ON_BLOCK
257 if (sb->s_bdev) 289 if (sb->s_bdev)
258 return romfs_blk_strncmp(sb, pos, str, size); 290 return romfs_blk_strcmp(sb, pos, str, size);
259#endif 291#endif
260 return -EIO; 292 return -EIO;
261} 293}
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 10ca7d984a8b..c53b5ef8a02f 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -240,8 +240,8 @@ static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry,
240 goto error; 240 goto error;
241 241
242 /* try to match the first 16 bytes of name */ 242 /* try to match the first 16 bytes of name */
243 ret = romfs_dev_strncmp(dir->i_sb, offset + ROMFH_SIZE, name, 243 ret = romfs_dev_strcmp(dir->i_sb, offset + ROMFH_SIZE, name,
244 len); 244 len);
245 if (ret < 0) 245 if (ret < 0)
246 goto error; 246 goto error;
247 if (ret == 1) 247 if (ret == 1)
diff --git a/fs/splice.c b/fs/splice.c
index c18aa7e03e2b..666953d59a35 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -182,8 +182,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
182 do_wakeup = 0; 182 do_wakeup = 0;
183 page_nr = 0; 183 page_nr = 0;
184 184
185 if (pipe->inode) 185 pipe_lock(pipe);
186 mutex_lock(&pipe->inode->i_mutex);
187 186
188 for (;;) { 187 for (;;) {
189 if (!pipe->readers) { 188 if (!pipe->readers) {
@@ -245,15 +244,13 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
245 pipe->waiting_writers--; 244 pipe->waiting_writers--;
246 } 245 }
247 246
248 if (pipe->inode) { 247 pipe_unlock(pipe);
249 mutex_unlock(&pipe->inode->i_mutex);
250 248
251 if (do_wakeup) { 249 if (do_wakeup) {
252 smp_mb(); 250 smp_mb();
253 if (waitqueue_active(&pipe->wait)) 251 if (waitqueue_active(&pipe->wait))
254 wake_up_interruptible(&pipe->wait); 252 wake_up_interruptible(&pipe->wait);
255 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 253 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
256 }
257 } 254 }
258 255
259 while (page_nr < spd_pages) 256 while (page_nr < spd_pages)
@@ -555,8 +552,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
555 * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create 552 * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
556 * a new page in the output file page cache and fill/dirty that. 553 * a new page in the output file page cache and fill/dirty that.
557 */ 554 */
558static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, 555int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
559 struct splice_desc *sd) 556 struct splice_desc *sd)
560{ 557{
561 struct file *file = sd->u.file; 558 struct file *file = sd->u.file;
562 struct address_space *mapping = file->f_mapping; 559 struct address_space *mapping = file->f_mapping;
@@ -600,108 +597,177 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
600out: 597out:
601 return ret; 598 return ret;
602} 599}
600EXPORT_SYMBOL(pipe_to_file);
601
602static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
603{
604 smp_mb();
605 if (waitqueue_active(&pipe->wait))
606 wake_up_interruptible(&pipe->wait);
607 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
608}
603 609
604/** 610/**
605 * __splice_from_pipe - splice data from a pipe to given actor 611 * splice_from_pipe_feed - feed available data from a pipe to a file
606 * @pipe: pipe to splice from 612 * @pipe: pipe to splice from
607 * @sd: information to @actor 613 * @sd: information to @actor
608 * @actor: handler that splices the data 614 * @actor: handler that splices the data
609 * 615 *
610 * Description: 616 * Description:
611 * This function does little more than loop over the pipe and call 617 * This function loops over the pipe and calls @actor to do the
612 * @actor to do the actual moving of a single struct pipe_buffer to 618 * actual moving of a single struct pipe_buffer to the desired
613 * the desired destination. See pipe_to_file, pipe_to_sendpage, or 619 * destination. It returns when there's no more buffers left in
614 * pipe_to_user. 620 * the pipe or if the requested number of bytes (@sd->total_len)
621 * have been copied. It returns a positive number (one) if the
622 * pipe needs to be filled with more data, zero if the required
623 * number of bytes have been copied and -errno on error.
615 * 624 *
625 * This, together with splice_from_pipe_{begin,end,next}, may be
626 * used to implement the functionality of __splice_from_pipe() when
627 * locking is required around copying the pipe buffers to the
628 * destination.
616 */ 629 */
617ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, 630int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
618 splice_actor *actor) 631 splice_actor *actor)
619{ 632{
620 int ret, do_wakeup, err; 633 int ret;
621
622 ret = 0;
623 do_wakeup = 0;
624
625 for (;;) {
626 if (pipe->nrbufs) {
627 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
628 const struct pipe_buf_operations *ops = buf->ops;
629 634
630 sd->len = buf->len; 635 while (pipe->nrbufs) {
631 if (sd->len > sd->total_len) 636 struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
632 sd->len = sd->total_len; 637 const struct pipe_buf_operations *ops = buf->ops;
633 638
634 err = actor(pipe, buf, sd); 639 sd->len = buf->len;
635 if (err <= 0) { 640 if (sd->len > sd->total_len)
636 if (!ret && err != -ENODATA) 641 sd->len = sd->total_len;
637 ret = err;
638 642
639 break; 643 ret = actor(pipe, buf, sd);
640 } 644 if (ret <= 0) {
645 if (ret == -ENODATA)
646 ret = 0;
647 return ret;
648 }
649 buf->offset += ret;
650 buf->len -= ret;
641 651
642 ret += err; 652 sd->num_spliced += ret;
643 buf->offset += err; 653 sd->len -= ret;
644 buf->len -= err; 654 sd->pos += ret;
655 sd->total_len -= ret;
645 656
646 sd->len -= err; 657 if (!buf->len) {
647 sd->pos += err; 658 buf->ops = NULL;
648 sd->total_len -= err; 659 ops->release(pipe, buf);
649 if (sd->len) 660 pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
650 continue; 661 pipe->nrbufs--;
662 if (pipe->inode)
663 sd->need_wakeup = true;
664 }
651 665
652 if (!buf->len) { 666 if (!sd->total_len)
653 buf->ops = NULL; 667 return 0;
654 ops->release(pipe, buf); 668 }
655 pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
656 pipe->nrbufs--;
657 if (pipe->inode)
658 do_wakeup = 1;
659 }
660 669
661 if (!sd->total_len) 670 return 1;
662 break; 671}
663 } 672EXPORT_SYMBOL(splice_from_pipe_feed);
664 673
665 if (pipe->nrbufs) 674/**
666 continue; 675 * splice_from_pipe_next - wait for some data to splice from
676 * @pipe: pipe to splice from
677 * @sd: information about the splice operation
678 *
679 * Description:
680 * This function will wait for some data and return a positive
681 * value (one) if pipe buffers are available. It will return zero
682 * or -errno if no more data needs to be spliced.
683 */
684int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
685{
686 while (!pipe->nrbufs) {
667 if (!pipe->writers) 687 if (!pipe->writers)
668 break; 688 return 0;
669 if (!pipe->waiting_writers) {
670 if (ret)
671 break;
672 }
673 689
674 if (sd->flags & SPLICE_F_NONBLOCK) { 690 if (!pipe->waiting_writers && sd->num_spliced)
675 if (!ret) 691 return 0;
676 ret = -EAGAIN;
677 break;
678 }
679 692
680 if (signal_pending(current)) { 693 if (sd->flags & SPLICE_F_NONBLOCK)
681 if (!ret) 694 return -EAGAIN;
682 ret = -ERESTARTSYS;
683 break;
684 }
685 695
686 if (do_wakeup) { 696 if (signal_pending(current))
687 smp_mb(); 697 return -ERESTARTSYS;
688 if (waitqueue_active(&pipe->wait)) 698
689 wake_up_interruptible_sync(&pipe->wait); 699 if (sd->need_wakeup) {
690 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 700 wakeup_pipe_writers(pipe);
691 do_wakeup = 0; 701 sd->need_wakeup = false;
692 } 702 }
693 703
694 pipe_wait(pipe); 704 pipe_wait(pipe);
695 } 705 }
696 706
697 if (do_wakeup) { 707 return 1;
698 smp_mb(); 708}
699 if (waitqueue_active(&pipe->wait)) 709EXPORT_SYMBOL(splice_from_pipe_next);
700 wake_up_interruptible(&pipe->wait);
701 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
702 }
703 710
704 return ret; 711/**
712 * splice_from_pipe_begin - start splicing from pipe
713 * @sd: information about the splice operation
714 *
715 * Description:
716 * This function should be called before a loop containing
717 * splice_from_pipe_next() and splice_from_pipe_feed() to
718 * initialize the necessary fields of @sd.
719 */
720void splice_from_pipe_begin(struct splice_desc *sd)
721{
722 sd->num_spliced = 0;
723 sd->need_wakeup = false;
724}
725EXPORT_SYMBOL(splice_from_pipe_begin);
726
727/**
728 * splice_from_pipe_end - finish splicing from pipe
729 * @pipe: pipe to splice from
730 * @sd: information about the splice operation
731 *
732 * Description:
733 * This function will wake up pipe writers if necessary. It should
734 * be called after a loop containing splice_from_pipe_next() and
735 * splice_from_pipe_feed().
736 */
737void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
738{
739 if (sd->need_wakeup)
740 wakeup_pipe_writers(pipe);
741}
742EXPORT_SYMBOL(splice_from_pipe_end);
743
744/**
745 * __splice_from_pipe - splice data from a pipe to given actor
746 * @pipe: pipe to splice from
747 * @sd: information to @actor
748 * @actor: handler that splices the data
749 *
750 * Description:
751 * This function does little more than loop over the pipe and call
752 * @actor to do the actual moving of a single struct pipe_buffer to
753 * the desired destination. See pipe_to_file, pipe_to_sendpage, or
754 * pipe_to_user.
755 *
756 */
757ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
758 splice_actor *actor)
759{
760 int ret;
761
762 splice_from_pipe_begin(sd);
763 do {
764 ret = splice_from_pipe_next(pipe, sd);
765 if (ret > 0)
766 ret = splice_from_pipe_feed(pipe, sd, actor);
767 } while (ret > 0);
768 splice_from_pipe_end(pipe, sd);
769
770 return sd->num_spliced ? sd->num_spliced : ret;
705} 771}
706EXPORT_SYMBOL(__splice_from_pipe); 772EXPORT_SYMBOL(__splice_from_pipe);
707 773
@@ -715,7 +781,7 @@ EXPORT_SYMBOL(__splice_from_pipe);
715 * @actor: handler that splices the data 781 * @actor: handler that splices the data
716 * 782 *
717 * Description: 783 * Description:
718 * See __splice_from_pipe. This function locks the input and output inodes, 784 * See __splice_from_pipe. This function locks the pipe inode,
719 * otherwise it's identical to __splice_from_pipe(). 785 * otherwise it's identical to __splice_from_pipe().
720 * 786 *
721 */ 787 */
@@ -724,7 +790,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
724 splice_actor *actor) 790 splice_actor *actor)
725{ 791{
726 ssize_t ret; 792 ssize_t ret;
727 struct inode *inode = out->f_mapping->host;
728 struct splice_desc sd = { 793 struct splice_desc sd = {
729 .total_len = len, 794 .total_len = len,
730 .flags = flags, 795 .flags = flags,
@@ -732,30 +797,15 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
732 .u.file = out, 797 .u.file = out,
733 }; 798 };
734 799
735 /* 800 pipe_lock(pipe);
736 * The actor worker might be calling ->write_begin and
737 * ->write_end. Most of the time, these expect i_mutex to
738 * be held. Since this may result in an ABBA deadlock with
739 * pipe->inode, we have to order lock acquiry here.
740 *
741 * Outer lock must be inode->i_mutex, as pipe_wait() will
742 * release and reacquire pipe->inode->i_mutex, AND inode must
743 * never be a pipe.
744 */
745 WARN_ON(S_ISFIFO(inode->i_mode));
746 mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
747 if (pipe->inode)
748 mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
749 ret = __splice_from_pipe(pipe, &sd, actor); 801 ret = __splice_from_pipe(pipe, &sd, actor);
750 if (pipe->inode) 802 pipe_unlock(pipe);
751 mutex_unlock(&pipe->inode->i_mutex);
752 mutex_unlock(&inode->i_mutex);
753 803
754 return ret; 804 return ret;
755} 805}
756 806
757/** 807/**
758 * generic_file_splice_write_nolock - generic_file_splice_write without mutexes 808 * generic_file_splice_write - splice data from a pipe to a file
759 * @pipe: pipe info 809 * @pipe: pipe info
760 * @out: file to write to 810 * @out: file to write to
761 * @ppos: position in @out 811 * @ppos: position in @out
@@ -764,13 +814,12 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
764 * 814 *
765 * Description: 815 * Description:
766 * Will either move or copy pages (determined by @flags options) from 816 * Will either move or copy pages (determined by @flags options) from
767 * the given pipe inode to the given file. The caller is responsible 817 * the given pipe inode to the given file.
768 * for acquiring i_mutex on both inodes.
769 * 818 *
770 */ 819 */
771ssize_t 820ssize_t
772generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, 821generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
773 loff_t *ppos, size_t len, unsigned int flags) 822 loff_t *ppos, size_t len, unsigned int flags)
774{ 823{
775 struct address_space *mapping = out->f_mapping; 824 struct address_space *mapping = out->f_mapping;
776 struct inode *inode = mapping->host; 825 struct inode *inode = mapping->host;
@@ -781,76 +830,28 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
781 .u.file = out, 830 .u.file = out,
782 }; 831 };
783 ssize_t ret; 832 ssize_t ret;
784 int err;
785
786 err = file_remove_suid(out);
787 if (unlikely(err))
788 return err;
789
790 ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
791 if (ret > 0) {
792 unsigned long nr_pages;
793 833
794 *ppos += ret; 834 pipe_lock(pipe);
795 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
796
797 /*
798 * If file or inode is SYNC and we actually wrote some data,
799 * sync it.
800 */
801 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
802 err = generic_osync_inode(inode, mapping,
803 OSYNC_METADATA|OSYNC_DATA);
804 835
805 if (err) 836 splice_from_pipe_begin(&sd);
806 ret = err; 837 do {
807 } 838 ret = splice_from_pipe_next(pipe, &sd);
808 balance_dirty_pages_ratelimited_nr(mapping, nr_pages); 839 if (ret <= 0)
809 } 840 break;
810 841
811 return ret; 842 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
812} 843 ret = file_remove_suid(out);
844 if (!ret)
845 ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file);
846 mutex_unlock(&inode->i_mutex);
847 } while (ret > 0);
848 splice_from_pipe_end(pipe, &sd);
813 849
814EXPORT_SYMBOL(generic_file_splice_write_nolock); 850 pipe_unlock(pipe);
815 851
816/** 852 if (sd.num_spliced)
817 * generic_file_splice_write - splice data from a pipe to a file 853 ret = sd.num_spliced;
818 * @pipe: pipe info
819 * @out: file to write to
820 * @ppos: position in @out
821 * @len: number of bytes to splice
822 * @flags: splice modifier flags
823 *
824 * Description:
825 * Will either move or copy pages (determined by @flags options) from
826 * the given pipe inode to the given file.
827 *
828 */
829ssize_t
830generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
831 loff_t *ppos, size_t len, unsigned int flags)
832{
833 struct address_space *mapping = out->f_mapping;
834 struct inode *inode = mapping->host;
835 struct splice_desc sd = {
836 .total_len = len,
837 .flags = flags,
838 .pos = *ppos,
839 .u.file = out,
840 };
841 ssize_t ret;
842 854
843 WARN_ON(S_ISFIFO(inode->i_mode));
844 mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
845 ret = file_remove_suid(out);
846 if (likely(!ret)) {
847 if (pipe->inode)
848 mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD);
849 ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
850 if (pipe->inode)
851 mutex_unlock(&pipe->inode->i_mutex);
852 }
853 mutex_unlock(&inode->i_mutex);
854 if (ret > 0) { 855 if (ret > 0) {
855 unsigned long nr_pages; 856 unsigned long nr_pages;
856 857
@@ -1339,8 +1340,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
1339 if (!pipe) 1340 if (!pipe)
1340 return -EBADF; 1341 return -EBADF;
1341 1342
1342 if (pipe->inode) 1343 pipe_lock(pipe);
1343 mutex_lock(&pipe->inode->i_mutex);
1344 1344
1345 error = ret = 0; 1345 error = ret = 0;
1346 while (nr_segs) { 1346 while (nr_segs) {
@@ -1395,8 +1395,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
1395 iov++; 1395 iov++;
1396 } 1396 }
1397 1397
1398 if (pipe->inode) 1398 pipe_unlock(pipe);
1399 mutex_unlock(&pipe->inode->i_mutex);
1400 1399
1401 if (!ret) 1400 if (!ret)
1402 ret = error; 1401 ret = error;
@@ -1524,7 +1523,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
1524 return 0; 1523 return 0;
1525 1524
1526 ret = 0; 1525 ret = 0;
1527 mutex_lock(&pipe->inode->i_mutex); 1526 pipe_lock(pipe);
1528 1527
1529 while (!pipe->nrbufs) { 1528 while (!pipe->nrbufs) {
1530 if (signal_pending(current)) { 1529 if (signal_pending(current)) {
@@ -1542,7 +1541,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
1542 pipe_wait(pipe); 1541 pipe_wait(pipe);
1543 } 1542 }
1544 1543
1545 mutex_unlock(&pipe->inode->i_mutex); 1544 pipe_unlock(pipe);
1546 return ret; 1545 return ret;
1547} 1546}
1548 1547
@@ -1562,7 +1561,7 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
1562 return 0; 1561 return 0;
1563 1562
1564 ret = 0; 1563 ret = 0;
1565 mutex_lock(&pipe->inode->i_mutex); 1564 pipe_lock(pipe);
1566 1565
1567 while (pipe->nrbufs >= PIPE_BUFFERS) { 1566 while (pipe->nrbufs >= PIPE_BUFFERS) {
1568 if (!pipe->readers) { 1567 if (!pipe->readers) {
@@ -1583,7 +1582,7 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
1583 pipe->waiting_writers--; 1582 pipe->waiting_writers--;
1584 } 1583 }
1585 1584
1586 mutex_unlock(&pipe->inode->i_mutex); 1585 pipe_unlock(pipe);
1587 return ret; 1586 return ret;
1588} 1587}
1589 1588
@@ -1599,10 +1598,10 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1599 1598
1600 /* 1599 /*
1601 * Potential ABBA deadlock, work around it by ordering lock 1600 * Potential ABBA deadlock, work around it by ordering lock
1602 * grabbing by inode address. Otherwise two different processes 1601 * grabbing by pipe info address. Otherwise two different processes
1603 * could deadlock (one doing tee from A -> B, the other from B -> A). 1602 * could deadlock (one doing tee from A -> B, the other from B -> A).
1604 */ 1603 */
1605 inode_double_lock(ipipe->inode, opipe->inode); 1604 pipe_double_lock(ipipe, opipe);
1606 1605
1607 do { 1606 do {
1608 if (!opipe->readers) { 1607 if (!opipe->readers) {
@@ -1653,7 +1652,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
1653 if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK)) 1652 if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK))
1654 ret = -EAGAIN; 1653 ret = -EAGAIN;
1655 1654
1656 inode_double_unlock(ipipe->inode, opipe->inode); 1655 pipe_unlock(ipipe);
1656 pipe_unlock(opipe);
1657 1657
1658 /* 1658 /*
1659 * If we put data in the output pipe, wakeup any potential readers. 1659 * If we put data in the output pipe, wakeup any potential readers.
diff --git a/fs/stat.c b/fs/stat.c
index 2db740a0cfb5..075694e31d8b 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -55,59 +55,54 @@ int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
55 55
56EXPORT_SYMBOL(vfs_getattr); 56EXPORT_SYMBOL(vfs_getattr);
57 57
58int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat) 58int vfs_fstat(unsigned int fd, struct kstat *stat)
59{ 59{
60 struct path path; 60 struct file *f = fget(fd);
61 int error; 61 int error = -EBADF;
62 62
63 error = user_path_at(dfd, name, LOOKUP_FOLLOW, &path); 63 if (f) {
64 if (!error) { 64 error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat);
65 error = vfs_getattr(path.mnt, path.dentry, stat); 65 fput(f);
66 path_put(&path);
67 } 66 }
68 return error; 67 return error;
69} 68}
69EXPORT_SYMBOL(vfs_fstat);
70 70
71int vfs_stat(char __user *name, struct kstat *stat) 71int vfs_fstatat(int dfd, char __user *filename, struct kstat *stat, int flag)
72{ 72{
73 return vfs_stat_fd(AT_FDCWD, name, stat); 73 struct path path;
74} 74 int error = -EINVAL;
75 int lookup_flags = 0;
75 76
76EXPORT_SYMBOL(vfs_stat); 77 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
78 goto out;
77 79
78int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat) 80 if (!(flag & AT_SYMLINK_NOFOLLOW))
79{ 81 lookup_flags |= LOOKUP_FOLLOW;
80 struct path path;
81 int error;
82 82
83 error = user_path_at(dfd, name, 0, &path); 83 error = user_path_at(dfd, filename, lookup_flags, &path);
84 if (!error) { 84 if (error)
85 error = vfs_getattr(path.mnt, path.dentry, stat); 85 goto out;
86 path_put(&path); 86
87 } 87 error = vfs_getattr(path.mnt, path.dentry, stat);
88 path_put(&path);
89out:
88 return error; 90 return error;
89} 91}
92EXPORT_SYMBOL(vfs_fstatat);
90 93
91int vfs_lstat(char __user *name, struct kstat *stat) 94int vfs_stat(char __user *name, struct kstat *stat)
92{ 95{
93 return vfs_lstat_fd(AT_FDCWD, name, stat); 96 return vfs_fstatat(AT_FDCWD, name, stat, 0);
94} 97}
98EXPORT_SYMBOL(vfs_stat);
95 99
96EXPORT_SYMBOL(vfs_lstat); 100int vfs_lstat(char __user *name, struct kstat *stat)
97
98int vfs_fstat(unsigned int fd, struct kstat *stat)
99{ 101{
100 struct file *f = fget(fd); 102 return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW);
101 int error = -EBADF;
102
103 if (f) {
104 error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat);
105 fput(f);
106 }
107 return error;
108} 103}
104EXPORT_SYMBOL(vfs_lstat);
109 105
110EXPORT_SYMBOL(vfs_fstat);
111 106
112#ifdef __ARCH_WANT_OLD_STAT 107#ifdef __ARCH_WANT_OLD_STAT
113 108
@@ -155,23 +150,25 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta
155SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) 150SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user *, statbuf)
156{ 151{
157 struct kstat stat; 152 struct kstat stat;
158 int error = vfs_stat_fd(AT_FDCWD, filename, &stat); 153 int error;
159 154
160 if (!error) 155 error = vfs_stat(filename, &stat);
161 error = cp_old_stat(&stat, statbuf); 156 if (error)
157 return error;
162 158
163 return error; 159 return cp_old_stat(&stat, statbuf);
164} 160}
165 161
166SYSCALL_DEFINE2(lstat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) 162SYSCALL_DEFINE2(lstat, char __user *, filename, struct __old_kernel_stat __user *, statbuf)
167{ 163{
168 struct kstat stat; 164 struct kstat stat;
169 int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); 165 int error;
170 166
171 if (!error) 167 error = vfs_lstat(filename, &stat);
172 error = cp_old_stat(&stat, statbuf); 168 if (error)
169 return error;
173 170
174 return error; 171 return cp_old_stat(&stat, statbuf);
175} 172}
176 173
177SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf) 174SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf)
@@ -240,23 +237,23 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
240SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf) 237SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf)
241{ 238{
242 struct kstat stat; 239 struct kstat stat;
243 int error = vfs_stat_fd(AT_FDCWD, filename, &stat); 240 int error = vfs_stat(filename, &stat);
244
245 if (!error)
246 error = cp_new_stat(&stat, statbuf);
247 241
248 return error; 242 if (error)
243 return error;
244 return cp_new_stat(&stat, statbuf);
249} 245}
250 246
251SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf) 247SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf)
252{ 248{
253 struct kstat stat; 249 struct kstat stat;
254 int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); 250 int error;
255 251
256 if (!error) 252 error = vfs_lstat(filename, &stat);
257 error = cp_new_stat(&stat, statbuf); 253 if (error)
254 return error;
258 255
259 return error; 256 return cp_new_stat(&stat, statbuf);
260} 257}
261 258
262#if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) 259#if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT)
@@ -264,21 +261,12 @@ SYSCALL_DEFINE4(newfstatat, int, dfd, char __user *, filename,
264 struct stat __user *, statbuf, int, flag) 261 struct stat __user *, statbuf, int, flag)
265{ 262{
266 struct kstat stat; 263 struct kstat stat;
267 int error = -EINVAL; 264 int error;
268
269 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
270 goto out;
271
272 if (flag & AT_SYMLINK_NOFOLLOW)
273 error = vfs_lstat_fd(dfd, filename, &stat);
274 else
275 error = vfs_stat_fd(dfd, filename, &stat);
276
277 if (!error)
278 error = cp_new_stat(&stat, statbuf);
279 265
280out: 266 error = vfs_fstatat(dfd, filename, &stat, flag);
281 return error; 267 if (error)
268 return error;
269 return cp_new_stat(&stat, statbuf);
282} 270}
283#endif 271#endif
284 272
@@ -404,21 +392,12 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename,
404 struct stat64 __user *, statbuf, int, flag) 392 struct stat64 __user *, statbuf, int, flag)
405{ 393{
406 struct kstat stat; 394 struct kstat stat;
407 int error = -EINVAL; 395 int error;
408
409 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
410 goto out;
411
412 if (flag & AT_SYMLINK_NOFOLLOW)
413 error = vfs_lstat_fd(dfd, filename, &stat);
414 else
415 error = vfs_stat_fd(dfd, filename, &stat);
416
417 if (!error)
418 error = cp_new_stat64(&stat, statbuf);
419 396
420out: 397 error = vfs_fstatat(dfd, filename, &stat, flag);
421 return error; 398 if (error)
399 return error;
400 return cp_new_stat64(&stat, statbuf);
422} 401}
423#endif /* __ARCH_WANT_STAT64 */ 402#endif /* __ARCH_WANT_STAT64 */
424 403
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 93e0c0281d45..9345806c8853 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -157,14 +157,9 @@ static ssize_t write(struct file *file, const char __user *userbuf,
157 count = size - offs; 157 count = size - offs;
158 } 158 }
159 159
160 temp = kmalloc(count, GFP_KERNEL); 160 temp = memdup_user(userbuf, count);
161 if (!temp) 161 if (IS_ERR(temp))
162 return -ENOMEM; 162 return PTR_ERR(temp);
163
164 if (copy_from_user(temp, userbuf, count)) {
165 count = -EFAULT;
166 goto out_free;
167 }
168 163
169 mutex_lock(&bb->mutex); 164 mutex_lock(&bb->mutex);
170 165
@@ -176,8 +171,6 @@ static ssize_t write(struct file *file, const char __user *userbuf,
176 if (count > 0) 171 if (count > 0)
177 *off = offs + count; 172 *off = offs + count;
178 173
179out_free:
180 kfree(temp);
181 return count; 174 return count;
182} 175}
183 176
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 289c43a47263..b1606e07b7a3 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -446,11 +446,11 @@ static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
446 if (buffer->event != atomic_read(&od->event)) 446 if (buffer->event != atomic_read(&od->event))
447 goto trigger; 447 goto trigger;
448 448
449 return 0; 449 return DEFAULT_POLLMASK;
450 450
451 trigger: 451 trigger:
452 buffer->needs_read_fill = 1; 452 buffer->needs_read_fill = 1;
453 return POLLERR|POLLPRI; 453 return DEFAULT_POLLMASK|POLLERR|POLLPRI;
454} 454}
455 455
456void sysfs_notify_dirent(struct sysfs_dirent *sd) 456void sysfs_notify_dirent(struct sysfs_dirent *sd)
@@ -667,6 +667,7 @@ struct sysfs_schedule_callback_struct {
667 struct work_struct work; 667 struct work_struct work;
668}; 668};
669 669
670static struct workqueue_struct *sysfs_workqueue;
670static DEFINE_MUTEX(sysfs_workq_mutex); 671static DEFINE_MUTEX(sysfs_workq_mutex);
671static LIST_HEAD(sysfs_workq); 672static LIST_HEAD(sysfs_workq);
672static void sysfs_schedule_callback_work(struct work_struct *work) 673static void sysfs_schedule_callback_work(struct work_struct *work)
@@ -715,11 +716,20 @@ int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
715 mutex_lock(&sysfs_workq_mutex); 716 mutex_lock(&sysfs_workq_mutex);
716 list_for_each_entry_safe(ss, tmp, &sysfs_workq, workq_list) 717 list_for_each_entry_safe(ss, tmp, &sysfs_workq, workq_list)
717 if (ss->kobj == kobj) { 718 if (ss->kobj == kobj) {
719 module_put(owner);
718 mutex_unlock(&sysfs_workq_mutex); 720 mutex_unlock(&sysfs_workq_mutex);
719 return -EAGAIN; 721 return -EAGAIN;
720 } 722 }
721 mutex_unlock(&sysfs_workq_mutex); 723 mutex_unlock(&sysfs_workq_mutex);
722 724
725 if (sysfs_workqueue == NULL) {
726 sysfs_workqueue = create_workqueue("sysfsd");
727 if (sysfs_workqueue == NULL) {
728 module_put(owner);
729 return -ENOMEM;
730 }
731 }
732
723 ss = kmalloc(sizeof(*ss), GFP_KERNEL); 733 ss = kmalloc(sizeof(*ss), GFP_KERNEL);
724 if (!ss) { 734 if (!ss) {
725 module_put(owner); 735 module_put(owner);
@@ -735,7 +745,7 @@ int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
735 mutex_lock(&sysfs_workq_mutex); 745 mutex_lock(&sysfs_workq_mutex);
736 list_add_tail(&ss->workq_list, &sysfs_workq); 746 list_add_tail(&ss->workq_list, &sysfs_workq);
737 mutex_unlock(&sysfs_workq_mutex); 747 mutex_unlock(&sysfs_workq_mutex);
738 schedule_work(&ss->work); 748 queue_work(sysfs_workqueue, &ss->work);
739 return 0; 749 return 0;
740} 750}
741EXPORT_SYMBOL_GPL(sysfs_schedule_callback); 751EXPORT_SYMBOL_GPL(sysfs_schedule_callback);
diff --git a/fs/xattr.c b/fs/xattr.c
index 197c4fcac032..d51b8f9db921 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -237,13 +237,9 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
237 if (size) { 237 if (size) {
238 if (size > XATTR_SIZE_MAX) 238 if (size > XATTR_SIZE_MAX)
239 return -E2BIG; 239 return -E2BIG;
240 kvalue = kmalloc(size, GFP_KERNEL); 240 kvalue = memdup_user(value, size);
241 if (!kvalue) 241 if (IS_ERR(kvalue))
242 return -ENOMEM; 242 return PTR_ERR(kvalue);
243 if (copy_from_user(kvalue, value, size)) {
244 kfree(kvalue);
245 return -EFAULT;
246 }
247 } 243 }
248 244
249 error = vfs_setxattr(d, kname, kvalue, size, flags); 245 error = vfs_setxattr(d, kname, kvalue, size, flags);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c13f67300fe7..7ec89fc05b2b 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -153,23 +153,6 @@ xfs_find_bdev_for_inode(
153} 153}
154 154
155/* 155/*
156 * Schedule IO completion handling on a xfsdatad if this was
157 * the final hold on this ioend. If we are asked to wait,
158 * flush the workqueue.
159 */
160STATIC void
161xfs_finish_ioend(
162 xfs_ioend_t *ioend,
163 int wait)
164{
165 if (atomic_dec_and_test(&ioend->io_remaining)) {
166 queue_work(xfsdatad_workqueue, &ioend->io_work);
167 if (wait)
168 flush_workqueue(xfsdatad_workqueue);
169 }
170}
171
172/*
173 * We're now finished for good with this ioend structure. 156 * We're now finished for good with this ioend structure.
174 * Update the page state via the associated buffer_heads, 157 * Update the page state via the associated buffer_heads,
175 * release holds on the inode and bio, and finally free 158 * release holds on the inode and bio, and finally free
@@ -310,6 +293,27 @@ xfs_end_bio_read(
310} 293}
311 294
312/* 295/*
296 * Schedule IO completion handling on a xfsdatad if this was
297 * the final hold on this ioend. If we are asked to wait,
298 * flush the workqueue.
299 */
300STATIC void
301xfs_finish_ioend(
302 xfs_ioend_t *ioend,
303 int wait)
304{
305 if (atomic_dec_and_test(&ioend->io_remaining)) {
306 struct workqueue_struct *wq = xfsdatad_workqueue;
307 if (ioend->io_work.func == xfs_end_bio_unwritten)
308 wq = xfsconvertd_workqueue;
309
310 queue_work(wq, &ioend->io_work);
311 if (wait)
312 flush_workqueue(wq);
313 }
314}
315
316/*
313 * Allocate and initialise an IO completion structure. 317 * Allocate and initialise an IO completion structure.
314 * We need to track unwritten extent write completion here initially. 318 * We need to track unwritten extent write completion here initially.
315 * We'll need to extend this for updating the ondisk inode size later 319 * We'll need to extend this for updating the ondisk inode size later
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 1dd528849755..221b3e66ceef 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -19,6 +19,7 @@
19#define __XFS_AOPS_H__ 19#define __XFS_AOPS_H__
20 20
21extern struct workqueue_struct *xfsdatad_workqueue; 21extern struct workqueue_struct *xfsdatad_workqueue;
22extern struct workqueue_struct *xfsconvertd_workqueue;
22extern mempool_t *xfs_ioend_pool; 23extern mempool_t *xfs_ioend_pool;
23 24
24/* 25/*
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index aa1016bb9134..e28800a9f2b5 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -51,6 +51,7 @@ static struct shrinker xfs_buf_shake = {
51 51
52static struct workqueue_struct *xfslogd_workqueue; 52static struct workqueue_struct *xfslogd_workqueue;
53struct workqueue_struct *xfsdatad_workqueue; 53struct workqueue_struct *xfsdatad_workqueue;
54struct workqueue_struct *xfsconvertd_workqueue;
54 55
55#ifdef XFS_BUF_TRACE 56#ifdef XFS_BUF_TRACE
56void 57void
@@ -1775,6 +1776,7 @@ xfs_flush_buftarg(
1775 xfs_buf_t *bp, *n; 1776 xfs_buf_t *bp, *n;
1776 int pincount = 0; 1777 int pincount = 0;
1777 1778
1779 xfs_buf_runall_queues(xfsconvertd_workqueue);
1778 xfs_buf_runall_queues(xfsdatad_workqueue); 1780 xfs_buf_runall_queues(xfsdatad_workqueue);
1779 xfs_buf_runall_queues(xfslogd_workqueue); 1781 xfs_buf_runall_queues(xfslogd_workqueue);
1780 1782
@@ -1831,9 +1833,15 @@ xfs_buf_init(void)
1831 if (!xfsdatad_workqueue) 1833 if (!xfsdatad_workqueue)
1832 goto out_destroy_xfslogd_workqueue; 1834 goto out_destroy_xfslogd_workqueue;
1833 1835
1836 xfsconvertd_workqueue = create_workqueue("xfsconvertd");
1837 if (!xfsconvertd_workqueue)
1838 goto out_destroy_xfsdatad_workqueue;
1839
1834 register_shrinker(&xfs_buf_shake); 1840 register_shrinker(&xfs_buf_shake);
1835 return 0; 1841 return 0;
1836 1842
1843 out_destroy_xfsdatad_workqueue:
1844 destroy_workqueue(xfsdatad_workqueue);
1837 out_destroy_xfslogd_workqueue: 1845 out_destroy_xfslogd_workqueue:
1838 destroy_workqueue(xfslogd_workqueue); 1846 destroy_workqueue(xfslogd_workqueue);
1839 out_free_buf_zone: 1847 out_free_buf_zone:
@@ -1849,6 +1857,7 @@ void
1849xfs_buf_terminate(void) 1857xfs_buf_terminate(void)
1850{ 1858{
1851 unregister_shrinker(&xfs_buf_shake); 1859 unregister_shrinker(&xfs_buf_shake);
1860 destroy_workqueue(xfsconvertd_workqueue);
1852 destroy_workqueue(xfsdatad_workqueue); 1861 destroy_workqueue(xfsdatad_workqueue);
1853 destroy_workqueue(xfslogd_workqueue); 1862 destroy_workqueue(xfslogd_workqueue);
1854 kmem_zone_destroy(xfs_buf_zone); 1863 kmem_zone_destroy(xfs_buf_zone);
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 5aeb77776961..08be36d7326c 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -74,14 +74,14 @@ xfs_flush_pages(
74 74
75 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { 75 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
76 xfs_iflags_clear(ip, XFS_ITRUNCATED); 76 xfs_iflags_clear(ip, XFS_ITRUNCATED);
77 ret = filemap_fdatawrite(mapping); 77 ret = -filemap_fdatawrite(mapping);
78 if (flags & XFS_B_ASYNC)
79 return -ret;
80 ret2 = filemap_fdatawait(mapping);
81 if (!ret)
82 ret = ret2;
83 } 78 }
84 return -ret; 79 if (flags & XFS_B_ASYNC)
80 return ret;
81 ret2 = xfs_wait_on_pages(ip, first, last);
82 if (!ret)
83 ret = ret2;
84 return ret;
85} 85}
86 86
87int 87int
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index d0b499418a7d..34eaab608e6e 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -489,17 +489,12 @@ xfs_attrmulti_attr_set(
489 if (len > XATTR_SIZE_MAX) 489 if (len > XATTR_SIZE_MAX)
490 return EINVAL; 490 return EINVAL;
491 491
492 kbuf = kmalloc(len, GFP_KERNEL); 492 kbuf = memdup_user(ubuf, len);
493 if (!kbuf) 493 if (IS_ERR(kbuf))
494 return ENOMEM; 494 return PTR_ERR(kbuf);
495
496 if (copy_from_user(kbuf, ubuf, len))
497 goto out_kfree;
498 495
499 error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); 496 error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
500 497
501 out_kfree:
502 kfree(kbuf);
503 return error; 498 return error;
504} 499}
505 500
@@ -540,20 +535,16 @@ xfs_attrmulti_by_handle(
540 if (!size || size > 16 * PAGE_SIZE) 535 if (!size || size > 16 * PAGE_SIZE)
541 goto out_dput; 536 goto out_dput;
542 537
543 error = ENOMEM; 538 ops = memdup_user(am_hreq.ops, size);
544 ops = kmalloc(size, GFP_KERNEL); 539 if (IS_ERR(ops)) {
545 if (!ops) 540 error = PTR_ERR(ops);
546 goto out_dput; 541 goto out_dput;
547 542 }
548 error = EFAULT;
549 if (copy_from_user(ops, am_hreq.ops, size))
550 goto out_kfree_ops;
551 543
552 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); 544 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
553 if (!attr_name) 545 if (!attr_name)
554 goto out_kfree_ops; 546 goto out_kfree_ops;
555 547
556
557 error = 0; 548 error = 0;
558 for (i = 0; i < am_hreq.opcount; i++) { 549 for (i = 0; i < am_hreq.opcount; i++) {
559 ops[i].am_error = strncpy_from_user(attr_name, 550 ops[i].am_error = strncpy_from_user(attr_name,
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index c70c4e3db790..0882d166239a 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -427,20 +427,16 @@ xfs_compat_attrmulti_by_handle(
427 if (!size || size > 16 * PAGE_SIZE) 427 if (!size || size > 16 * PAGE_SIZE)
428 goto out_dput; 428 goto out_dput;
429 429
430 error = ENOMEM; 430 ops = memdup_user(compat_ptr(am_hreq.ops), size);
431 ops = kmalloc(size, GFP_KERNEL); 431 if (IS_ERR(ops)) {
432 if (!ops) 432 error = PTR_ERR(ops);
433 goto out_dput; 433 goto out_dput;
434 434 }
435 error = EFAULT;
436 if (copy_from_user(ops, compat_ptr(am_hreq.ops), size))
437 goto out_kfree_ops;
438 435
439 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); 436 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
440 if (!attr_name) 437 if (!attr_name)
441 goto out_kfree_ops; 438 goto out_kfree_ops;
442 439
443
444 error = 0; 440 error = 0;
445 for (i = 0; i < am_hreq.opcount; i++) { 441 for (i = 0; i < am_hreq.opcount; i++) {
446 ops[i].am_error = strncpy_from_user(attr_name, 442 ops[i].am_error = strncpy_from_user(attr_name,
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 7e90daa0d1d1..9142192ccbe6 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -751,10 +751,26 @@ start:
751 goto relock; 751 goto relock;
752 } 752 }
753 } else { 753 } else {
754 int enospc = 0;
755 ssize_t ret2 = 0;
756
757write_retry:
754 xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs, 758 xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs,
755 *offset, ioflags); 759 *offset, ioflags);
756 ret = generic_file_buffered_write(iocb, iovp, segs, 760 ret2 = generic_file_buffered_write(iocb, iovp, segs,
757 pos, offset, count, ret); 761 pos, offset, count, ret);
762 /*
763 * if we just got an ENOSPC, flush the inode now we
764 * aren't holding any page locks and retry *once*
765 */
766 if (ret2 == -ENOSPC && !enospc) {
767 error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE);
768 if (error)
769 goto out_unlock_internal;
770 enospc = 1;
771 goto write_retry;
772 }
773 ret = ret2;
758 } 774 }
759 775
760 current->backing_dev_info = NULL; 776 current->backing_dev_info = NULL;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index a608e72fa405..f7ba76633c29 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -62,12 +62,6 @@ xfs_sync_inodes_ag(
62 uint32_t first_index = 0; 62 uint32_t first_index = 0;
63 int error = 0; 63 int error = 0;
64 int last_error = 0; 64 int last_error = 0;
65 int fflag = XFS_B_ASYNC;
66
67 if (flags & SYNC_DELWRI)
68 fflag = XFS_B_DELWRI;
69 if (flags & SYNC_WAIT)
70 fflag = 0; /* synchronous overrides all */
71 65
72 do { 66 do {
73 struct inode *inode; 67 struct inode *inode;
@@ -128,11 +122,23 @@ xfs_sync_inodes_ag(
128 * If we have to flush data or wait for I/O completion 122 * If we have to flush data or wait for I/O completion
129 * we need to hold the iolock. 123 * we need to hold the iolock.
130 */ 124 */
131 if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) { 125 if (flags & SYNC_DELWRI) {
132 xfs_ilock(ip, XFS_IOLOCK_SHARED); 126 if (VN_DIRTY(inode)) {
133 lock_flags |= XFS_IOLOCK_SHARED; 127 if (flags & SYNC_TRYLOCK) {
134 error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE); 128 if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
135 if (flags & SYNC_IOWAIT) 129 lock_flags |= XFS_IOLOCK_SHARED;
130 } else {
131 xfs_ilock(ip, XFS_IOLOCK_SHARED);
132 lock_flags |= XFS_IOLOCK_SHARED;
133 }
134 if (lock_flags & XFS_IOLOCK_SHARED) {
135 error = xfs_flush_pages(ip, 0, -1,
136 (flags & SYNC_WAIT) ? 0
137 : XFS_B_ASYNC,
138 FI_NONE);
139 }
140 }
141 if (VN_CACHED(inode) && (flags & SYNC_IOWAIT))
136 xfs_ioend_wait(ip); 142 xfs_ioend_wait(ip);
137 } 143 }
138 xfs_ilock(ip, XFS_ILOCK_SHARED); 144 xfs_ilock(ip, XFS_ILOCK_SHARED);
@@ -398,15 +404,17 @@ STATIC void
398xfs_syncd_queue_work( 404xfs_syncd_queue_work(
399 struct xfs_mount *mp, 405 struct xfs_mount *mp,
400 void *data, 406 void *data,
401 void (*syncer)(struct xfs_mount *, void *)) 407 void (*syncer)(struct xfs_mount *, void *),
408 struct completion *completion)
402{ 409{
403 struct bhv_vfs_sync_work *work; 410 struct xfs_sync_work *work;
404 411
405 work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP); 412 work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP);
406 INIT_LIST_HEAD(&work->w_list); 413 INIT_LIST_HEAD(&work->w_list);
407 work->w_syncer = syncer; 414 work->w_syncer = syncer;
408 work->w_data = data; 415 work->w_data = data;
409 work->w_mount = mp; 416 work->w_mount = mp;
417 work->w_completion = completion;
410 spin_lock(&mp->m_sync_lock); 418 spin_lock(&mp->m_sync_lock);
411 list_add_tail(&work->w_list, &mp->m_sync_list); 419 list_add_tail(&work->w_list, &mp->m_sync_list);
412 spin_unlock(&mp->m_sync_lock); 420 spin_unlock(&mp->m_sync_lock);
@@ -420,49 +428,26 @@ xfs_syncd_queue_work(
420 * heads, looking about for more room... 428 * heads, looking about for more room...
421 */ 429 */
422STATIC void 430STATIC void
423xfs_flush_inode_work( 431xfs_flush_inodes_work(
424 struct xfs_mount *mp,
425 void *arg)
426{
427 struct inode *inode = arg;
428 filemap_flush(inode->i_mapping);
429 iput(inode);
430}
431
432void
433xfs_flush_inode(
434 xfs_inode_t *ip)
435{
436 struct inode *inode = VFS_I(ip);
437
438 igrab(inode);
439 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work);
440 delay(msecs_to_jiffies(500));
441}
442
443/*
444 * This is the "bigger hammer" version of xfs_flush_inode_work...
445 * (IOW, "If at first you don't succeed, use a Bigger Hammer").
446 */
447STATIC void
448xfs_flush_device_work(
449 struct xfs_mount *mp, 432 struct xfs_mount *mp,
450 void *arg) 433 void *arg)
451{ 434{
452 struct inode *inode = arg; 435 struct inode *inode = arg;
453 sync_blockdev(mp->m_super->s_bdev); 436 xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK);
437 xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT);
454 iput(inode); 438 iput(inode);
455} 439}
456 440
457void 441void
458xfs_flush_device( 442xfs_flush_inodes(
459 xfs_inode_t *ip) 443 xfs_inode_t *ip)
460{ 444{
461 struct inode *inode = VFS_I(ip); 445 struct inode *inode = VFS_I(ip);
446 DECLARE_COMPLETION_ONSTACK(completion);
462 447
463 igrab(inode); 448 igrab(inode);
464 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work); 449 xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion);
465 delay(msecs_to_jiffies(500)); 450 wait_for_completion(&completion);
466 xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); 451 xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC);
467} 452}
468 453
@@ -497,7 +482,7 @@ xfssyncd(
497{ 482{
498 struct xfs_mount *mp = arg; 483 struct xfs_mount *mp = arg;
499 long timeleft; 484 long timeleft;
500 bhv_vfs_sync_work_t *work, *n; 485 xfs_sync_work_t *work, *n;
501 LIST_HEAD (tmp); 486 LIST_HEAD (tmp);
502 487
503 set_freezable(); 488 set_freezable();
@@ -532,6 +517,8 @@ xfssyncd(
532 list_del(&work->w_list); 517 list_del(&work->w_list);
533 if (work == &mp->m_sync_work) 518 if (work == &mp->m_sync_work)
534 continue; 519 continue;
520 if (work->w_completion)
521 complete(work->w_completion);
535 kmem_free(work); 522 kmem_free(work);
536 } 523 }
537 } 524 }
@@ -545,6 +532,7 @@ xfs_syncd_init(
545{ 532{
546 mp->m_sync_work.w_syncer = xfs_sync_worker; 533 mp->m_sync_work.w_syncer = xfs_sync_worker;
547 mp->m_sync_work.w_mount = mp; 534 mp->m_sync_work.w_mount = mp;
535 mp->m_sync_work.w_completion = NULL;
548 mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd"); 536 mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
549 if (IS_ERR(mp->m_sync_task)) 537 if (IS_ERR(mp->m_sync_task))
550 return -PTR_ERR(mp->m_sync_task); 538 return -PTR_ERR(mp->m_sync_task);
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 04f058c848ae..308d5bf6dfbd 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -21,18 +21,20 @@
21struct xfs_mount; 21struct xfs_mount;
22struct xfs_perag; 22struct xfs_perag;
23 23
24typedef struct bhv_vfs_sync_work { 24typedef struct xfs_sync_work {
25 struct list_head w_list; 25 struct list_head w_list;
26 struct xfs_mount *w_mount; 26 struct xfs_mount *w_mount;
27 void *w_data; /* syncer routine argument */ 27 void *w_data; /* syncer routine argument */
28 void (*w_syncer)(struct xfs_mount *, void *); 28 void (*w_syncer)(struct xfs_mount *, void *);
29} bhv_vfs_sync_work_t; 29 struct completion *w_completion;
30} xfs_sync_work_t;
30 31
31#define SYNC_ATTR 0x0001 /* sync attributes */ 32#define SYNC_ATTR 0x0001 /* sync attributes */
32#define SYNC_DELWRI 0x0002 /* look at delayed writes */ 33#define SYNC_DELWRI 0x0002 /* look at delayed writes */
33#define SYNC_WAIT 0x0004 /* wait for i/o to complete */ 34#define SYNC_WAIT 0x0004 /* wait for i/o to complete */
34#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */ 35#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */
35#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */ 36#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */
37#define SYNC_TRYLOCK 0x0020 /* only try to lock inodes */
36 38
37int xfs_syncd_init(struct xfs_mount *mp); 39int xfs_syncd_init(struct xfs_mount *mp);
38void xfs_syncd_stop(struct xfs_mount *mp); 40void xfs_syncd_stop(struct xfs_mount *mp);
@@ -43,8 +45,7 @@ int xfs_sync_fsdata(struct xfs_mount *mp, int flags);
43int xfs_quiesce_data(struct xfs_mount *mp); 45int xfs_quiesce_data(struct xfs_mount *mp);
44void xfs_quiesce_attr(struct xfs_mount *mp); 46void xfs_quiesce_attr(struct xfs_mount *mp);
45 47
46void xfs_flush_inode(struct xfs_inode *ip); 48void xfs_flush_inodes(struct xfs_inode *ip);
47void xfs_flush_device(struct xfs_inode *ip);
48 49
49int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); 50int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
50int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode); 51int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 3a6ed426327a..ca7c6005a487 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5880,7 +5880,7 @@ xfs_getbmap(
5880 void *arg) /* formatter arg */ 5880 void *arg) /* formatter arg */
5881{ 5881{
5882 __int64_t bmvend; /* last block requested */ 5882 __int64_t bmvend; /* last block requested */
5883 int error; /* return value */ 5883 int error = 0; /* return value */
5884 __int64_t fixlen; /* length for -1 case */ 5884 __int64_t fixlen; /* length for -1 case */
5885 int i; /* extent number */ 5885 int i; /* extent number */
5886 int lock; /* lock state */ 5886 int lock; /* lock state */
@@ -5890,39 +5890,18 @@ xfs_getbmap(
5890 int nexleft; /* # of user extents left */ 5890 int nexleft; /* # of user extents left */
5891 int subnex; /* # of bmapi's can do */ 5891 int subnex; /* # of bmapi's can do */
5892 int nmap; /* number of map entries */ 5892 int nmap; /* number of map entries */
5893 struct getbmapx out; /* output structure */ 5893 struct getbmapx *out; /* output structure */
5894 int whichfork; /* data or attr fork */ 5894 int whichfork; /* data or attr fork */
5895 int prealloced; /* this is a file with 5895 int prealloced; /* this is a file with
5896 * preallocated data space */ 5896 * preallocated data space */
5897 int iflags; /* interface flags */ 5897 int iflags; /* interface flags */
5898 int bmapi_flags; /* flags for xfs_bmapi */ 5898 int bmapi_flags; /* flags for xfs_bmapi */
5899 int cur_ext = 0;
5899 5900
5900 mp = ip->i_mount; 5901 mp = ip->i_mount;
5901 iflags = bmv->bmv_iflags; 5902 iflags = bmv->bmv_iflags;
5902
5903 whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; 5903 whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;
5904 5904
5905 /* If the BMV_IF_NO_DMAPI_READ interface bit specified, do not
5906 * generate a DMAPI read event. Otherwise, if the DM_EVENT_READ
5907 * bit is set for the file, generate a read event in order
5908 * that the DMAPI application may do its thing before we return
5909 * the extents. Usually this means restoring user file data to
5910 * regions of the file that look like holes.
5911 *
5912 * The "old behavior" (from XFS_IOC_GETBMAP) is to not specify
5913 * BMV_IF_NO_DMAPI_READ so that read events are generated.
5914 * If this were not true, callers of ioctl( XFS_IOC_GETBMAP )
5915 * could misinterpret holes in a DMAPI file as true holes,
5916 * when in fact they may represent offline user data.
5917 */
5918 if ((iflags & BMV_IF_NO_DMAPI_READ) == 0 &&
5919 DM_EVENT_ENABLED(ip, DM_EVENT_READ) &&
5920 whichfork == XFS_DATA_FORK) {
5921 error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
5922 if (error)
5923 return XFS_ERROR(error);
5924 }
5925
5926 if (whichfork == XFS_ATTR_FORK) { 5905 if (whichfork == XFS_ATTR_FORK) {
5927 if (XFS_IFORK_Q(ip)) { 5906 if (XFS_IFORK_Q(ip)) {
5928 if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && 5907 if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
@@ -5936,11 +5915,37 @@ xfs_getbmap(
5936 ip->i_mount); 5915 ip->i_mount);
5937 return XFS_ERROR(EFSCORRUPTED); 5916 return XFS_ERROR(EFSCORRUPTED);
5938 } 5917 }
5939 } else if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && 5918
5940 ip->i_d.di_format != XFS_DINODE_FMT_BTREE && 5919 prealloced = 0;
5941 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) 5920 fixlen = 1LL << 32;
5942 return XFS_ERROR(EINVAL); 5921 } else {
5943 if (whichfork == XFS_DATA_FORK) { 5922 /*
5923 * If the BMV_IF_NO_DMAPI_READ interface bit specified, do
5924 * not generate a DMAPI read event. Otherwise, if the
5925 * DM_EVENT_READ bit is set for the file, generate a read
5926 * event in order that the DMAPI application may do its thing
5927 * before we return the extents. Usually this means restoring
5928 * user file data to regions of the file that look like holes.
5929 *
5930 * The "old behavior" (from XFS_IOC_GETBMAP) is to not specify
5931 * BMV_IF_NO_DMAPI_READ so that read events are generated.
5932 * If this were not true, callers of ioctl(XFS_IOC_GETBMAP)
5933 * could misinterpret holes in a DMAPI file as true holes,
5934 * when in fact they may represent offline user data.
5935 */
5936 if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) &&
5937 !(iflags & BMV_IF_NO_DMAPI_READ)) {
5938 error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip,
5939 0, 0, 0, NULL);
5940 if (error)
5941 return XFS_ERROR(error);
5942 }
5943
5944 if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
5945 ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
5946 ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
5947 return XFS_ERROR(EINVAL);
5948
5944 if (xfs_get_extsz_hint(ip) || 5949 if (xfs_get_extsz_hint(ip) ||
5945 ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ 5950 ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
5946 prealloced = 1; 5951 prealloced = 1;
@@ -5949,42 +5954,41 @@ xfs_getbmap(
5949 prealloced = 0; 5954 prealloced = 0;
5950 fixlen = ip->i_size; 5955 fixlen = ip->i_size;
5951 } 5956 }
5952 } else {
5953 prealloced = 0;
5954 fixlen = 1LL << 32;
5955 } 5957 }
5956 5958
5957 if (bmv->bmv_length == -1) { 5959 if (bmv->bmv_length == -1) {
5958 fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen)); 5960 fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
5959 bmv->bmv_length = MAX( (__int64_t)(fixlen - bmv->bmv_offset), 5961 bmv->bmv_length =
5960 (__int64_t)0); 5962 max_t(__int64_t, fixlen - bmv->bmv_offset, 0);
5961 } else if (bmv->bmv_length < 0) 5963 } else if (bmv->bmv_length == 0) {
5962 return XFS_ERROR(EINVAL);
5963 if (bmv->bmv_length == 0) {
5964 bmv->bmv_entries = 0; 5964 bmv->bmv_entries = 0;
5965 return 0; 5965 return 0;
5966 } else if (bmv->bmv_length < 0) {
5967 return XFS_ERROR(EINVAL);
5966 } 5968 }
5969
5967 nex = bmv->bmv_count - 1; 5970 nex = bmv->bmv_count - 1;
5968 if (nex <= 0) 5971 if (nex <= 0)
5969 return XFS_ERROR(EINVAL); 5972 return XFS_ERROR(EINVAL);
5970 bmvend = bmv->bmv_offset + bmv->bmv_length; 5973 bmvend = bmv->bmv_offset + bmv->bmv_length;
5971 5974
5972 xfs_ilock(ip, XFS_IOLOCK_SHARED);
5973 5975
5974 if (((iflags & BMV_IF_DELALLOC) == 0) && 5976 if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
5975 (whichfork == XFS_DATA_FORK) && 5977 return XFS_ERROR(ENOMEM);
5976 (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size)) { 5978 out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL);
5977 /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */ 5979 if (!out)
5978 error = xfs_flush_pages(ip, (xfs_off_t)0, 5980 return XFS_ERROR(ENOMEM);
5979 -1, 0, FI_REMAPF); 5981
5980 if (error) { 5982 xfs_ilock(ip, XFS_IOLOCK_SHARED);
5981 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 5983 if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
5982 return error; 5984 if (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size) {
5985 error = xfs_flush_pages(ip, 0, -1, 0, FI_REMAPF);
5986 if (error)
5987 goto out_unlock_iolock;
5983 } 5988 }
5984 }
5985 5989
5986 ASSERT(whichfork == XFS_ATTR_FORK || (iflags & BMV_IF_DELALLOC) || 5990 ASSERT(ip->i_delayed_blks == 0);
5987 ip->i_delayed_blks == 0); 5991 }
5988 5992
5989 lock = xfs_ilock_map_shared(ip); 5993 lock = xfs_ilock_map_shared(ip);
5990 5994
@@ -5995,23 +5999,25 @@ xfs_getbmap(
5995 if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1) 5999 if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
5996 nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; 6000 nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;
5997 6001
5998 bmapi_flags = xfs_bmapi_aflag(whichfork) | 6002 bmapi_flags = xfs_bmapi_aflag(whichfork);
5999 ((iflags & BMV_IF_PREALLOC) ? 0 : XFS_BMAPI_IGSTATE); 6003 if (!(iflags & BMV_IF_PREALLOC))
6004 bmapi_flags |= XFS_BMAPI_IGSTATE;
6000 6005
6001 /* 6006 /*
6002 * Allocate enough space to handle "subnex" maps at a time. 6007 * Allocate enough space to handle "subnex" maps at a time.
6003 */ 6008 */
6009 error = ENOMEM;
6004 subnex = 16; 6010 subnex = 16;
6005 map = kmem_alloc(subnex * sizeof(*map), KM_SLEEP); 6011 map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL);
6012 if (!map)
6013 goto out_unlock_ilock;
6006 6014
6007 bmv->bmv_entries = 0; 6015 bmv->bmv_entries = 0;
6008 6016
6009 if ((XFS_IFORK_NEXTENTS(ip, whichfork) == 0)) { 6017 if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
6010 if (((iflags & BMV_IF_DELALLOC) == 0) || 6018 (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
6011 whichfork == XFS_ATTR_FORK) { 6019 error = 0;
6012 error = 0; 6020 goto out_free_map;
6013 goto unlock_and_return;
6014 }
6015 } 6021 }
6016 6022
6017 nexleft = nex; 6023 nexleft = nex;
@@ -6023,53 +6029,61 @@ xfs_getbmap(
6023 bmapi_flags, NULL, 0, map, &nmap, 6029 bmapi_flags, NULL, 0, map, &nmap,
6024 NULL, NULL); 6030 NULL, NULL);
6025 if (error) 6031 if (error)
6026 goto unlock_and_return; 6032 goto out_free_map;
6027 ASSERT(nmap <= subnex); 6033 ASSERT(nmap <= subnex);
6028 6034
6029 for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) { 6035 for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) {
6030 out.bmv_oflags = 0; 6036 out[cur_ext].bmv_oflags = 0;
6031 if (map[i].br_state == XFS_EXT_UNWRITTEN) 6037 if (map[i].br_state == XFS_EXT_UNWRITTEN)
6032 out.bmv_oflags |= BMV_OF_PREALLOC; 6038 out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
6033 else if (map[i].br_startblock == DELAYSTARTBLOCK) 6039 else if (map[i].br_startblock == DELAYSTARTBLOCK)
6034 out.bmv_oflags |= BMV_OF_DELALLOC; 6040 out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
6035 out.bmv_offset = XFS_FSB_TO_BB(mp, map[i].br_startoff); 6041 out[cur_ext].bmv_offset =
6036 out.bmv_length = XFS_FSB_TO_BB(mp, map[i].br_blockcount); 6042 XFS_FSB_TO_BB(mp, map[i].br_startoff);
6037 out.bmv_unused1 = out.bmv_unused2 = 0; 6043 out[cur_ext].bmv_length =
6044 XFS_FSB_TO_BB(mp, map[i].br_blockcount);
6045 out[cur_ext].bmv_unused1 = 0;
6046 out[cur_ext].bmv_unused2 = 0;
6038 ASSERT(((iflags & BMV_IF_DELALLOC) != 0) || 6047 ASSERT(((iflags & BMV_IF_DELALLOC) != 0) ||
6039 (map[i].br_startblock != DELAYSTARTBLOCK)); 6048 (map[i].br_startblock != DELAYSTARTBLOCK));
6040 if (map[i].br_startblock == HOLESTARTBLOCK && 6049 if (map[i].br_startblock == HOLESTARTBLOCK &&
6041 whichfork == XFS_ATTR_FORK) { 6050 whichfork == XFS_ATTR_FORK) {
6042 /* came to the end of attribute fork */ 6051 /* came to the end of attribute fork */
6043 out.bmv_oflags |= BMV_OF_LAST; 6052 out[cur_ext].bmv_oflags |= BMV_OF_LAST;
6044 goto unlock_and_return; 6053 goto out_free_map;
6045 } else {
6046 int full = 0; /* user array is full */
6047
6048 if (!xfs_getbmapx_fix_eof_hole(ip, &out,
6049 prealloced, bmvend,
6050 map[i].br_startblock)) {
6051 goto unlock_and_return;
6052 }
6053
6054 /* format results & advance arg */
6055 error = formatter(&arg, &out, &full);
6056 if (error || full)
6057 goto unlock_and_return;
6058 nexleft--;
6059 bmv->bmv_offset =
6060 out.bmv_offset + out.bmv_length;
6061 bmv->bmv_length = MAX((__int64_t)0,
6062 (__int64_t)(bmvend - bmv->bmv_offset));
6063 bmv->bmv_entries++;
6064 } 6054 }
6055
6056 if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext],
6057 prealloced, bmvend,
6058 map[i].br_startblock))
6059 goto out_free_map;
6060
6061 nexleft--;
6062 bmv->bmv_offset =
6063 out[cur_ext].bmv_offset +
6064 out[cur_ext].bmv_length;
6065 bmv->bmv_length =
6066 max_t(__int64_t, 0, bmvend - bmv->bmv_offset);
6067 bmv->bmv_entries++;
6068 cur_ext++;
6065 } 6069 }
6066 } while (nmap && nexleft && bmv->bmv_length); 6070 } while (nmap && nexleft && bmv->bmv_length);
6067 6071
6068unlock_and_return: 6072 out_free_map:
6073 kmem_free(map);
6074 out_unlock_ilock:
6069 xfs_iunlock_map_shared(ip, lock); 6075 xfs_iunlock_map_shared(ip, lock);
6076 out_unlock_iolock:
6070 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 6077 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
6071 6078
6072 kmem_free(map); 6079 for (i = 0; i < cur_ext; i++) {
6080 int full = 0; /* user array is full */
6081
6082 /* format results & advance arg */
6083 error = formatter(&arg, &out[i], &full);
6084 if (error || full)
6085 break;
6086 }
6073 6087
6074 return error; 6088 return error;
6075} 6089}
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 478e587087fe..89b81eedce6a 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -69,15 +69,6 @@ xfs_inode_alloc(
69 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 69 ASSERT(!spin_is_locked(&ip->i_flags_lock));
70 ASSERT(completion_done(&ip->i_flush)); 70 ASSERT(completion_done(&ip->i_flush));
71 71
72 /*
73 * initialise the VFS inode here to get failures
74 * out of the way early.
75 */
76 if (!inode_init_always(mp->m_super, VFS_I(ip))) {
77 kmem_zone_free(xfs_inode_zone, ip);
78 return NULL;
79 }
80
81 /* initialise the xfs inode */ 72 /* initialise the xfs inode */
82 ip->i_ino = ino; 73 ip->i_ino = ino;
83 ip->i_mount = mp; 74 ip->i_mount = mp;
@@ -113,6 +104,20 @@ xfs_inode_alloc(
113#ifdef XFS_DIR2_TRACE 104#ifdef XFS_DIR2_TRACE
114 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); 105 ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS);
115#endif 106#endif
107 /*
108 * Now initialise the VFS inode. We do this after the xfs_inode
109 * initialisation as internal failures will result in ->destroy_inode
110 * being called and that will pass down through the reclaim path and
111 * free the XFS inode. This path requires the XFS inode to already be
112 * initialised. Hence if this call fails, the xfs_inode has already
113 * been freed and we should not reference it at all in the error
114 * handling.
115 */
116 if (!inode_init_always(mp->m_super, VFS_I(ip)))
117 return NULL;
118
119 /* prevent anyone from using this yet */
120 VFS_I(ip)->i_state = I_NEW|I_LOCK;
116 121
117 return ip; 122 return ip;
118} 123}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index e7ae08d1df48..123b20c8cbf2 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1258,8 +1258,10 @@ xfs_file_last_byte(
1258 * necessary. 1258 * necessary.
1259 */ 1259 */
1260 if (ip->i_df.if_flags & XFS_IFEXTENTS) { 1260 if (ip->i_df.if_flags & XFS_IFEXTENTS) {
1261 xfs_ilock(ip, XFS_ILOCK_SHARED);
1261 error = xfs_bmap_last_offset(NULL, ip, &last_block, 1262 error = xfs_bmap_last_offset(NULL, ip, &last_block,
1262 XFS_DATA_FORK); 1263 XFS_DATA_FORK);
1264 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1263 if (error) { 1265 if (error) {
1264 last_block = 0; 1266 last_block = 0;
1265 } 1267 }
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 08ce72316bfe..5aaa2d7ec155 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -338,38 +338,6 @@ xfs_iomap_eof_align_last_fsb(
338} 338}
339 339
340STATIC int 340STATIC int
341xfs_flush_space(
342 xfs_inode_t *ip,
343 int *fsynced,
344 int *ioflags)
345{
346 switch (*fsynced) {
347 case 0:
348 if (ip->i_delayed_blks) {
349 xfs_iunlock(ip, XFS_ILOCK_EXCL);
350 xfs_flush_inode(ip);
351 xfs_ilock(ip, XFS_ILOCK_EXCL);
352 *fsynced = 1;
353 } else {
354 *ioflags |= BMAPI_SYNC;
355 *fsynced = 2;
356 }
357 return 0;
358 case 1:
359 *fsynced = 2;
360 *ioflags |= BMAPI_SYNC;
361 return 0;
362 case 2:
363 xfs_iunlock(ip, XFS_ILOCK_EXCL);
364 xfs_flush_device(ip);
365 xfs_ilock(ip, XFS_ILOCK_EXCL);
366 *fsynced = 3;
367 return 0;
368 }
369 return 1;
370}
371
372STATIC int
373xfs_cmn_err_fsblock_zero( 341xfs_cmn_err_fsblock_zero(
374 xfs_inode_t *ip, 342 xfs_inode_t *ip,
375 xfs_bmbt_irec_t *imap) 343 xfs_bmbt_irec_t *imap)
@@ -538,15 +506,9 @@ error_out:
538} 506}
539 507
540/* 508/*
541 * If the caller is doing a write at the end of the file, 509 * If the caller is doing a write at the end of the file, then extend the
542 * then extend the allocation out to the file system's write 510 * allocation out to the file system's write iosize. We clean up any extra
543 * iosize. We clean up any extra space left over when the 511 * space left over when the file is closed in xfs_inactive().
544 * file is closed in xfs_inactive().
545 *
546 * For sync writes, we are flushing delayed allocate space to
547 * try to make additional space available for allocation near
548 * the filesystem full boundary - preallocation hurts in that
549 * situation, of course.
550 */ 512 */
551STATIC int 513STATIC int
552xfs_iomap_eof_want_preallocate( 514xfs_iomap_eof_want_preallocate(
@@ -565,7 +527,7 @@ xfs_iomap_eof_want_preallocate(
565 int n, error, imaps; 527 int n, error, imaps;
566 528
567 *prealloc = 0; 529 *prealloc = 0;
568 if ((ioflag & BMAPI_SYNC) || (offset + count) <= ip->i_size) 530 if ((offset + count) <= ip->i_size)
569 return 0; 531 return 0;
570 532
571 /* 533 /*
@@ -611,7 +573,7 @@ xfs_iomap_write_delay(
611 xfs_extlen_t extsz; 573 xfs_extlen_t extsz;
612 int nimaps; 574 int nimaps;
613 xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; 575 xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
614 int prealloc, fsynced = 0; 576 int prealloc, flushed = 0;
615 int error; 577 int error;
616 578
617 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 579 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
@@ -627,12 +589,12 @@ xfs_iomap_write_delay(
627 extsz = xfs_get_extsz_hint(ip); 589 extsz = xfs_get_extsz_hint(ip);
628 offset_fsb = XFS_B_TO_FSBT(mp, offset); 590 offset_fsb = XFS_B_TO_FSBT(mp, offset);
629 591
630retry:
631 error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, 592 error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
632 ioflag, imap, XFS_WRITE_IMAPS, &prealloc); 593 ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
633 if (error) 594 if (error)
634 return error; 595 return error;
635 596
597retry:
636 if (prealloc) { 598 if (prealloc) {
637 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); 599 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
638 ioalign = XFS_B_TO_FSBT(mp, aligned_offset); 600 ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
@@ -659,15 +621,22 @@ retry:
659 621
660 /* 622 /*
661 * If bmapi returned us nothing, and if we didn't get back EDQUOT, 623 * If bmapi returned us nothing, and if we didn't get back EDQUOT,
662 * then we must have run out of space - flush delalloc, and retry.. 624 * then we must have run out of space - flush all other inodes with
625 * delalloc blocks and retry without EOF preallocation.
663 */ 626 */
664 if (nimaps == 0) { 627 if (nimaps == 0) {
665 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE, 628 xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,
666 ip, offset, count); 629 ip, offset, count);
667 if (xfs_flush_space(ip, &fsynced, &ioflag)) 630 if (flushed)
668 return XFS_ERROR(ENOSPC); 631 return XFS_ERROR(ENOSPC);
669 632
633 xfs_iunlock(ip, XFS_ILOCK_EXCL);
634 xfs_flush_inodes(ip);
635 xfs_ilock(ip, XFS_ILOCK_EXCL);
636
637 flushed = 1;
670 error = 0; 638 error = 0;
639 prealloc = 0;
671 goto retry; 640 goto retry;
672 } 641 }
673 642
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index a1cc1322fc0f..fdcf7b82747f 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -40,8 +40,7 @@ typedef enum {
40 BMAPI_IGNSTATE = (1 << 4), /* ignore unwritten state on read */ 40 BMAPI_IGNSTATE = (1 << 4), /* ignore unwritten state on read */
41 BMAPI_DIRECT = (1 << 5), /* direct instead of buffered write */ 41 BMAPI_DIRECT = (1 << 5), /* direct instead of buffered write */
42 BMAPI_MMAP = (1 << 6), /* allocate for mmap write */ 42 BMAPI_MMAP = (1 << 6), /* allocate for mmap write */
43 BMAPI_SYNC = (1 << 7), /* sync write to flush delalloc space */ 43 BMAPI_TRYLOCK = (1 << 7), /* non-blocking request */
44 BMAPI_TRYLOCK = (1 << 8), /* non-blocking request */
45} bmapi_flags_t; 44} bmapi_flags_t;
46 45
47 46
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index f76c6d7cea21..3750f04ede0b 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -562,9 +562,8 @@ xfs_log_mount(
562 } 562 }
563 563
564 mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); 564 mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks);
565 if (!mp->m_log) { 565 if (IS_ERR(mp->m_log)) {
566 cmn_err(CE_WARN, "XFS: Log allocation failed: No memory!"); 566 error = -PTR_ERR(mp->m_log);
567 error = ENOMEM;
568 goto out; 567 goto out;
569 } 568 }
570 569
@@ -1180,10 +1179,13 @@ xlog_alloc_log(xfs_mount_t *mp,
1180 xfs_buf_t *bp; 1179 xfs_buf_t *bp;
1181 int i; 1180 int i;
1182 int iclogsize; 1181 int iclogsize;
1182 int error = ENOMEM;
1183 1183
1184 log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); 1184 log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL);
1185 if (!log) 1185 if (!log) {
1186 return NULL; 1186 xlog_warn("XFS: Log allocation failed: No memory!");
1187 goto out;
1188 }
1187 1189
1188 log->l_mp = mp; 1190 log->l_mp = mp;
1189 log->l_targ = log_target; 1191 log->l_targ = log_target;
@@ -1201,19 +1203,35 @@ xlog_alloc_log(xfs_mount_t *mp,
1201 log->l_grant_reserve_cycle = 1; 1203 log->l_grant_reserve_cycle = 1;
1202 log->l_grant_write_cycle = 1; 1204 log->l_grant_write_cycle = 1;
1203 1205
1206 error = EFSCORRUPTED;
1204 if (xfs_sb_version_hassector(&mp->m_sb)) { 1207 if (xfs_sb_version_hassector(&mp->m_sb)) {
1205 log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT; 1208 log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT;
1206 ASSERT(log->l_sectbb_log <= mp->m_sectbb_log); 1209 if (log->l_sectbb_log < 0 ||
1210 log->l_sectbb_log > mp->m_sectbb_log) {
1211 xlog_warn("XFS: Log sector size (0x%x) out of range.",
1212 log->l_sectbb_log);
1213 goto out_free_log;
1214 }
1215
1207 /* for larger sector sizes, must have v2 or external log */ 1216 /* for larger sector sizes, must have v2 or external log */
1208 ASSERT(log->l_sectbb_log == 0 || 1217 if (log->l_sectbb_log != 0 &&
1209 log->l_logBBstart == 0 || 1218 (log->l_logBBstart != 0 &&
1210 xfs_sb_version_haslogv2(&mp->m_sb)); 1219 !xfs_sb_version_haslogv2(&mp->m_sb))) {
1211 ASSERT(mp->m_sb.sb_logsectlog >= BBSHIFT); 1220 xlog_warn("XFS: log sector size (0x%x) invalid "
1221 "for configuration.", log->l_sectbb_log);
1222 goto out_free_log;
1223 }
1224 if (mp->m_sb.sb_logsectlog < BBSHIFT) {
1225 xlog_warn("XFS: Log sector log (0x%x) too small.",
1226 mp->m_sb.sb_logsectlog);
1227 goto out_free_log;
1228 }
1212 } 1229 }
1213 log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1; 1230 log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1;
1214 1231
1215 xlog_get_iclog_buffer_size(mp, log); 1232 xlog_get_iclog_buffer_size(mp, log);
1216 1233
1234 error = ENOMEM;
1217 bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); 1235 bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
1218 if (!bp) 1236 if (!bp)
1219 goto out_free_log; 1237 goto out_free_log;
@@ -1313,7 +1331,8 @@ out_free_iclog:
1313 xfs_buf_free(log->l_xbuf); 1331 xfs_buf_free(log->l_xbuf);
1314out_free_log: 1332out_free_log:
1315 kmem_free(log); 1333 kmem_free(log);
1316 return NULL; 1334out:
1335 return ERR_PTR(-error);
1317} /* xlog_alloc_log */ 1336} /* xlog_alloc_log */
1318 1337
1319 1338
@@ -2541,18 +2560,19 @@ redo:
2541 xlog_ins_ticketq(&log->l_reserve_headq, tic); 2560 xlog_ins_ticketq(&log->l_reserve_headq, tic);
2542 xlog_trace_loggrant(log, tic, 2561 xlog_trace_loggrant(log, tic,
2543 "xlog_grant_log_space: sleep 2"); 2562 "xlog_grant_log_space: sleep 2");
2563 spin_unlock(&log->l_grant_lock);
2564 xlog_grant_push_ail(log->l_mp, need_bytes);
2565 spin_lock(&log->l_grant_lock);
2566
2544 XFS_STATS_INC(xs_sleep_logspace); 2567 XFS_STATS_INC(xs_sleep_logspace);
2545 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); 2568 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2546 2569
2547 if (XLOG_FORCED_SHUTDOWN(log)) { 2570 spin_lock(&log->l_grant_lock);
2548 spin_lock(&log->l_grant_lock); 2571 if (XLOG_FORCED_SHUTDOWN(log))
2549 goto error_return; 2572 goto error_return;
2550 }
2551 2573
2552 xlog_trace_loggrant(log, tic, 2574 xlog_trace_loggrant(log, tic,
2553 "xlog_grant_log_space: wake 2"); 2575 "xlog_grant_log_space: wake 2");
2554 xlog_grant_push_ail(log->l_mp, need_bytes);
2555 spin_lock(&log->l_grant_lock);
2556 goto redo; 2576 goto redo;
2557 } else if (tic->t_flags & XLOG_TIC_IN_Q) 2577 } else if (tic->t_flags & XLOG_TIC_IN_Q)
2558 xlog_del_ticketq(&log->l_reserve_headq, tic); 2578 xlog_del_ticketq(&log->l_reserve_headq, tic);
@@ -2631,7 +2651,7 @@ xlog_regrant_write_log_space(xlog_t *log,
2631 * for more free space, otherwise try to get some space for 2651 * for more free space, otherwise try to get some space for
2632 * this transaction. 2652 * this transaction.
2633 */ 2653 */
2634 2654 need_bytes = tic->t_unit_res;
2635 if ((ntic = log->l_write_headq)) { 2655 if ((ntic = log->l_write_headq)) {
2636 free_bytes = xlog_space_left(log, log->l_grant_write_cycle, 2656 free_bytes = xlog_space_left(log, log->l_grant_write_cycle,
2637 log->l_grant_write_bytes); 2657 log->l_grant_write_bytes);
@@ -2651,26 +2671,25 @@ xlog_regrant_write_log_space(xlog_t *log,
2651 2671
2652 xlog_trace_loggrant(log, tic, 2672 xlog_trace_loggrant(log, tic,
2653 "xlog_regrant_write_log_space: sleep 1"); 2673 "xlog_regrant_write_log_space: sleep 1");
2674 spin_unlock(&log->l_grant_lock);
2675 xlog_grant_push_ail(log->l_mp, need_bytes);
2676 spin_lock(&log->l_grant_lock);
2677
2654 XFS_STATS_INC(xs_sleep_logspace); 2678 XFS_STATS_INC(xs_sleep_logspace);
2655 sv_wait(&tic->t_wait, PINOD|PLTWAIT, 2679 sv_wait(&tic->t_wait, PINOD|PLTWAIT,
2656 &log->l_grant_lock, s); 2680 &log->l_grant_lock, s);
2657 2681
2658 /* If we're shutting down, this tic is already 2682 /* If we're shutting down, this tic is already
2659 * off the queue */ 2683 * off the queue */
2660 if (XLOG_FORCED_SHUTDOWN(log)) { 2684 spin_lock(&log->l_grant_lock);
2661 spin_lock(&log->l_grant_lock); 2685 if (XLOG_FORCED_SHUTDOWN(log))
2662 goto error_return; 2686 goto error_return;
2663 }
2664 2687
2665 xlog_trace_loggrant(log, tic, 2688 xlog_trace_loggrant(log, tic,
2666 "xlog_regrant_write_log_space: wake 1"); 2689 "xlog_regrant_write_log_space: wake 1");
2667 xlog_grant_push_ail(log->l_mp, tic->t_unit_res);
2668 spin_lock(&log->l_grant_lock);
2669 } 2690 }
2670 } 2691 }
2671 2692
2672 need_bytes = tic->t_unit_res;
2673
2674redo: 2693redo:
2675 if (XLOG_FORCED_SHUTDOWN(log)) 2694 if (XLOG_FORCED_SHUTDOWN(log))
2676 goto error_return; 2695 goto error_return;
@@ -2680,19 +2699,20 @@ redo:
2680 if (free_bytes < need_bytes) { 2699 if (free_bytes < need_bytes) {
2681 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) 2700 if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
2682 xlog_ins_ticketq(&log->l_write_headq, tic); 2701 xlog_ins_ticketq(&log->l_write_headq, tic);
2702 spin_unlock(&log->l_grant_lock);
2703 xlog_grant_push_ail(log->l_mp, need_bytes);
2704 spin_lock(&log->l_grant_lock);
2705
2683 XFS_STATS_INC(xs_sleep_logspace); 2706 XFS_STATS_INC(xs_sleep_logspace);
2684 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); 2707 sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
2685 2708
2686 /* If we're shutting down, this tic is already off the queue */ 2709 /* If we're shutting down, this tic is already off the queue */
2687 if (XLOG_FORCED_SHUTDOWN(log)) { 2710 spin_lock(&log->l_grant_lock);
2688 spin_lock(&log->l_grant_lock); 2711 if (XLOG_FORCED_SHUTDOWN(log))
2689 goto error_return; 2712 goto error_return;
2690 }
2691 2713
2692 xlog_trace_loggrant(log, tic, 2714 xlog_trace_loggrant(log, tic,
2693 "xlog_regrant_write_log_space: wake 2"); 2715 "xlog_regrant_write_log_space: wake 2");
2694 xlog_grant_push_ail(log->l_mp, need_bytes);
2695 spin_lock(&log->l_grant_lock);
2696 goto redo; 2716 goto redo;
2697 } else if (tic->t_flags & XLOG_TIC_IN_Q) 2717 } else if (tic->t_flags & XLOG_TIC_IN_Q)
2698 xlog_del_ticketq(&log->l_write_headq, tic); 2718 xlog_del_ticketq(&log->l_write_headq, tic);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b101990df027..65a99725d0cc 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -291,14 +291,17 @@ xfs_mount_validate_sb(
291 sbp->sb_sectsize > XFS_MAX_SECTORSIZE || 291 sbp->sb_sectsize > XFS_MAX_SECTORSIZE ||
292 sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG || 292 sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG ||
293 sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG || 293 sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG ||
294 sbp->sb_sectsize != (1 << sbp->sb_sectlog) ||
294 sbp->sb_blocksize < XFS_MIN_BLOCKSIZE || 295 sbp->sb_blocksize < XFS_MIN_BLOCKSIZE ||
295 sbp->sb_blocksize > XFS_MAX_BLOCKSIZE || 296 sbp->sb_blocksize > XFS_MAX_BLOCKSIZE ||
296 sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || 297 sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG ||
297 sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || 298 sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG ||
299 sbp->sb_blocksize != (1 << sbp->sb_blocklog) ||
298 sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || 300 sbp->sb_inodesize < XFS_DINODE_MIN_SIZE ||
299 sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || 301 sbp->sb_inodesize > XFS_DINODE_MAX_SIZE ||
300 sbp->sb_inodelog < XFS_DINODE_MIN_LOG || 302 sbp->sb_inodelog < XFS_DINODE_MIN_LOG ||
301 sbp->sb_inodelog > XFS_DINODE_MAX_LOG || 303 sbp->sb_inodelog > XFS_DINODE_MAX_LOG ||
304 sbp->sb_inodesize != (1 << sbp->sb_inodelog) ||
302 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || 305 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) ||
303 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || 306 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
304 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || 307 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 7af44adffc8f..d6a64392f983 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -313,7 +313,7 @@ typedef struct xfs_mount {
313#endif 313#endif
314 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ 314 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
315 struct task_struct *m_sync_task; /* generalised sync thread */ 315 struct task_struct *m_sync_task; /* generalised sync thread */
316 bhv_vfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ 316 xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */
317 struct list_head m_sync_list; /* sync thread work item list */ 317 struct list_head m_sync_list; /* sync thread work item list */
318 spinlock_t m_sync_lock; /* work item list lock */ 318 spinlock_t m_sync_lock; /* work item list lock */
319 int m_sync_seq; /* sync thread generation no. */ 319 int m_sync_seq; /* sync thread generation no. */
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 7394c7af5de5..19cf90a9c762 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1457,6 +1457,13 @@ xfs_create(
1457 error = xfs_trans_reserve(tp, resblks, log_res, 0, 1457 error = xfs_trans_reserve(tp, resblks, log_res, 0,
1458 XFS_TRANS_PERM_LOG_RES, log_count); 1458 XFS_TRANS_PERM_LOG_RES, log_count);
1459 if (error == ENOSPC) { 1459 if (error == ENOSPC) {
1460 /* flush outstanding delalloc blocks and retry */
1461 xfs_flush_inodes(dp);
1462 error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0,
1463 XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT);
1464 }
1465 if (error == ENOSPC) {
1466 /* No space at all so try a "no-allocation" reservation */
1460 resblks = 0; 1467 resblks = 0;
1461 error = xfs_trans_reserve(tp, 0, log_res, 0, 1468 error = xfs_trans_reserve(tp, 0, log_res, 0,
1462 XFS_TRANS_PERM_LOG_RES, log_count); 1469 XFS_TRANS_PERM_LOG_RES, log_count);