aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorSteve French <sfrench@us.ibm.com>2009-04-30 11:36:52 -0400
committerSteve French <sfrench@us.ibm.com>2009-04-30 11:36:52 -0400
commit912bc6ae3de99c7bada4577d4341ace4ee59b5be (patch)
tree28fd1a4bb9e4b05aa833285b46df169f12c0e24d /fs
parentd37dc42ab6f040b8f0f2962ab219c5b2accf748d (diff)
parent091438dd5668396328a3419abcbc6591159eb8d1 (diff)
Merge branch 'master' of /pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'fs')
-rw-r--r--fs/autofs/dirhash.c34
-rw-r--r--fs/autofs4/dev-ioctl.c12
-rw-r--r--fs/bio.c127
-rw-r--r--fs/btrfs/Makefile19
-rw-r--r--fs/btrfs/acl.c18
-rw-r--r--fs/btrfs/async-thread.c60
-rw-r--r--fs/btrfs/async-thread.h2
-rw-r--r--fs/btrfs/ctree.c17
-rw-r--r--fs/btrfs/ctree.h6
-rw-r--r--fs/btrfs/disk-io.c102
-rw-r--r--fs/btrfs/extent-tree.c49
-rw-r--r--fs/btrfs/extent_io.c167
-rw-r--r--fs/btrfs/extent_map.c17
-rw-r--r--fs/btrfs/file.c95
-rw-r--r--fs/btrfs/free-space-cache.c15
-rw-r--r--fs/btrfs/inode-map.c2
-rw-r--r--fs/btrfs/inode.c183
-rw-r--r--fs/btrfs/ioctl.c58
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/super.c40
-rw-r--r--fs/btrfs/transaction.c6
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/volumes.c159
-rw-r--r--fs/btrfs/volumes.h16
-rw-r--r--fs/compat.c48
-rw-r--r--fs/compat_ioctl.c7
-rw-r--r--fs/dcache.c1
-rw-r--r--fs/ecryptfs/crypto.c21
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h1
-rw-r--r--fs/ecryptfs/inode.c37
-rw-r--r--fs/ecryptfs/main.c14
-rw-r--r--fs/ecryptfs/messaging.c82
-rw-r--r--fs/ecryptfs/miscdev.c43
-rw-r--r--fs/ecryptfs/mmap.c11
-rw-r--r--fs/ecryptfs/read_write.c32
-rw-r--r--fs/ecryptfs/super.c7
-rw-r--r--fs/exec.c25
-rw-r--r--fs/ext2/super.c4
-rw-r--r--fs/ext4/extents.c18
-rw-r--r--fs/ext4/ialloc.c6
-rw-r--r--fs/ext4/inode.c24
-rw-r--r--fs/filesystems.c2
-rw-r--r--fs/gfs2/glops.c6
-rw-r--r--fs/gfs2/ops_file.c4
-rw-r--r--fs/gfs2/rgrp.c13
-rw-r--r--fs/hugetlbfs/inode.c3
-rw-r--r--fs/jbd/commit.c2
-rw-r--r--fs/jbd/revoke.c20
-rw-r--r--fs/jbd2/commit.c3
-rw-r--r--fs/jbd2/revoke.c21
-rw-r--r--fs/namei.c2
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/ncpfs/ioctl.c21
-rw-r--r--fs/nfs/nfs3xdr.c3
-rw-r--r--fs/nfsd/nfs4recover.c46
-rw-r--r--fs/nfsd/vfs.c34
-rw-r--r--fs/proc/stat.c5
-rw-r--r--fs/quota/Makefile9
-rw-r--r--fs/romfs/internal.h4
-rw-r--r--fs/romfs/storage.c68
-rw-r--r--fs/romfs/super.c4
-rw-r--r--fs/stat.c137
-rw-r--r--fs/sysfs/bin.c13
-rw-r--r--fs/xattr.c10
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c23
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c12
66 files changed, 1047 insertions, 1009 deletions
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index bf8c8af98004..4eb4d8dfb2f1 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -39,10 +39,12 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
39{ 39{
40 struct autofs_dirhash *dh = &sbi->dirhash; 40 struct autofs_dirhash *dh = &sbi->dirhash;
41 struct autofs_dir_ent *ent; 41 struct autofs_dir_ent *ent;
42 struct dentry *dentry;
43 unsigned long timeout = sbi->exp_timeout; 42 unsigned long timeout = sbi->exp_timeout;
44 43
45 while (1) { 44 while (1) {
45 struct path path;
46 int umount_ok;
47
46 if ( list_empty(&dh->expiry_head) || sbi->catatonic ) 48 if ( list_empty(&dh->expiry_head) || sbi->catatonic )
47 return NULL; /* No entries */ 49 return NULL; /* No entries */
48 /* We keep the list sorted by last_usage and want old stuff */ 50 /* We keep the list sorted by last_usage and want old stuff */
@@ -57,17 +59,17 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
57 return ent; /* Symlinks are always expirable */ 59 return ent; /* Symlinks are always expirable */
58 60
59 /* Get the dentry for the autofs subdirectory */ 61 /* Get the dentry for the autofs subdirectory */
60 dentry = ent->dentry; 62 path.dentry = ent->dentry;
61 63
62 if ( !dentry ) { 64 if (!path.dentry) {
63 /* Should only happen in catatonic mode */ 65 /* Should only happen in catatonic mode */
64 printk("autofs: dentry == NULL but inode range is directory, entry %s\n", ent->name); 66 printk("autofs: dentry == NULL but inode range is directory, entry %s\n", ent->name);
65 autofs_delete_usage(ent); 67 autofs_delete_usage(ent);
66 continue; 68 continue;
67 } 69 }
68 70
69 if ( !dentry->d_inode ) { 71 if (!path.dentry->d_inode) {
70 dput(dentry); 72 dput(path.dentry);
71 printk("autofs: negative dentry on expiry queue: %s\n", 73 printk("autofs: negative dentry on expiry queue: %s\n",
72 ent->name); 74 ent->name);
73 autofs_delete_usage(ent); 75 autofs_delete_usage(ent);
@@ -76,29 +78,29 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
76 78
77 /* Make sure entry is mounted and unused; note that dentry will 79 /* Make sure entry is mounted and unused; note that dentry will
78 point to the mounted-on-top root. */ 80 point to the mounted-on-top root. */
79 if (!S_ISDIR(dentry->d_inode->i_mode)||!d_mountpoint(dentry)) { 81 if (!S_ISDIR(path.dentry->d_inode->i_mode) ||
82 !d_mountpoint(path.dentry)) {
80 DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); 83 DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
81 continue; 84 continue;
82 } 85 }
83 mntget(mnt); 86 path.mnt = mnt;
84 dget(dentry); 87 path_get(&path);
85 if (!follow_down(&mnt, &dentry)) { 88 if (!follow_down(&path.mnt, &path.dentry)) {
86 dput(dentry); 89 path_put(&path);
87 mntput(mnt);
88 DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); 90 DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
89 continue; 91 continue;
90 } 92 }
91 while (d_mountpoint(dentry) && follow_down(&mnt, &dentry)) 93 while (d_mountpoint(path.dentry) &&
94 follow_down(&path.mnt, &path.dentry))
92 ; 95 ;
93 dput(dentry); 96 umount_ok = may_umount(path.mnt);
97 path_put(&path);
94 98
95 if ( may_umount(mnt) ) { 99 if (umount_ok) {
96 mntput(mnt);
97 DPRINTK(("autofs: signaling expire on %s\n", ent->name)); 100 DPRINTK(("autofs: signaling expire on %s\n", ent->name));
98 return ent; /* Expirable! */ 101 return ent; /* Expirable! */
99 } 102 }
100 DPRINTK(("autofs: didn't expire due to may_umount: %s\n", ent->name)); 103 DPRINTK(("autofs: didn't expire due to may_umount: %s\n", ent->name));
101 mntput(mnt);
102 } 104 }
103 return NULL; /* No expirable entries */ 105 return NULL; /* No expirable entries */
104} 106}
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 9e5ae8a4f5c8..84168c0dcc2d 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -54,11 +54,10 @@ static int check_name(const char *name)
54 * Check a string doesn't overrun the chunk of 54 * Check a string doesn't overrun the chunk of
55 * memory we copied from user land. 55 * memory we copied from user land.
56 */ 56 */
57static int invalid_str(char *str, void *end) 57static int invalid_str(char *str, size_t size)
58{ 58{
59 while ((void *) str <= end) 59 if (memchr(str, 0, size))
60 if (!*str++) 60 return 0;
61 return 0;
62 return -EINVAL; 61 return -EINVAL;
63} 62}
64 63
@@ -138,8 +137,7 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
138 } 137 }
139 138
140 if (param->size > sizeof(*param)) { 139 if (param->size > sizeof(*param)) {
141 err = invalid_str(param->path, 140 err = invalid_str(param->path, param->size - sizeof(*param));
142 (void *) ((size_t) param + param->size));
143 if (err) { 141 if (err) {
144 AUTOFS_WARN( 142 AUTOFS_WARN(
145 "path string terminator missing for cmd(0x%08x)", 143 "path string terminator missing for cmd(0x%08x)",
@@ -488,7 +486,7 @@ static int autofs_dev_ioctl_requester(struct file *fp,
488 } 486 }
489 487
490 path = param->path; 488 path = param->path;
491 devid = sbi->sb->s_dev; 489 devid = new_encode_dev(sbi->sb->s_dev);
492 490
493 param->requester.uid = param->requester.gid = -1; 491 param->requester.uid = param->requester.gid = -1;
494 492
diff --git a/fs/bio.c b/fs/bio.c
index cd42bb882f30..98711647ece4 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -175,14 +175,6 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
175 struct bio_vec *bvl; 175 struct bio_vec *bvl;
176 176
177 /* 177 /*
178 * If 'bs' is given, lookup the pool and do the mempool alloc.
179 * If not, this is a bio_kmalloc() allocation and just do a
180 * kzalloc() for the exact number of vecs right away.
181 */
182 if (!bs)
183 bvl = kmalloc(nr * sizeof(struct bio_vec), gfp_mask);
184
185 /*
186 * see comment near bvec_array define! 178 * see comment near bvec_array define!
187 */ 179 */
188 switch (nr) { 180 switch (nr) {
@@ -260,21 +252,6 @@ void bio_free(struct bio *bio, struct bio_set *bs)
260 mempool_free(p, bs->bio_pool); 252 mempool_free(p, bs->bio_pool);
261} 253}
262 254
263/*
264 * default destructor for a bio allocated with bio_alloc_bioset()
265 */
266static void bio_fs_destructor(struct bio *bio)
267{
268 bio_free(bio, fs_bio_set);
269}
270
271static void bio_kmalloc_destructor(struct bio *bio)
272{
273 if (bio_has_allocated_vec(bio))
274 kfree(bio->bi_io_vec);
275 kfree(bio);
276}
277
278void bio_init(struct bio *bio) 255void bio_init(struct bio *bio)
279{ 256{
280 memset(bio, 0, sizeof(*bio)); 257 memset(bio, 0, sizeof(*bio));
@@ -301,21 +278,15 @@ void bio_init(struct bio *bio)
301 **/ 278 **/
302struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) 279struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
303{ 280{
281 unsigned long idx = BIO_POOL_NONE;
304 struct bio_vec *bvl = NULL; 282 struct bio_vec *bvl = NULL;
305 struct bio *bio = NULL; 283 struct bio *bio;
306 unsigned long idx = 0; 284 void *p;
307 void *p = NULL; 285
308 286 p = mempool_alloc(bs->bio_pool, gfp_mask);
309 if (bs) { 287 if (unlikely(!p))
310 p = mempool_alloc(bs->bio_pool, gfp_mask); 288 return NULL;
311 if (!p) 289 bio = p + bs->front_pad;
312 goto err;
313 bio = p + bs->front_pad;
314 } else {
315 bio = kmalloc(sizeof(*bio), gfp_mask);
316 if (!bio)
317 goto err;
318 }
319 290
320 bio_init(bio); 291 bio_init(bio);
321 292
@@ -332,22 +303,50 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
332 303
333 nr_iovecs = bvec_nr_vecs(idx); 304 nr_iovecs = bvec_nr_vecs(idx);
334 } 305 }
306out_set:
335 bio->bi_flags |= idx << BIO_POOL_OFFSET; 307 bio->bi_flags |= idx << BIO_POOL_OFFSET;
336 bio->bi_max_vecs = nr_iovecs; 308 bio->bi_max_vecs = nr_iovecs;
337out_set:
338 bio->bi_io_vec = bvl; 309 bio->bi_io_vec = bvl;
339
340 return bio; 310 return bio;
341 311
342err_free: 312err_free:
343 if (bs) 313 mempool_free(p, bs->bio_pool);
344 mempool_free(p, bs->bio_pool);
345 else
346 kfree(bio);
347err:
348 return NULL; 314 return NULL;
349} 315}
350 316
317static void bio_fs_destructor(struct bio *bio)
318{
319 bio_free(bio, fs_bio_set);
320}
321
322/**
323 * bio_alloc - allocate a new bio, memory pool backed
324 * @gfp_mask: allocation mask to use
325 * @nr_iovecs: number of iovecs
326 *
327 * Allocate a new bio with @nr_iovecs bvecs. If @gfp_mask
328 * contains __GFP_WAIT, the allocation is guaranteed to succeed.
329 *
330 * RETURNS:
331 * Pointer to new bio on success, NULL on failure.
332 */
333struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
334{
335 struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
336
337 if (bio)
338 bio->bi_destructor = bio_fs_destructor;
339
340 return bio;
341}
342
343static void bio_kmalloc_destructor(struct bio *bio)
344{
345 if (bio_integrity(bio))
346 bio_integrity_free(bio);
347 kfree(bio);
348}
349
351/** 350/**
352 * bio_alloc - allocate a bio for I/O 351 * bio_alloc - allocate a bio for I/O
353 * @gfp_mask: the GFP_ mask given to the slab allocator 352 * @gfp_mask: the GFP_ mask given to the slab allocator
@@ -366,29 +365,20 @@ err:
366 * do so can cause livelocks under memory pressure. 365 * do so can cause livelocks under memory pressure.
367 * 366 *
368 **/ 367 **/
369struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
370{
371 struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
372
373 if (bio)
374 bio->bi_destructor = bio_fs_destructor;
375
376 return bio;
377}
378
379/*
380 * Like bio_alloc(), but doesn't use a mempool backing. This means that
381 * it CAN fail, but while bio_alloc() can only be used for allocations
382 * that have a short (finite) life span, bio_kmalloc() should be used
383 * for more permanent bio allocations (like allocating some bio's for
384 * initalization or setup purposes).
385 */
386struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) 368struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
387{ 369{
388 struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); 370 struct bio *bio;
389 371
390 if (bio) 372 bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec),
391 bio->bi_destructor = bio_kmalloc_destructor; 373 gfp_mask);
374 if (unlikely(!bio))
375 return NULL;
376
377 bio_init(bio);
378 bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET;
379 bio->bi_max_vecs = nr_iovecs;
380 bio->bi_io_vec = bio->bi_inline_vecs;
381 bio->bi_destructor = bio_kmalloc_destructor;
392 382
393 return bio; 383 return bio;
394} 384}
@@ -827,12 +817,15 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
827 len += iov[i].iov_len; 817 len += iov[i].iov_len;
828 } 818 }
829 819
820 if (offset)
821 nr_pages++;
822
830 bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask); 823 bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
831 if (!bmd) 824 if (!bmd)
832 return ERR_PTR(-ENOMEM); 825 return ERR_PTR(-ENOMEM);
833 826
834 ret = -ENOMEM; 827 ret = -ENOMEM;
835 bio = bio_alloc(gfp_mask, nr_pages); 828 bio = bio_kmalloc(gfp_mask, nr_pages);
836 if (!bio) 829 if (!bio)
837 goto out_bmd; 830 goto out_bmd;
838 831
@@ -956,7 +949,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
956 if (!nr_pages) 949 if (!nr_pages)
957 return ERR_PTR(-EINVAL); 950 return ERR_PTR(-EINVAL);
958 951
959 bio = bio_alloc(gfp_mask, nr_pages); 952 bio = bio_kmalloc(gfp_mask, nr_pages);
960 if (!bio) 953 if (!bio)
961 return ERR_PTR(-ENOMEM); 954 return ERR_PTR(-ENOMEM);
962 955
@@ -1140,7 +1133,7 @@ static struct bio *__bio_map_kern(struct request_queue *q, void *data,
1140 int offset, i; 1133 int offset, i;
1141 struct bio *bio; 1134 struct bio *bio;
1142 1135
1143 bio = bio_alloc(gfp_mask, nr_pages); 1136 bio = bio_kmalloc(gfp_mask, nr_pages);
1144 if (!bio) 1137 if (!bio)
1145 return ERR_PTR(-ENOMEM); 1138 return ERR_PTR(-ENOMEM);
1146 1139
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9adf5e4f7e96..94212844a9bc 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -1,25 +1,10 @@
1ifneq ($(KERNELRELEASE),)
2# kbuild part of makefile
3 1
4obj-$(CONFIG_BTRFS_FS) := btrfs.o 2obj-$(CONFIG_BTRFS_FS) := btrfs.o
5btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ 3
4btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
6 file-item.o inode-item.o inode-map.o disk-io.o \ 5 file-item.o inode-item.o inode-map.o disk-io.o \
7 transaction.o inode.o file.o tree-defrag.o \ 6 transaction.o inode.o file.o tree-defrag.o \
8 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ 7 extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
9 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ 8 extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
10 ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \ 9 ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \
11 compression.o delayed-ref.o 10 compression.o delayed-ref.o
12else
13
14# Normal Makefile
15
16KERNELDIR := /lib/modules/`uname -r`/build
17all:
18 $(MAKE) -C $(KERNELDIR) M=`pwd` CONFIG_BTRFS_FS=m modules
19
20modules_install:
21 $(MAKE) -C $(KERNELDIR) M=`pwd` modules_install
22clean:
23 $(MAKE) -C $(KERNELDIR) M=`pwd` clean
24
25endif
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 7fdd184a528d..cbba000dccbe 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -60,15 +60,20 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
60 return ERR_PTR(-EINVAL); 60 return ERR_PTR(-EINVAL);
61 } 61 }
62 62
63 /* Handle the cached NULL acl case without locking */
64 acl = ACCESS_ONCE(*p_acl);
65 if (!acl)
66 return acl;
67
63 spin_lock(&inode->i_lock); 68 spin_lock(&inode->i_lock);
64 if (*p_acl != BTRFS_ACL_NOT_CACHED) 69 acl = *p_acl;
65 acl = posix_acl_dup(*p_acl); 70 if (acl != BTRFS_ACL_NOT_CACHED)
71 acl = posix_acl_dup(acl);
66 spin_unlock(&inode->i_lock); 72 spin_unlock(&inode->i_lock);
67 73
68 if (acl) 74 if (acl != BTRFS_ACL_NOT_CACHED)
69 return acl; 75 return acl;
70 76
71
72 size = __btrfs_getxattr(inode, name, "", 0); 77 size = __btrfs_getxattr(inode, name, "", 0);
73 if (size > 0) { 78 if (size > 0) {
74 value = kzalloc(size, GFP_NOFS); 79 value = kzalloc(size, GFP_NOFS);
@@ -80,9 +85,12 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
80 btrfs_update_cached_acl(inode, p_acl, acl); 85 btrfs_update_cached_acl(inode, p_acl, acl);
81 } 86 }
82 kfree(value); 87 kfree(value);
83 } else if (size == -ENOENT) { 88 } else if (size == -ENOENT || size == -ENODATA || size == 0) {
89 /* FIXME, who returns -ENOENT? I think nobody */
84 acl = NULL; 90 acl = NULL;
85 btrfs_update_cached_acl(inode, p_acl, acl); 91 btrfs_update_cached_acl(inode, p_acl, acl);
92 } else {
93 acl = ERR_PTR(-EIO);
86 } 94 }
87 95
88 return acl; 96 return acl;
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 51bfdfc8fcda..502c3d61de62 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -25,6 +25,7 @@
25#define WORK_QUEUED_BIT 0 25#define WORK_QUEUED_BIT 0
26#define WORK_DONE_BIT 1 26#define WORK_DONE_BIT 1
27#define WORK_ORDER_DONE_BIT 2 27#define WORK_ORDER_DONE_BIT 2
28#define WORK_HIGH_PRIO_BIT 3
28 29
29/* 30/*
30 * container for the kthread task pointer and the list of pending work 31 * container for the kthread task pointer and the list of pending work
@@ -36,6 +37,7 @@ struct btrfs_worker_thread {
36 37
37 /* list of struct btrfs_work that are waiting for service */ 38 /* list of struct btrfs_work that are waiting for service */
38 struct list_head pending; 39 struct list_head pending;
40 struct list_head prio_pending;
39 41
40 /* list of worker threads from struct btrfs_workers */ 42 /* list of worker threads from struct btrfs_workers */
41 struct list_head worker_list; 43 struct list_head worker_list;
@@ -103,10 +105,16 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers,
103 105
104 spin_lock_irqsave(&workers->lock, flags); 106 spin_lock_irqsave(&workers->lock, flags);
105 107
106 while (!list_empty(&workers->order_list)) { 108 while (1) {
107 work = list_entry(workers->order_list.next, 109 if (!list_empty(&workers->prio_order_list)) {
108 struct btrfs_work, order_list); 110 work = list_entry(workers->prio_order_list.next,
109 111 struct btrfs_work, order_list);
112 } else if (!list_empty(&workers->order_list)) {
113 work = list_entry(workers->order_list.next,
114 struct btrfs_work, order_list);
115 } else {
116 break;
117 }
110 if (!test_bit(WORK_DONE_BIT, &work->flags)) 118 if (!test_bit(WORK_DONE_BIT, &work->flags))
111 break; 119 break;
112 120
@@ -143,8 +151,14 @@ static int worker_loop(void *arg)
143 do { 151 do {
144 spin_lock_irq(&worker->lock); 152 spin_lock_irq(&worker->lock);
145again_locked: 153again_locked:
146 while (!list_empty(&worker->pending)) { 154 while (1) {
147 cur = worker->pending.next; 155 if (!list_empty(&worker->prio_pending))
156 cur = worker->prio_pending.next;
157 else if (!list_empty(&worker->pending))
158 cur = worker->pending.next;
159 else
160 break;
161
148 work = list_entry(cur, struct btrfs_work, list); 162 work = list_entry(cur, struct btrfs_work, list);
149 list_del(&work->list); 163 list_del(&work->list);
150 clear_bit(WORK_QUEUED_BIT, &work->flags); 164 clear_bit(WORK_QUEUED_BIT, &work->flags);
@@ -163,7 +177,6 @@ again_locked:
163 177
164 spin_lock_irq(&worker->lock); 178 spin_lock_irq(&worker->lock);
165 check_idle_worker(worker); 179 check_idle_worker(worker);
166
167 } 180 }
168 if (freezing(current)) { 181 if (freezing(current)) {
169 worker->working = 0; 182 worker->working = 0;
@@ -178,7 +191,8 @@ again_locked:
178 * jump_in? 191 * jump_in?
179 */ 192 */
180 smp_mb(); 193 smp_mb();
181 if (!list_empty(&worker->pending)) 194 if (!list_empty(&worker->pending) ||
195 !list_empty(&worker->prio_pending))
182 continue; 196 continue;
183 197
184 /* 198 /*
@@ -191,7 +205,8 @@ again_locked:
191 */ 205 */
192 schedule_timeout(1); 206 schedule_timeout(1);
193 smp_mb(); 207 smp_mb();
194 if (!list_empty(&worker->pending)) 208 if (!list_empty(&worker->pending) ||
209 !list_empty(&worker->prio_pending))
195 continue; 210 continue;
196 211
197 if (kthread_should_stop()) 212 if (kthread_should_stop())
@@ -200,7 +215,8 @@ again_locked:
200 /* still no more work?, sleep for real */ 215 /* still no more work?, sleep for real */
201 spin_lock_irq(&worker->lock); 216 spin_lock_irq(&worker->lock);
202 set_current_state(TASK_INTERRUPTIBLE); 217 set_current_state(TASK_INTERRUPTIBLE);
203 if (!list_empty(&worker->pending)) 218 if (!list_empty(&worker->pending) ||
219 !list_empty(&worker->prio_pending))
204 goto again_locked; 220 goto again_locked;
205 221
206 /* 222 /*
@@ -248,6 +264,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max)
248 INIT_LIST_HEAD(&workers->worker_list); 264 INIT_LIST_HEAD(&workers->worker_list);
249 INIT_LIST_HEAD(&workers->idle_list); 265 INIT_LIST_HEAD(&workers->idle_list);
250 INIT_LIST_HEAD(&workers->order_list); 266 INIT_LIST_HEAD(&workers->order_list);
267 INIT_LIST_HEAD(&workers->prio_order_list);
251 spin_lock_init(&workers->lock); 268 spin_lock_init(&workers->lock);
252 workers->max_workers = max; 269 workers->max_workers = max;
253 workers->idle_thresh = 32; 270 workers->idle_thresh = 32;
@@ -273,6 +290,7 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
273 } 290 }
274 291
275 INIT_LIST_HEAD(&worker->pending); 292 INIT_LIST_HEAD(&worker->pending);
293 INIT_LIST_HEAD(&worker->prio_pending);
276 INIT_LIST_HEAD(&worker->worker_list); 294 INIT_LIST_HEAD(&worker->worker_list);
277 spin_lock_init(&worker->lock); 295 spin_lock_init(&worker->lock);
278 atomic_set(&worker->num_pending, 0); 296 atomic_set(&worker->num_pending, 0);
@@ -396,7 +414,10 @@ int btrfs_requeue_work(struct btrfs_work *work)
396 goto out; 414 goto out;
397 415
398 spin_lock_irqsave(&worker->lock, flags); 416 spin_lock_irqsave(&worker->lock, flags);
399 list_add_tail(&work->list, &worker->pending); 417 if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
418 list_add_tail(&work->list, &worker->prio_pending);
419 else
420 list_add_tail(&work->list, &worker->pending);
400 atomic_inc(&worker->num_pending); 421 atomic_inc(&worker->num_pending);
401 422
402 /* by definition we're busy, take ourselves off the idle 423 /* by definition we're busy, take ourselves off the idle
@@ -422,6 +443,11 @@ out:
422 return 0; 443 return 0;
423} 444}
424 445
446void btrfs_set_work_high_prio(struct btrfs_work *work)
447{
448 set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
449}
450
425/* 451/*
426 * places a struct btrfs_work into the pending queue of one of the kthreads 452 * places a struct btrfs_work into the pending queue of one of the kthreads
427 */ 453 */
@@ -438,7 +464,12 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
438 worker = find_worker(workers); 464 worker = find_worker(workers);
439 if (workers->ordered) { 465 if (workers->ordered) {
440 spin_lock_irqsave(&workers->lock, flags); 466 spin_lock_irqsave(&workers->lock, flags);
441 list_add_tail(&work->order_list, &workers->order_list); 467 if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
468 list_add_tail(&work->order_list,
469 &workers->prio_order_list);
470 } else {
471 list_add_tail(&work->order_list, &workers->order_list);
472 }
442 spin_unlock_irqrestore(&workers->lock, flags); 473 spin_unlock_irqrestore(&workers->lock, flags);
443 } else { 474 } else {
444 INIT_LIST_HEAD(&work->order_list); 475 INIT_LIST_HEAD(&work->order_list);
@@ -446,7 +477,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
446 477
447 spin_lock_irqsave(&worker->lock, flags); 478 spin_lock_irqsave(&worker->lock, flags);
448 479
449 list_add_tail(&work->list, &worker->pending); 480 if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
481 list_add_tail(&work->list, &worker->prio_pending);
482 else
483 list_add_tail(&work->list, &worker->pending);
450 atomic_inc(&worker->num_pending); 484 atomic_inc(&worker->num_pending);
451 check_busy_worker(worker); 485 check_busy_worker(worker);
452 486
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 31be4ed8b63e..1b511c109db6 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -85,6 +85,7 @@ struct btrfs_workers {
85 * of work items waiting for completion 85 * of work items waiting for completion
86 */ 86 */
87 struct list_head order_list; 87 struct list_head order_list;
88 struct list_head prio_order_list;
88 89
89 /* lock for finding the next worker thread to queue on */ 90 /* lock for finding the next worker thread to queue on */
90 spinlock_t lock; 91 spinlock_t lock;
@@ -98,4 +99,5 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers);
98int btrfs_stop_workers(struct btrfs_workers *workers); 99int btrfs_stop_workers(struct btrfs_workers *workers);
99void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max); 100void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max);
100int btrfs_requeue_work(struct btrfs_work *work); 101int btrfs_requeue_work(struct btrfs_work *work);
102void btrfs_set_work_high_prio(struct btrfs_work *work);
101#endif 103#endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index e5b2533b691a..a99f1c2a710d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1325,12 +1325,12 @@ static noinline int reada_for_balance(struct btrfs_root *root,
1325 int ret = 0; 1325 int ret = 0;
1326 int blocksize; 1326 int blocksize;
1327 1327
1328 parent = path->nodes[level - 1]; 1328 parent = path->nodes[level + 1];
1329 if (!parent) 1329 if (!parent)
1330 return 0; 1330 return 0;
1331 1331
1332 nritems = btrfs_header_nritems(parent); 1332 nritems = btrfs_header_nritems(parent);
1333 slot = path->slots[level]; 1333 slot = path->slots[level + 1];
1334 blocksize = btrfs_level_size(root, level); 1334 blocksize = btrfs_level_size(root, level);
1335 1335
1336 if (slot > 0) { 1336 if (slot > 0) {
@@ -1341,7 +1341,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
1341 block1 = 0; 1341 block1 = 0;
1342 free_extent_buffer(eb); 1342 free_extent_buffer(eb);
1343 } 1343 }
1344 if (slot < nritems) { 1344 if (slot + 1 < nritems) {
1345 block2 = btrfs_node_blockptr(parent, slot + 1); 1345 block2 = btrfs_node_blockptr(parent, slot + 1);
1346 gen = btrfs_node_ptr_generation(parent, slot + 1); 1346 gen = btrfs_node_ptr_generation(parent, slot + 1);
1347 eb = btrfs_find_tree_block(root, block2, blocksize); 1347 eb = btrfs_find_tree_block(root, block2, blocksize);
@@ -1351,7 +1351,11 @@ static noinline int reada_for_balance(struct btrfs_root *root,
1351 } 1351 }
1352 if (block1 || block2) { 1352 if (block1 || block2) {
1353 ret = -EAGAIN; 1353 ret = -EAGAIN;
1354
1355 /* release the whole path */
1354 btrfs_release_path(root, path); 1356 btrfs_release_path(root, path);
1357
1358 /* read the blocks */
1355 if (block1) 1359 if (block1)
1356 readahead_tree_block(root, block1, blocksize, 0); 1360 readahead_tree_block(root, block1, blocksize, 0);
1357 if (block2) 1361 if (block2)
@@ -1361,7 +1365,7 @@ static noinline int reada_for_balance(struct btrfs_root *root,
1361 eb = read_tree_block(root, block1, blocksize, 0); 1365 eb = read_tree_block(root, block1, blocksize, 0);
1362 free_extent_buffer(eb); 1366 free_extent_buffer(eb);
1363 } 1367 }
1364 if (block1) { 1368 if (block2) {
1365 eb = read_tree_block(root, block2, blocksize, 0); 1369 eb = read_tree_block(root, block2, blocksize, 0);
1366 free_extent_buffer(eb); 1370 free_extent_buffer(eb);
1367 } 1371 }
@@ -1481,12 +1485,15 @@ read_block_for_search(struct btrfs_trans_handle *trans,
1481 * of the btree by dropping locks before 1485 * of the btree by dropping locks before
1482 * we read. 1486 * we read.
1483 */ 1487 */
1484 btrfs_release_path(NULL, p); 1488 btrfs_unlock_up_safe(p, level + 1);
1489 btrfs_set_path_blocking(p);
1490
1485 if (tmp) 1491 if (tmp)
1486 free_extent_buffer(tmp); 1492 free_extent_buffer(tmp);
1487 if (p->reada) 1493 if (p->reada)
1488 reada_for_search(root, p, level, slot, key->objectid); 1494 reada_for_search(root, p, level, slot, key->objectid);
1489 1495
1496 btrfs_release_path(NULL, p);
1490 tmp = read_tree_block(root, blocknr, blocksize, gen); 1497 tmp = read_tree_block(root, blocknr, blocksize, gen);
1491 if (tmp) 1498 if (tmp)
1492 free_extent_buffer(tmp); 1499 free_extent_buffer(tmp);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ad96495dedc5..4414a5d9983a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -881,6 +881,9 @@ struct btrfs_fs_info {
881 u64 metadata_alloc_profile; 881 u64 metadata_alloc_profile;
882 u64 system_alloc_profile; 882 u64 system_alloc_profile;
883 883
884 unsigned data_chunk_allocations;
885 unsigned metadata_ratio;
886
884 void *bdev_holder; 887 void *bdev_holder;
885}; 888};
886 889
@@ -2174,7 +2177,8 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
2174extern struct file_operations btrfs_file_operations; 2177extern struct file_operations btrfs_file_operations;
2175int btrfs_drop_extents(struct btrfs_trans_handle *trans, 2178int btrfs_drop_extents(struct btrfs_trans_handle *trans,
2176 struct btrfs_root *root, struct inode *inode, 2179 struct btrfs_root *root, struct inode *inode,
2177 u64 start, u64 end, u64 inline_limit, u64 *hint_block); 2180 u64 start, u64 end, u64 locked_end,
2181 u64 inline_limit, u64 *hint_block);
2178int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, 2182int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
2179 struct btrfs_root *root, 2183 struct btrfs_root *root,
2180 struct inode *inode, u64 start, u64 end); 2184 struct inode *inode, u64 start, u64 end);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 92caa8035f36..0ff16d3331da 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -232,10 +232,14 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
232 memcpy(&found, result, csum_size); 232 memcpy(&found, result, csum_size);
233 233
234 read_extent_buffer(buf, &val, 0, csum_size); 234 read_extent_buffer(buf, &val, 0, csum_size);
235 printk(KERN_INFO "btrfs: %s checksum verify failed " 235 if (printk_ratelimit()) {
236 "on %llu wanted %X found %X level %d\n", 236 printk(KERN_INFO "btrfs: %s checksum verify "
237 root->fs_info->sb->s_id, 237 "failed on %llu wanted %X found %X "
238 buf->start, val, found, btrfs_header_level(buf)); 238 "level %d\n",
239 root->fs_info->sb->s_id,
240 (unsigned long long)buf->start, val, found,
241 btrfs_header_level(buf));
242 }
239 if (result != (char *)&inline_result) 243 if (result != (char *)&inline_result)
240 kfree(result); 244 kfree(result);
241 return 1; 245 return 1;
@@ -268,10 +272,13 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
268 ret = 0; 272 ret = 0;
269 goto out; 273 goto out;
270 } 274 }
271 printk("parent transid verify failed on %llu wanted %llu found %llu\n", 275 if (printk_ratelimit()) {
272 (unsigned long long)eb->start, 276 printk("parent transid verify failed on %llu wanted %llu "
273 (unsigned long long)parent_transid, 277 "found %llu\n",
274 (unsigned long long)btrfs_header_generation(eb)); 278 (unsigned long long)eb->start,
279 (unsigned long long)parent_transid,
280 (unsigned long long)btrfs_header_generation(eb));
281 }
275 ret = 1; 282 ret = 1;
276 clear_extent_buffer_uptodate(io_tree, eb); 283 clear_extent_buffer_uptodate(io_tree, eb);
277out: 284out:
@@ -415,9 +422,12 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
415 422
416 found_start = btrfs_header_bytenr(eb); 423 found_start = btrfs_header_bytenr(eb);
417 if (found_start != start) { 424 if (found_start != start) {
418 printk(KERN_INFO "btrfs bad tree block start %llu %llu\n", 425 if (printk_ratelimit()) {
419 (unsigned long long)found_start, 426 printk(KERN_INFO "btrfs bad tree block start "
420 (unsigned long long)eb->start); 427 "%llu %llu\n",
428 (unsigned long long)found_start,
429 (unsigned long long)eb->start);
430 }
421 ret = -EIO; 431 ret = -EIO;
422 goto err; 432 goto err;
423 } 433 }
@@ -429,8 +439,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
429 goto err; 439 goto err;
430 } 440 }
431 if (check_tree_block_fsid(root, eb)) { 441 if (check_tree_block_fsid(root, eb)) {
432 printk(KERN_INFO "btrfs bad fsid on block %llu\n", 442 if (printk_ratelimit()) {
433 (unsigned long long)eb->start); 443 printk(KERN_INFO "btrfs bad fsid on block %llu\n",
444 (unsigned long long)eb->start);
445 }
434 ret = -EIO; 446 ret = -EIO;
435 goto err; 447 goto err;
436 } 448 }
@@ -579,19 +591,12 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
579 async->bio_flags = bio_flags; 591 async->bio_flags = bio_flags;
580 592
581 atomic_inc(&fs_info->nr_async_submits); 593 atomic_inc(&fs_info->nr_async_submits);
594
595 if (rw & (1 << BIO_RW_SYNCIO))
596 btrfs_set_work_high_prio(&async->work);
597
582 btrfs_queue_worker(&fs_info->workers, &async->work); 598 btrfs_queue_worker(&fs_info->workers, &async->work);
583#if 0
584 int limit = btrfs_async_submit_limit(fs_info);
585 if (atomic_read(&fs_info->nr_async_submits) > limit) {
586 wait_event_timeout(fs_info->async_submit_wait,
587 (atomic_read(&fs_info->nr_async_submits) < limit),
588 HZ/10);
589 599
590 wait_event_timeout(fs_info->async_submit_wait,
591 (atomic_read(&fs_info->nr_async_bios) < limit),
592 HZ/10);
593 }
594#endif
595 while (atomic_read(&fs_info->async_submit_draining) && 600 while (atomic_read(&fs_info->async_submit_draining) &&
596 atomic_read(&fs_info->nr_async_submits)) { 601 atomic_read(&fs_info->nr_async_submits)) {
597 wait_event(fs_info->async_submit_wait, 602 wait_event(fs_info->async_submit_wait,
@@ -656,6 +661,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
656 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, 661 return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio,
657 mirror_num, 0); 662 mirror_num, 0);
658 } 663 }
664
659 /* 665 /*
660 * kthread helpers are used to submit writes so that checksumming 666 * kthread helpers are used to submit writes so that checksumming
661 * can happen in parallel across all CPUs 667 * can happen in parallel across all CPUs
@@ -765,27 +771,6 @@ static void btree_invalidatepage(struct page *page, unsigned long offset)
765 } 771 }
766} 772}
767 773
768#if 0
769static int btree_writepage(struct page *page, struct writeback_control *wbc)
770{
771 struct buffer_head *bh;
772 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
773 struct buffer_head *head;
774 if (!page_has_buffers(page)) {
775 create_empty_buffers(page, root->fs_info->sb->s_blocksize,
776 (1 << BH_Dirty)|(1 << BH_Uptodate));
777 }
778 head = page_buffers(page);
779 bh = head;
780 do {
781 if (buffer_dirty(bh))
782 csum_tree_block(root, bh, 0);
783 bh = bh->b_this_page;
784 } while (bh != head);
785 return block_write_full_page(page, btree_get_block, wbc);
786}
787#endif
788
789static struct address_space_operations btree_aops = { 774static struct address_space_operations btree_aops = {
790 .readpage = btree_readpage, 775 .readpage = btree_readpage,
791 .writepage = btree_writepage, 776 .writepage = btree_writepage,
@@ -1273,11 +1258,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits)
1273 int ret = 0; 1258 int ret = 0;
1274 struct btrfs_device *device; 1259 struct btrfs_device *device;
1275 struct backing_dev_info *bdi; 1260 struct backing_dev_info *bdi;
1276#if 0 1261
1277 if ((bdi_bits & (1 << BDI_write_congested)) &&
1278 btrfs_congested_async(info, 0))
1279 return 1;
1280#endif
1281 list_for_each_entry(device, &info->fs_devices->devices, dev_list) { 1262 list_for_each_entry(device, &info->fs_devices->devices, dev_list) {
1282 if (!device->bdev) 1263 if (!device->bdev)
1283 continue; 1264 continue;
@@ -1599,6 +1580,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1599 fs_info->btree_inode = new_inode(sb); 1580 fs_info->btree_inode = new_inode(sb);
1600 fs_info->btree_inode->i_ino = 1; 1581 fs_info->btree_inode->i_ino = 1;
1601 fs_info->btree_inode->i_nlink = 1; 1582 fs_info->btree_inode->i_nlink = 1;
1583 fs_info->metadata_ratio = 8;
1602 1584
1603 fs_info->thread_pool_size = min_t(unsigned long, 1585 fs_info->thread_pool_size = min_t(unsigned long,
1604 num_online_cpus() + 2, 8); 1586 num_online_cpus() + 2, 8);
@@ -1689,7 +1671,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1689 if (features) { 1671 if (features) {
1690 printk(KERN_ERR "BTRFS: couldn't mount because of " 1672 printk(KERN_ERR "BTRFS: couldn't mount because of "
1691 "unsupported optional features (%Lx).\n", 1673 "unsupported optional features (%Lx).\n",
1692 features); 1674 (unsigned long long)features);
1693 err = -EINVAL; 1675 err = -EINVAL;
1694 goto fail_iput; 1676 goto fail_iput;
1695 } 1677 }
@@ -1699,7 +1681,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1699 if (!(sb->s_flags & MS_RDONLY) && features) { 1681 if (!(sb->s_flags & MS_RDONLY) && features) {
1700 printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " 1682 printk(KERN_ERR "BTRFS: couldn't mount RDWR because of "
1701 "unsupported option features (%Lx).\n", 1683 "unsupported option features (%Lx).\n",
1702 features); 1684 (unsigned long long)features);
1703 err = -EINVAL; 1685 err = -EINVAL;
1704 goto fail_iput; 1686 goto fail_iput;
1705 } 1687 }
@@ -2095,10 +2077,10 @@ static int write_dev_supers(struct btrfs_device *device,
2095 device->barriers = 0; 2077 device->barriers = 0;
2096 get_bh(bh); 2078 get_bh(bh);
2097 lock_buffer(bh); 2079 lock_buffer(bh);
2098 ret = submit_bh(WRITE, bh); 2080 ret = submit_bh(WRITE_SYNC, bh);
2099 } 2081 }
2100 } else { 2082 } else {
2101 ret = submit_bh(WRITE, bh); 2083 ret = submit_bh(WRITE_SYNC, bh);
2102 } 2084 }
2103 2085
2104 if (!ret && wait) { 2086 if (!ret && wait) {
@@ -2291,7 +2273,7 @@ int close_ctree(struct btrfs_root *root)
2291 2273
2292 if (fs_info->delalloc_bytes) { 2274 if (fs_info->delalloc_bytes) {
2293 printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", 2275 printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
2294 fs_info->delalloc_bytes); 2276 (unsigned long long)fs_info->delalloc_bytes);
2295 } 2277 }
2296 if (fs_info->total_ref_cache_size) { 2278 if (fs_info->total_ref_cache_size) {
2297 printk(KERN_INFO "btrfs: at umount reference cache size %llu\n", 2279 printk(KERN_INFO "btrfs: at umount reference cache size %llu\n",
@@ -2328,16 +2310,6 @@ int close_ctree(struct btrfs_root *root)
2328 btrfs_stop_workers(&fs_info->endio_write_workers); 2310 btrfs_stop_workers(&fs_info->endio_write_workers);
2329 btrfs_stop_workers(&fs_info->submit_workers); 2311 btrfs_stop_workers(&fs_info->submit_workers);
2330 2312
2331#if 0
2332 while (!list_empty(&fs_info->hashers)) {
2333 struct btrfs_hasher *hasher;
2334 hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher,
2335 hashers);
2336 list_del(&hasher->hashers);
2337 crypto_free_hash(&fs_info->hash_tfm);
2338 kfree(hasher);
2339 }
2340#endif
2341 btrfs_close_devices(fs_info->fs_devices); 2313 btrfs_close_devices(fs_info->fs_devices);
2342 btrfs_mapping_tree_free(&fs_info->mapping_tree); 2314 btrfs_mapping_tree_free(&fs_info->mapping_tree);
2343 2315
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 178df4c67de4..e4966444811b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1844,10 +1844,14 @@ again:
1844 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" 1844 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
1845 ", %llu bytes_used, %llu bytes_reserved, " 1845 ", %llu bytes_used, %llu bytes_reserved, "
1846 "%llu bytes_pinned, %llu bytes_readonly, %llu may use" 1846 "%llu bytes_pinned, %llu bytes_readonly, %llu may use"
1847 "%llu total\n", bytes, data_sinfo->bytes_delalloc, 1847 "%llu total\n", (unsigned long long)bytes,
1848 data_sinfo->bytes_used, data_sinfo->bytes_reserved, 1848 (unsigned long long)data_sinfo->bytes_delalloc,
1849 data_sinfo->bytes_pinned, data_sinfo->bytes_readonly, 1849 (unsigned long long)data_sinfo->bytes_used,
1850 data_sinfo->bytes_may_use, data_sinfo->total_bytes); 1850 (unsigned long long)data_sinfo->bytes_reserved,
1851 (unsigned long long)data_sinfo->bytes_pinned,
1852 (unsigned long long)data_sinfo->bytes_readonly,
1853 (unsigned long long)data_sinfo->bytes_may_use,
1854 (unsigned long long)data_sinfo->total_bytes);
1851 return -ENOSPC; 1855 return -ENOSPC;
1852 } 1856 }
1853 data_sinfo->bytes_may_use += bytes; 1857 data_sinfo->bytes_may_use += bytes;
@@ -1918,15 +1922,29 @@ void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
1918 spin_unlock(&info->lock); 1922 spin_unlock(&info->lock);
1919} 1923}
1920 1924
1925static void force_metadata_allocation(struct btrfs_fs_info *info)
1926{
1927 struct list_head *head = &info->space_info;
1928 struct btrfs_space_info *found;
1929
1930 rcu_read_lock();
1931 list_for_each_entry_rcu(found, head, list) {
1932 if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
1933 found->force_alloc = 1;
1934 }
1935 rcu_read_unlock();
1936}
1937
1921static int do_chunk_alloc(struct btrfs_trans_handle *trans, 1938static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1922 struct btrfs_root *extent_root, u64 alloc_bytes, 1939 struct btrfs_root *extent_root, u64 alloc_bytes,
1923 u64 flags, int force) 1940 u64 flags, int force)
1924{ 1941{
1925 struct btrfs_space_info *space_info; 1942 struct btrfs_space_info *space_info;
1943 struct btrfs_fs_info *fs_info = extent_root->fs_info;
1926 u64 thresh; 1944 u64 thresh;
1927 int ret = 0; 1945 int ret = 0;
1928 1946
1929 mutex_lock(&extent_root->fs_info->chunk_mutex); 1947 mutex_lock(&fs_info->chunk_mutex);
1930 1948
1931 flags = btrfs_reduce_alloc_profile(extent_root, flags); 1949 flags = btrfs_reduce_alloc_profile(extent_root, flags);
1932 1950
@@ -1958,6 +1976,18 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1958 } 1976 }
1959 spin_unlock(&space_info->lock); 1977 spin_unlock(&space_info->lock);
1960 1978
1979 /*
1980 * if we're doing a data chunk, go ahead and make sure that
1981 * we keep a reasonable number of metadata chunks allocated in the
1982 * FS as well.
1983 */
1984 if (flags & BTRFS_BLOCK_GROUP_DATA) {
1985 fs_info->data_chunk_allocations++;
1986 if (!(fs_info->data_chunk_allocations %
1987 fs_info->metadata_ratio))
1988 force_metadata_allocation(fs_info);
1989 }
1990
1961 ret = btrfs_alloc_chunk(trans, extent_root, flags); 1991 ret = btrfs_alloc_chunk(trans, extent_root, flags);
1962 if (ret) 1992 if (ret)
1963 space_info->full = 1; 1993 space_info->full = 1;
@@ -2798,9 +2828,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
2798 info->bytes_pinned - info->bytes_reserved), 2828 info->bytes_pinned - info->bytes_reserved),
2799 (info->full) ? "" : "not "); 2829 (info->full) ? "" : "not ");
2800 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," 2830 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
2801 " may_use=%llu, used=%llu\n", info->total_bytes, 2831 " may_use=%llu, used=%llu\n",
2802 info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use, 2832 (unsigned long long)info->total_bytes,
2803 info->bytes_used); 2833 (unsigned long long)info->bytes_pinned,
2834 (unsigned long long)info->bytes_delalloc,
2835 (unsigned long long)info->bytes_may_use,
2836 (unsigned long long)info->bytes_used);
2804 2837
2805 down_read(&info->groups_sem); 2838 down_read(&info->groups_sem);
2806 list_for_each_entry(cache, &info->block_groups, list) { 2839 list_for_each_entry(cache, &info->block_groups, list) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index eb2bee8b7fbf..fe9eb990e443 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -17,12 +17,6 @@
17#include "ctree.h" 17#include "ctree.h"
18#include "btrfs_inode.h" 18#include "btrfs_inode.h"
19 19
20/* temporary define until extent_map moves out of btrfs */
21struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
22 unsigned long extra_flags,
23 void (*ctor)(void *, struct kmem_cache *,
24 unsigned long));
25
26static struct kmem_cache *extent_state_cache; 20static struct kmem_cache *extent_state_cache;
27static struct kmem_cache *extent_buffer_cache; 21static struct kmem_cache *extent_buffer_cache;
28 22
@@ -50,20 +44,23 @@ struct extent_page_data {
50 /* tells writepage not to lock the state bits for this range 44 /* tells writepage not to lock the state bits for this range
51 * it still does the unlocking 45 * it still does the unlocking
52 */ 46 */
53 int extent_locked; 47 unsigned int extent_locked:1;
48
49 /* tells the submit_bio code to use a WRITE_SYNC */
50 unsigned int sync_io:1;
54}; 51};
55 52
56int __init extent_io_init(void) 53int __init extent_io_init(void)
57{ 54{
58 extent_state_cache = btrfs_cache_create("extent_state", 55 extent_state_cache = kmem_cache_create("extent_state",
59 sizeof(struct extent_state), 0, 56 sizeof(struct extent_state), 0,
60 NULL); 57 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
61 if (!extent_state_cache) 58 if (!extent_state_cache)
62 return -ENOMEM; 59 return -ENOMEM;
63 60
64 extent_buffer_cache = btrfs_cache_create("extent_buffers", 61 extent_buffer_cache = kmem_cache_create("extent_buffers",
65 sizeof(struct extent_buffer), 0, 62 sizeof(struct extent_buffer), 0,
66 NULL); 63 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
67 if (!extent_buffer_cache) 64 if (!extent_buffer_cache)
68 goto free_state_cache; 65 goto free_state_cache;
69 return 0; 66 return 0;
@@ -1404,69 +1401,6 @@ out:
1404 return total_bytes; 1401 return total_bytes;
1405} 1402}
1406 1403
1407#if 0
1408/*
1409 * helper function to lock both pages and extents in the tree.
1410 * pages must be locked first.
1411 */
1412static int lock_range(struct extent_io_tree *tree, u64 start, u64 end)
1413{
1414 unsigned long index = start >> PAGE_CACHE_SHIFT;
1415 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1416 struct page *page;
1417 int err;
1418
1419 while (index <= end_index) {
1420 page = grab_cache_page(tree->mapping, index);
1421 if (!page) {
1422 err = -ENOMEM;
1423 goto failed;
1424 }
1425 if (IS_ERR(page)) {
1426 err = PTR_ERR(page);
1427 goto failed;
1428 }
1429 index++;
1430 }
1431 lock_extent(tree, start, end, GFP_NOFS);
1432 return 0;
1433
1434failed:
1435 /*
1436 * we failed above in getting the page at 'index', so we undo here
1437 * up to but not including the page at 'index'
1438 */
1439 end_index = index;
1440 index = start >> PAGE_CACHE_SHIFT;
1441 while (index < end_index) {
1442 page = find_get_page(tree->mapping, index);
1443 unlock_page(page);
1444 page_cache_release(page);
1445 index++;
1446 }
1447 return err;
1448}
1449
1450/*
1451 * helper function to unlock both pages and extents in the tree.
1452 */
1453static int unlock_range(struct extent_io_tree *tree, u64 start, u64 end)
1454{
1455 unsigned long index = start >> PAGE_CACHE_SHIFT;
1456 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1457 struct page *page;
1458
1459 while (index <= end_index) {
1460 page = find_get_page(tree->mapping, index);
1461 unlock_page(page);
1462 page_cache_release(page);
1463 index++;
1464 }
1465 unlock_extent(tree, start, end, GFP_NOFS);
1466 return 0;
1467}
1468#endif
1469
1470/* 1404/*
1471 * set the private field for a given byte offset in the tree. If there isn't 1405 * set the private field for a given byte offset in the tree. If there isn't
1472 * an extent_state there already, this does nothing. 1406 * an extent_state there already, this does nothing.
@@ -2101,6 +2035,16 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
2101 return ret; 2035 return ret;
2102} 2036}
2103 2037
2038static noinline void update_nr_written(struct page *page,
2039 struct writeback_control *wbc,
2040 unsigned long nr_written)
2041{
2042 wbc->nr_to_write -= nr_written;
2043 if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
2044 wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
2045 page->mapping->writeback_index = page->index + nr_written;
2046}
2047
2104/* 2048/*
2105 * the writepage semantics are similar to regular writepage. extent 2049 * the writepage semantics are similar to regular writepage. extent
2106 * records are inserted to lock ranges in the tree, and as dirty areas 2050 * records are inserted to lock ranges in the tree, and as dirty areas
@@ -2136,8 +2080,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2136 u64 delalloc_end; 2080 u64 delalloc_end;
2137 int page_started; 2081 int page_started;
2138 int compressed; 2082 int compressed;
2083 int write_flags;
2139 unsigned long nr_written = 0; 2084 unsigned long nr_written = 0;
2140 2085
2086 if (wbc->sync_mode == WB_SYNC_ALL)
2087 write_flags = WRITE_SYNC_PLUG;
2088 else
2089 write_flags = WRITE;
2090
2141 WARN_ON(!PageLocked(page)); 2091 WARN_ON(!PageLocked(page));
2142 pg_offset = i_size & (PAGE_CACHE_SIZE - 1); 2092 pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
2143 if (page->index > end_index || 2093 if (page->index > end_index ||
@@ -2164,6 +2114,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2164 delalloc_end = 0; 2114 delalloc_end = 0;
2165 page_started = 0; 2115 page_started = 0;
2166 if (!epd->extent_locked) { 2116 if (!epd->extent_locked) {
2117 /*
2118 * make sure the wbc mapping index is at least updated
2119 * to this page.
2120 */
2121 update_nr_written(page, wbc, 0);
2122
2167 while (delalloc_end < page_end) { 2123 while (delalloc_end < page_end) {
2168 nr_delalloc = find_lock_delalloc_range(inode, tree, 2124 nr_delalloc = find_lock_delalloc_range(inode, tree,
2169 page, 2125 page,
@@ -2185,7 +2141,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2185 */ 2141 */
2186 if (page_started) { 2142 if (page_started) {
2187 ret = 0; 2143 ret = 0;
2188 goto update_nr_written; 2144 /*
2145 * we've unlocked the page, so we can't update
2146 * the mapping's writeback index, just update
2147 * nr_to_write.
2148 */
2149 wbc->nr_to_write -= nr_written;
2150 goto done_unlocked;
2189 } 2151 }
2190 } 2152 }
2191 lock_extent(tree, start, page_end, GFP_NOFS); 2153 lock_extent(tree, start, page_end, GFP_NOFS);
@@ -2198,13 +2160,18 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2198 if (ret == -EAGAIN) { 2160 if (ret == -EAGAIN) {
2199 unlock_extent(tree, start, page_end, GFP_NOFS); 2161 unlock_extent(tree, start, page_end, GFP_NOFS);
2200 redirty_page_for_writepage(wbc, page); 2162 redirty_page_for_writepage(wbc, page);
2163 update_nr_written(page, wbc, nr_written);
2201 unlock_page(page); 2164 unlock_page(page);
2202 ret = 0; 2165 ret = 0;
2203 goto update_nr_written; 2166 goto done_unlocked;
2204 } 2167 }
2205 } 2168 }
2206 2169
2207 nr_written++; 2170 /*
2171 * we don't want to touch the inode after unlocking the page,
2172 * so we update the mapping writeback index now
2173 */
2174 update_nr_written(page, wbc, nr_written + 1);
2208 2175
2209 end = page_end; 2176 end = page_end;
2210 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) 2177 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0))
@@ -2314,9 +2281,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2314 (unsigned long long)end); 2281 (unsigned long long)end);
2315 } 2282 }
2316 2283
2317 ret = submit_extent_page(WRITE, tree, page, sector, 2284 ret = submit_extent_page(write_flags, tree, page,
2318 iosize, pg_offset, bdev, 2285 sector, iosize, pg_offset,
2319 &epd->bio, max_nr, 2286 bdev, &epd->bio, max_nr,
2320 end_bio_extent_writepage, 2287 end_bio_extent_writepage,
2321 0, 0, 0); 2288 0, 0, 0);
2322 if (ret) 2289 if (ret)
@@ -2336,11 +2303,8 @@ done:
2336 unlock_extent(tree, unlock_start, page_end, GFP_NOFS); 2303 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
2337 unlock_page(page); 2304 unlock_page(page);
2338 2305
2339update_nr_written: 2306done_unlocked:
2340 wbc->nr_to_write -= nr_written; 2307
2341 if (wbc->range_cyclic || (wbc->nr_to_write > 0 &&
2342 wbc->range_start == 0 && wbc->range_end == LLONG_MAX))
2343 page->mapping->writeback_index = page->index + nr_written;
2344 return 0; 2308 return 0;
2345} 2309}
2346 2310
@@ -2460,15 +2424,23 @@ retry:
2460 return ret; 2424 return ret;
2461} 2425}
2462 2426
2463static noinline void flush_write_bio(void *data) 2427static void flush_epd_write_bio(struct extent_page_data *epd)
2464{ 2428{
2465 struct extent_page_data *epd = data;
2466 if (epd->bio) { 2429 if (epd->bio) {
2467 submit_one_bio(WRITE, epd->bio, 0, 0); 2430 if (epd->sync_io)
2431 submit_one_bio(WRITE_SYNC, epd->bio, 0, 0);
2432 else
2433 submit_one_bio(WRITE, epd->bio, 0, 0);
2468 epd->bio = NULL; 2434 epd->bio = NULL;
2469 } 2435 }
2470} 2436}
2471 2437
2438static noinline void flush_write_bio(void *data)
2439{
2440 struct extent_page_data *epd = data;
2441 flush_epd_write_bio(epd);
2442}
2443
2472int extent_write_full_page(struct extent_io_tree *tree, struct page *page, 2444int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2473 get_extent_t *get_extent, 2445 get_extent_t *get_extent,
2474 struct writeback_control *wbc) 2446 struct writeback_control *wbc)
@@ -2480,23 +2452,22 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2480 .tree = tree, 2452 .tree = tree,
2481 .get_extent = get_extent, 2453 .get_extent = get_extent,
2482 .extent_locked = 0, 2454 .extent_locked = 0,
2455 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
2483 }; 2456 };
2484 struct writeback_control wbc_writepages = { 2457 struct writeback_control wbc_writepages = {
2485 .bdi = wbc->bdi, 2458 .bdi = wbc->bdi,
2486 .sync_mode = WB_SYNC_NONE, 2459 .sync_mode = wbc->sync_mode,
2487 .older_than_this = NULL, 2460 .older_than_this = NULL,
2488 .nr_to_write = 64, 2461 .nr_to_write = 64,
2489 .range_start = page_offset(page) + PAGE_CACHE_SIZE, 2462 .range_start = page_offset(page) + PAGE_CACHE_SIZE,
2490 .range_end = (loff_t)-1, 2463 .range_end = (loff_t)-1,
2491 }; 2464 };
2492 2465
2493
2494 ret = __extent_writepage(page, wbc, &epd); 2466 ret = __extent_writepage(page, wbc, &epd);
2495 2467
2496 extent_write_cache_pages(tree, mapping, &wbc_writepages, 2468 extent_write_cache_pages(tree, mapping, &wbc_writepages,
2497 __extent_writepage, &epd, flush_write_bio); 2469 __extent_writepage, &epd, flush_write_bio);
2498 if (epd.bio) 2470 flush_epd_write_bio(&epd);
2499 submit_one_bio(WRITE, epd.bio, 0, 0);
2500 return ret; 2471 return ret;
2501} 2472}
2502 2473
@@ -2515,6 +2486,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
2515 .tree = tree, 2486 .tree = tree,
2516 .get_extent = get_extent, 2487 .get_extent = get_extent,
2517 .extent_locked = 1, 2488 .extent_locked = 1,
2489 .sync_io = mode == WB_SYNC_ALL,
2518 }; 2490 };
2519 struct writeback_control wbc_writepages = { 2491 struct writeback_control wbc_writepages = {
2520 .bdi = inode->i_mapping->backing_dev_info, 2492 .bdi = inode->i_mapping->backing_dev_info,
@@ -2540,8 +2512,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
2540 start += PAGE_CACHE_SIZE; 2512 start += PAGE_CACHE_SIZE;
2541 } 2513 }
2542 2514
2543 if (epd.bio) 2515 flush_epd_write_bio(&epd);
2544 submit_one_bio(WRITE, epd.bio, 0, 0);
2545 return ret; 2516 return ret;
2546} 2517}
2547 2518
@@ -2556,13 +2527,13 @@ int extent_writepages(struct extent_io_tree *tree,
2556 .tree = tree, 2527 .tree = tree,
2557 .get_extent = get_extent, 2528 .get_extent = get_extent,
2558 .extent_locked = 0, 2529 .extent_locked = 0,
2530 .sync_io = wbc->sync_mode == WB_SYNC_ALL,
2559 }; 2531 };
2560 2532
2561 ret = extent_write_cache_pages(tree, mapping, wbc, 2533 ret = extent_write_cache_pages(tree, mapping, wbc,
2562 __extent_writepage, &epd, 2534 __extent_writepage, &epd,
2563 flush_write_bio); 2535 flush_write_bio);
2564 if (epd.bio) 2536 flush_epd_write_bio(&epd);
2565 submit_one_bio(WRITE, epd.bio, 0, 0);
2566 return ret; 2537 return ret;
2567} 2538}
2568 2539
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index b187917b36fa..30c9365861e6 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -6,19 +6,14 @@
6#include <linux/hardirq.h> 6#include <linux/hardirq.h>
7#include "extent_map.h" 7#include "extent_map.h"
8 8
9/* temporary define until extent_map moves out of btrfs */
10struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
11 unsigned long extra_flags,
12 void (*ctor)(void *, struct kmem_cache *,
13 unsigned long));
14 9
15static struct kmem_cache *extent_map_cache; 10static struct kmem_cache *extent_map_cache;
16 11
17int __init extent_map_init(void) 12int __init extent_map_init(void)
18{ 13{
19 extent_map_cache = btrfs_cache_create("extent_map", 14 extent_map_cache = kmem_cache_create("extent_map",
20 sizeof(struct extent_map), 0, 15 sizeof(struct extent_map), 0,
21 NULL); 16 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
22 if (!extent_map_cache) 17 if (!extent_map_cache)
23 return -ENOMEM; 18 return -ENOMEM;
24 return 0; 19 return 0;
@@ -43,7 +38,6 @@ void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask)
43 tree->map.rb_node = NULL; 38 tree->map.rb_node = NULL;
44 spin_lock_init(&tree->lock); 39 spin_lock_init(&tree->lock);
45} 40}
46EXPORT_SYMBOL(extent_map_tree_init);
47 41
48/** 42/**
49 * alloc_extent_map - allocate new extent map structure 43 * alloc_extent_map - allocate new extent map structure
@@ -64,7 +58,6 @@ struct extent_map *alloc_extent_map(gfp_t mask)
64 atomic_set(&em->refs, 1); 58 atomic_set(&em->refs, 1);
65 return em; 59 return em;
66} 60}
67EXPORT_SYMBOL(alloc_extent_map);
68 61
69/** 62/**
70 * free_extent_map - drop reference count of an extent_map 63 * free_extent_map - drop reference count of an extent_map
@@ -83,7 +76,6 @@ void free_extent_map(struct extent_map *em)
83 kmem_cache_free(extent_map_cache, em); 76 kmem_cache_free(extent_map_cache, em);
84 } 77 }
85} 78}
86EXPORT_SYMBOL(free_extent_map);
87 79
88static struct rb_node *tree_insert(struct rb_root *root, u64 offset, 80static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
89 struct rb_node *node) 81 struct rb_node *node)
@@ -264,7 +256,6 @@ int add_extent_mapping(struct extent_map_tree *tree,
264out: 256out:
265 return ret; 257 return ret;
266} 258}
267EXPORT_SYMBOL(add_extent_mapping);
268 259
269/* simple helper to do math around the end of an extent, handling wrap */ 260/* simple helper to do math around the end of an extent, handling wrap */
270static u64 range_end(u64 start, u64 len) 261static u64 range_end(u64 start, u64 len)
@@ -326,7 +317,6 @@ found:
326out: 317out:
327 return em; 318 return em;
328} 319}
329EXPORT_SYMBOL(lookup_extent_mapping);
330 320
331/** 321/**
332 * remove_extent_mapping - removes an extent_map from the extent tree 322 * remove_extent_mapping - removes an extent_map from the extent tree
@@ -346,4 +336,3 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
346 em->in_tree = 0; 336 em->in_tree = 0;
347 return ret; 337 return ret;
348} 338}
349EXPORT_SYMBOL(remove_extent_mapping);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9c9fb46ccd08..1d51dc38bb49 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -272,83 +272,6 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
272 return 0; 272 return 0;
273} 273}
274 274
275int btrfs_check_file(struct btrfs_root *root, struct inode *inode)
276{
277 return 0;
278#if 0
279 struct btrfs_path *path;
280 struct btrfs_key found_key;
281 struct extent_buffer *leaf;
282 struct btrfs_file_extent_item *extent;
283 u64 last_offset = 0;
284 int nritems;
285 int slot;
286 int found_type;
287 int ret;
288 int err = 0;
289 u64 extent_end = 0;
290
291 path = btrfs_alloc_path();
292 ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino,
293 last_offset, 0);
294 while (1) {
295 nritems = btrfs_header_nritems(path->nodes[0]);
296 if (path->slots[0] >= nritems) {
297 ret = btrfs_next_leaf(root, path);
298 if (ret)
299 goto out;
300 nritems = btrfs_header_nritems(path->nodes[0]);
301 }
302 slot = path->slots[0];
303 leaf = path->nodes[0];
304 btrfs_item_key_to_cpu(leaf, &found_key, slot);
305 if (found_key.objectid != inode->i_ino)
306 break;
307 if (found_key.type != BTRFS_EXTENT_DATA_KEY)
308 goto out;
309
310 if (found_key.offset < last_offset) {
311 WARN_ON(1);
312 btrfs_print_leaf(root, leaf);
313 printk(KERN_ERR "inode %lu found offset %llu "
314 "expected %llu\n", inode->i_ino,
315 (unsigned long long)found_key.offset,
316 (unsigned long long)last_offset);
317 err = 1;
318 goto out;
319 }
320 extent = btrfs_item_ptr(leaf, slot,
321 struct btrfs_file_extent_item);
322 found_type = btrfs_file_extent_type(leaf, extent);
323 if (found_type == BTRFS_FILE_EXTENT_REG) {
324 extent_end = found_key.offset +
325 btrfs_file_extent_num_bytes(leaf, extent);
326 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
327 struct btrfs_item *item;
328 item = btrfs_item_nr(leaf, slot);
329 extent_end = found_key.offset +
330 btrfs_file_extent_inline_len(leaf, extent);
331 extent_end = (extent_end + root->sectorsize - 1) &
332 ~((u64)root->sectorsize - 1);
333 }
334 last_offset = extent_end;
335 path->slots[0]++;
336 }
337 if (0 && last_offset < inode->i_size) {
338 WARN_ON(1);
339 btrfs_print_leaf(root, leaf);
340 printk(KERN_ERR "inode %lu found offset %llu size %llu\n",
341 inode->i_ino, (unsigned long long)last_offset,
342 (unsigned long long)inode->i_size);
343 err = 1;
344
345 }
346out:
347 btrfs_free_path(path);
348 return err;
349#endif
350}
351
352/* 275/*
353 * this is very complex, but the basic idea is to drop all extents 276 * this is very complex, but the basic idea is to drop all extents
354 * in the range start - end. hint_block is filled in with a block number 277 * in the range start - end. hint_block is filled in with a block number
@@ -363,15 +286,16 @@ out:
363 */ 286 */
364noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, 287noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
365 struct btrfs_root *root, struct inode *inode, 288 struct btrfs_root *root, struct inode *inode,
366 u64 start, u64 end, u64 inline_limit, u64 *hint_byte) 289 u64 start, u64 end, u64 locked_end,
290 u64 inline_limit, u64 *hint_byte)
367{ 291{
368 u64 extent_end = 0; 292 u64 extent_end = 0;
369 u64 locked_end = end;
370 u64 search_start = start; 293 u64 search_start = start;
371 u64 leaf_start; 294 u64 leaf_start;
372 u64 ram_bytes = 0; 295 u64 ram_bytes = 0;
373 u64 orig_parent = 0; 296 u64 orig_parent = 0;
374 u64 disk_bytenr = 0; 297 u64 disk_bytenr = 0;
298 u64 orig_locked_end = locked_end;
375 u8 compression; 299 u8 compression;
376 u8 encryption; 300 u8 encryption;
377 u16 other_encoding = 0; 301 u16 other_encoding = 0;
@@ -684,11 +608,10 @@ next_slot:
684 } 608 }
685out: 609out:
686 btrfs_free_path(path); 610 btrfs_free_path(path);
687 if (locked_end > end) { 611 if (locked_end > orig_locked_end) {
688 unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, 612 unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end,
689 GFP_NOFS); 613 locked_end - 1, GFP_NOFS);
690 } 614 }
691 btrfs_check_file(root, inode);
692 return ret; 615 return ret;
693} 616}
694 617
@@ -830,7 +753,7 @@ again:
830 753
831 ret = btrfs_del_items(trans, root, path, del_slot, del_nr); 754 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
832 BUG_ON(ret); 755 BUG_ON(ret);
833 goto done; 756 goto release;
834 } else if (split == start) { 757 } else if (split == start) {
835 if (locked_end < extent_end) { 758 if (locked_end < extent_end) {
836 ret = try_lock_extent(&BTRFS_I(inode)->io_tree, 759 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
@@ -926,6 +849,8 @@ again:
926 } 849 }
927done: 850done:
928 btrfs_mark_buffer_dirty(leaf); 851 btrfs_mark_buffer_dirty(leaf);
852
853release:
929 btrfs_release_path(root, path); 854 btrfs_release_path(root, path);
930 if (split_end && split == start) { 855 if (split_end && split == start) {
931 split = end; 856 split = end;
@@ -1131,7 +1056,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1131 if (will_write) { 1056 if (will_write) {
1132 btrfs_fdatawrite_range(inode->i_mapping, pos, 1057 btrfs_fdatawrite_range(inode->i_mapping, pos,
1133 pos + write_bytes - 1, 1058 pos + write_bytes - 1,
1134 WB_SYNC_NONE); 1059 WB_SYNC_ALL);
1135 } else { 1060 } else {
1136 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1061 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
1137 num_pages); 1062 num_pages);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 768b9523662d..0bc93657b460 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -332,13 +332,17 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
332 printk(KERN_ERR "couldn't find space %llu to free\n", 332 printk(KERN_ERR "couldn't find space %llu to free\n",
333 (unsigned long long)offset); 333 (unsigned long long)offset);
334 printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n", 334 printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n",
335 block_group->cached, block_group->key.objectid, 335 block_group->cached,
336 block_group->key.offset); 336 (unsigned long long)block_group->key.objectid,
337 (unsigned long long)block_group->key.offset);
337 btrfs_dump_free_space(block_group, bytes); 338 btrfs_dump_free_space(block_group, bytes);
338 } else if (info) { 339 } else if (info) {
339 printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, " 340 printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, "
340 "but wanted offset=%llu bytes=%llu\n", 341 "but wanted offset=%llu bytes=%llu\n",
341 info->offset, info->bytes, offset, bytes); 342 (unsigned long long)info->offset,
343 (unsigned long long)info->bytes,
344 (unsigned long long)offset,
345 (unsigned long long)bytes);
342 } 346 }
343 WARN_ON(1); 347 WARN_ON(1);
344 } 348 }
@@ -357,8 +361,9 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
357 info = rb_entry(n, struct btrfs_free_space, offset_index); 361 info = rb_entry(n, struct btrfs_free_space, offset_index);
358 if (info->bytes >= bytes) 362 if (info->bytes >= bytes)
359 count++; 363 count++;
360 printk(KERN_ERR "entry offset %llu, bytes %llu\n", info->offset, 364 printk(KERN_ERR "entry offset %llu, bytes %llu\n",
361 info->bytes); 365 (unsigned long long)info->offset,
366 (unsigned long long)info->bytes);
362 } 367 }
363 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" 368 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
364 "\n", count); 369 "\n", count);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index cc7334d833c9..9abbced1123d 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -79,7 +79,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans,
79 } 79 }
80 path = btrfs_alloc_path(); 80 path = btrfs_alloc_path();
81 BUG_ON(!path); 81 BUG_ON(!path);
82 search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); 82 search_start = max(search_start, (u64)BTRFS_FIRST_FREE_OBJECTID);
83 search_key.objectid = search_start; 83 search_key.objectid = search_start;
84 search_key.type = 0; 84 search_key.type = 0;
85 search_key.offset = 0; 85 search_key.offset = 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a0d1dd492a58..90c23eb28829 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -70,7 +70,6 @@ static struct extent_io_ops btrfs_extent_io_ops;
70static struct kmem_cache *btrfs_inode_cachep; 70static struct kmem_cache *btrfs_inode_cachep;
71struct kmem_cache *btrfs_trans_handle_cachep; 71struct kmem_cache *btrfs_trans_handle_cachep;
72struct kmem_cache *btrfs_transaction_cachep; 72struct kmem_cache *btrfs_transaction_cachep;
73struct kmem_cache *btrfs_bit_radix_cachep;
74struct kmem_cache *btrfs_path_cachep; 73struct kmem_cache *btrfs_path_cachep;
75 74
76#define S_SHIFT 12 75#define S_SHIFT 12
@@ -234,7 +233,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
234 } 233 }
235 234
236 ret = btrfs_drop_extents(trans, root, inode, start, 235 ret = btrfs_drop_extents(trans, root, inode, start,
237 aligned_end, start, &hint_byte); 236 aligned_end, aligned_end, start, &hint_byte);
238 BUG_ON(ret); 237 BUG_ON(ret);
239 238
240 if (isize > actual_end) 239 if (isize > actual_end)
@@ -1439,6 +1438,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1439 struct inode *inode, u64 file_pos, 1438 struct inode *inode, u64 file_pos,
1440 u64 disk_bytenr, u64 disk_num_bytes, 1439 u64 disk_bytenr, u64 disk_num_bytes,
1441 u64 num_bytes, u64 ram_bytes, 1440 u64 num_bytes, u64 ram_bytes,
1441 u64 locked_end,
1442 u8 compression, u8 encryption, 1442 u8 compression, u8 encryption,
1443 u16 other_encoding, int extent_type) 1443 u16 other_encoding, int extent_type)
1444{ 1444{
@@ -1455,7 +1455,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1455 1455
1456 path->leave_spinning = 1; 1456 path->leave_spinning = 1;
1457 ret = btrfs_drop_extents(trans, root, inode, file_pos, 1457 ret = btrfs_drop_extents(trans, root, inode, file_pos,
1458 file_pos + num_bytes, file_pos, &hint); 1458 file_pos + num_bytes, locked_end,
1459 file_pos, &hint);
1459 BUG_ON(ret); 1460 BUG_ON(ret);
1460 1461
1461 ins.objectid = inode->i_ino; 1462 ins.objectid = inode->i_ino;
@@ -1590,6 +1591,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1590 ordered_extent->disk_len, 1591 ordered_extent->disk_len,
1591 ordered_extent->len, 1592 ordered_extent->len,
1592 ordered_extent->len, 1593 ordered_extent->len,
1594 ordered_extent->file_offset +
1595 ordered_extent->len,
1593 compressed, 0, 0, 1596 compressed, 0, 0,
1594 BTRFS_FILE_EXTENT_REG); 1597 BTRFS_FILE_EXTENT_REG);
1595 BUG_ON(ret); 1598 BUG_ON(ret);
@@ -1819,10 +1822,12 @@ good:
1819 return 0; 1822 return 0;
1820 1823
1821zeroit: 1824zeroit:
1822 printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " 1825 if (printk_ratelimit()) {
1823 "private %llu\n", page->mapping->host->i_ino, 1826 printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u "
1824 (unsigned long long)start, csum, 1827 "private %llu\n", page->mapping->host->i_ino,
1825 (unsigned long long)private); 1828 (unsigned long long)start, csum,
1829 (unsigned long long)private);
1830 }
1826 memset(kaddr + offset, 1, end - start + 1); 1831 memset(kaddr + offset, 1, end - start + 1);
1827 flush_dcache_page(page); 1832 flush_dcache_page(page);
1828 kunmap_atomic(kaddr, KM_USER0); 1833 kunmap_atomic(kaddr, KM_USER0);
@@ -2011,6 +2016,57 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2011} 2016}
2012 2017
2013/* 2018/*
2019 * very simple check to peek ahead in the leaf looking for xattrs. If we
2020 * don't find any xattrs, we know there can't be any acls.
2021 *
2022 * slot is the slot the inode is in, objectid is the objectid of the inode
2023 */
2024static noinline int acls_after_inode_item(struct extent_buffer *leaf,
2025 int slot, u64 objectid)
2026{
2027 u32 nritems = btrfs_header_nritems(leaf);
2028 struct btrfs_key found_key;
2029 int scanned = 0;
2030
2031 slot++;
2032 while (slot < nritems) {
2033 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2034
2035 /* we found a different objectid, there must not be acls */
2036 if (found_key.objectid != objectid)
2037 return 0;
2038
2039 /* we found an xattr, assume we've got an acl */
2040 if (found_key.type == BTRFS_XATTR_ITEM_KEY)
2041 return 1;
2042
2043 /*
2044 * we found a key greater than an xattr key, there can't
2045 * be any acls later on
2046 */
2047 if (found_key.type > BTRFS_XATTR_ITEM_KEY)
2048 return 0;
2049
2050 slot++;
2051 scanned++;
2052
2053 /*
2054 * it goes inode, inode backrefs, xattrs, extents,
2055 * so if there are a ton of hard links to an inode there can
2056 * be a lot of backrefs. Don't waste time searching too hard,
2057 * this is just an optimization
2058 */
2059 if (scanned >= 8)
2060 break;
2061 }
2062 /* we hit the end of the leaf before we found an xattr or
2063 * something larger than an xattr. We have to assume the inode
2064 * has acls
2065 */
2066 return 1;
2067}
2068
2069/*
2014 * read an inode from the btree into the in-memory inode 2070 * read an inode from the btree into the in-memory inode
2015 */ 2071 */
2016void btrfs_read_locked_inode(struct inode *inode) 2072void btrfs_read_locked_inode(struct inode *inode)
@@ -2021,6 +2077,7 @@ void btrfs_read_locked_inode(struct inode *inode)
2021 struct btrfs_timespec *tspec; 2077 struct btrfs_timespec *tspec;
2022 struct btrfs_root *root = BTRFS_I(inode)->root; 2078 struct btrfs_root *root = BTRFS_I(inode)->root;
2023 struct btrfs_key location; 2079 struct btrfs_key location;
2080 int maybe_acls;
2024 u64 alloc_group_block; 2081 u64 alloc_group_block;
2025 u32 rdev; 2082 u32 rdev;
2026 int ret; 2083 int ret;
@@ -2067,6 +2124,16 @@ void btrfs_read_locked_inode(struct inode *inode)
2067 2124
2068 alloc_group_block = btrfs_inode_block_group(leaf, inode_item); 2125 alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
2069 2126
2127 /*
2128 * try to precache a NULL acl entry for files that don't have
2129 * any xattrs or acls
2130 */
2131 maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino);
2132 if (!maybe_acls) {
2133 BTRFS_I(inode)->i_acl = NULL;
2134 BTRFS_I(inode)->i_default_acl = NULL;
2135 }
2136
2070 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, 2137 BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0,
2071 alloc_group_block, 0); 2138 alloc_group_block, 0);
2072 btrfs_free_path(path); 2139 btrfs_free_path(path);
@@ -2877,6 +2944,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
2877 err = btrfs_drop_extents(trans, root, inode, 2944 err = btrfs_drop_extents(trans, root, inode,
2878 cur_offset, 2945 cur_offset,
2879 cur_offset + hole_size, 2946 cur_offset + hole_size,
2947 block_end,
2880 cur_offset, &hint_byte); 2948 cur_offset, &hint_byte);
2881 if (err) 2949 if (err)
2882 break; 2950 break;
@@ -3041,8 +3109,8 @@ static noinline void init_btrfs_i(struct inode *inode)
3041{ 3109{
3042 struct btrfs_inode *bi = BTRFS_I(inode); 3110 struct btrfs_inode *bi = BTRFS_I(inode);
3043 3111
3044 bi->i_acl = NULL; 3112 bi->i_acl = BTRFS_ACL_NOT_CACHED;
3045 bi->i_default_acl = NULL; 3113 bi->i_default_acl = BTRFS_ACL_NOT_CACHED;
3046 3114
3047 bi->generation = 0; 3115 bi->generation = 0;
3048 bi->sequence = 0; 3116 bi->sequence = 0;
@@ -4634,47 +4702,36 @@ void btrfs_destroy_cachep(void)
4634 kmem_cache_destroy(btrfs_trans_handle_cachep); 4702 kmem_cache_destroy(btrfs_trans_handle_cachep);
4635 if (btrfs_transaction_cachep) 4703 if (btrfs_transaction_cachep)
4636 kmem_cache_destroy(btrfs_transaction_cachep); 4704 kmem_cache_destroy(btrfs_transaction_cachep);
4637 if (btrfs_bit_radix_cachep)
4638 kmem_cache_destroy(btrfs_bit_radix_cachep);
4639 if (btrfs_path_cachep) 4705 if (btrfs_path_cachep)
4640 kmem_cache_destroy(btrfs_path_cachep); 4706 kmem_cache_destroy(btrfs_path_cachep);
4641} 4707}
4642 4708
4643struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
4644 unsigned long extra_flags,
4645 void (*ctor)(void *))
4646{
4647 return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
4648 SLAB_MEM_SPREAD | extra_flags), ctor);
4649}
4650
4651int btrfs_init_cachep(void) 4709int btrfs_init_cachep(void)
4652{ 4710{
4653 btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache", 4711 btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
4654 sizeof(struct btrfs_inode), 4712 sizeof(struct btrfs_inode), 0,
4655 0, init_once); 4713 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once);
4656 if (!btrfs_inode_cachep) 4714 if (!btrfs_inode_cachep)
4657 goto fail; 4715 goto fail;
4658 btrfs_trans_handle_cachep = 4716
4659 btrfs_cache_create("btrfs_trans_handle_cache", 4717 btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
4660 sizeof(struct btrfs_trans_handle), 4718 sizeof(struct btrfs_trans_handle), 0,
4661 0, NULL); 4719 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
4662 if (!btrfs_trans_handle_cachep) 4720 if (!btrfs_trans_handle_cachep)
4663 goto fail; 4721 goto fail;
4664 btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache", 4722
4665 sizeof(struct btrfs_transaction), 4723 btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
4666 0, NULL); 4724 sizeof(struct btrfs_transaction), 0,
4725 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
4667 if (!btrfs_transaction_cachep) 4726 if (!btrfs_transaction_cachep)
4668 goto fail; 4727 goto fail;
4669 btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache", 4728
4670 sizeof(struct btrfs_path), 4729 btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
4671 0, NULL); 4730 sizeof(struct btrfs_path), 0,
4731 SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
4672 if (!btrfs_path_cachep) 4732 if (!btrfs_path_cachep)
4673 goto fail; 4733 goto fail;
4674 btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256, 4734
4675 SLAB_DESTROY_BY_RCU, NULL);
4676 if (!btrfs_bit_radix_cachep)
4677 goto fail;
4678 return 0; 4735 return 0;
4679fail: 4736fail:
4680 btrfs_destroy_cachep(); 4737 btrfs_destroy_cachep();
@@ -4970,10 +5027,10 @@ out_fail:
4970 return err; 5027 return err;
4971} 5028}
4972 5029
4973static int prealloc_file_range(struct inode *inode, u64 start, u64 end, 5030static int prealloc_file_range(struct btrfs_trans_handle *trans,
4974 u64 alloc_hint, int mode) 5031 struct inode *inode, u64 start, u64 end,
5032 u64 locked_end, u64 alloc_hint, int mode)
4975{ 5033{
4976 struct btrfs_trans_handle *trans;
4977 struct btrfs_root *root = BTRFS_I(inode)->root; 5034 struct btrfs_root *root = BTRFS_I(inode)->root;
4978 struct btrfs_key ins; 5035 struct btrfs_key ins;
4979 u64 alloc_size; 5036 u64 alloc_size;
@@ -4981,10 +5038,6 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
4981 u64 num_bytes = end - start; 5038 u64 num_bytes = end - start;
4982 int ret = 0; 5039 int ret = 0;
4983 5040
4984 trans = btrfs_join_transaction(root, 1);
4985 BUG_ON(!trans);
4986 btrfs_set_trans_block_group(trans, inode);
4987
4988 while (num_bytes > 0) { 5041 while (num_bytes > 0) {
4989 alloc_size = min(num_bytes, root->fs_info->max_extent); 5042 alloc_size = min(num_bytes, root->fs_info->max_extent);
4990 ret = btrfs_reserve_extent(trans, root, alloc_size, 5043 ret = btrfs_reserve_extent(trans, root, alloc_size,
@@ -4997,7 +5050,8 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
4997 ret = insert_reserved_file_extent(trans, inode, 5050 ret = insert_reserved_file_extent(trans, inode,
4998 cur_offset, ins.objectid, 5051 cur_offset, ins.objectid,
4999 ins.offset, ins.offset, 5052 ins.offset, ins.offset,
5000 ins.offset, 0, 0, 0, 5053 ins.offset, locked_end,
5054 0, 0, 0,
5001 BTRFS_FILE_EXTENT_PREALLOC); 5055 BTRFS_FILE_EXTENT_PREALLOC);
5002 BUG_ON(ret); 5056 BUG_ON(ret);
5003 num_bytes -= ins.offset; 5057 num_bytes -= ins.offset;
@@ -5015,7 +5069,6 @@ out:
5015 BUG_ON(ret); 5069 BUG_ON(ret);
5016 } 5070 }
5017 5071
5018 btrfs_end_transaction(trans, root);
5019 return ret; 5072 return ret;
5020} 5073}
5021 5074
@@ -5027,13 +5080,21 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5027 u64 alloc_start; 5080 u64 alloc_start;
5028 u64 alloc_end; 5081 u64 alloc_end;
5029 u64 alloc_hint = 0; 5082 u64 alloc_hint = 0;
5083 u64 locked_end;
5030 u64 mask = BTRFS_I(inode)->root->sectorsize - 1; 5084 u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
5031 struct extent_map *em; 5085 struct extent_map *em;
5086 struct btrfs_trans_handle *trans;
5032 int ret; 5087 int ret;
5033 5088
5034 alloc_start = offset & ~mask; 5089 alloc_start = offset & ~mask;
5035 alloc_end = (offset + len + mask) & ~mask; 5090 alloc_end = (offset + len + mask) & ~mask;
5036 5091
5092 /*
5093 * wait for ordered IO before we have any locks. We'll loop again
5094 * below with the locks held.
5095 */
5096 btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
5097
5037 mutex_lock(&inode->i_mutex); 5098 mutex_lock(&inode->i_mutex);
5038 if (alloc_start > inode->i_size) { 5099 if (alloc_start > inode->i_size) {
5039 ret = btrfs_cont_expand(inode, alloc_start); 5100 ret = btrfs_cont_expand(inode, alloc_start);
@@ -5041,10 +5102,21 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5041 goto out; 5102 goto out;
5042 } 5103 }
5043 5104
5105 locked_end = alloc_end - 1;
5044 while (1) { 5106 while (1) {
5045 struct btrfs_ordered_extent *ordered; 5107 struct btrfs_ordered_extent *ordered;
5046 lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, 5108
5047 alloc_end - 1, GFP_NOFS); 5109 trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
5110 if (!trans) {
5111 ret = -EIO;
5112 goto out;
5113 }
5114
5115 /* the extent lock is ordered inside the running
5116 * transaction
5117 */
5118 lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5119 GFP_NOFS);
5048 ordered = btrfs_lookup_first_ordered_extent(inode, 5120 ordered = btrfs_lookup_first_ordered_extent(inode,
5049 alloc_end - 1); 5121 alloc_end - 1);
5050 if (ordered && 5122 if (ordered &&
@@ -5052,7 +5124,13 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5052 ordered->file_offset < alloc_end) { 5124 ordered->file_offset < alloc_end) {
5053 btrfs_put_ordered_extent(ordered); 5125 btrfs_put_ordered_extent(ordered);
5054 unlock_extent(&BTRFS_I(inode)->io_tree, 5126 unlock_extent(&BTRFS_I(inode)->io_tree,
5055 alloc_start, alloc_end - 1, GFP_NOFS); 5127 alloc_start, locked_end, GFP_NOFS);
5128 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
5129
5130 /*
5131 * we can't wait on the range with the transaction
5132 * running or with the extent lock held
5133 */
5056 btrfs_wait_ordered_range(inode, alloc_start, 5134 btrfs_wait_ordered_range(inode, alloc_start,
5057 alloc_end - alloc_start); 5135 alloc_end - alloc_start);
5058 } else { 5136 } else {
@@ -5070,8 +5148,9 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5070 last_byte = min(extent_map_end(em), alloc_end); 5148 last_byte = min(extent_map_end(em), alloc_end);
5071 last_byte = (last_byte + mask) & ~mask; 5149 last_byte = (last_byte + mask) & ~mask;
5072 if (em->block_start == EXTENT_MAP_HOLE) { 5150 if (em->block_start == EXTENT_MAP_HOLE) {
5073 ret = prealloc_file_range(inode, cur_offset, 5151 ret = prealloc_file_range(trans, inode, cur_offset,
5074 last_byte, alloc_hint, mode); 5152 last_byte, locked_end + 1,
5153 alloc_hint, mode);
5075 if (ret < 0) { 5154 if (ret < 0) {
5076 free_extent_map(em); 5155 free_extent_map(em);
5077 break; 5156 break;
@@ -5087,8 +5166,10 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5087 break; 5166 break;
5088 } 5167 }
5089 } 5168 }
5090 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, 5169 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5091 GFP_NOFS); 5170 GFP_NOFS);
5171
5172 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
5092out: 5173out:
5093 mutex_unlock(&inode->i_mutex); 5174 mutex_unlock(&inode->i_mutex);
5094 return ret; 5175 return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 7594bec1be10..5e94ea6e1cbe 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -461,15 +461,9 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
461 if (!capable(CAP_SYS_ADMIN)) 461 if (!capable(CAP_SYS_ADMIN))
462 return -EPERM; 462 return -EPERM;
463 463
464 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); 464 vol_args = memdup_user(arg, sizeof(*vol_args));
465 465 if (IS_ERR(vol_args))
466 if (!vol_args) 466 return PTR_ERR(vol_args);
467 return -ENOMEM;
468
469 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
470 ret = -EFAULT;
471 goto out;
472 }
473 467
474 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 468 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
475 namelen = strlen(vol_args->name); 469 namelen = strlen(vol_args->name);
@@ -483,11 +477,13 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
483 *devstr = '\0'; 477 *devstr = '\0';
484 devstr = vol_args->name; 478 devstr = vol_args->name;
485 devid = simple_strtoull(devstr, &end, 10); 479 devid = simple_strtoull(devstr, &end, 10);
486 printk(KERN_INFO "resizing devid %llu\n", devid); 480 printk(KERN_INFO "resizing devid %llu\n",
481 (unsigned long long)devid);
487 } 482 }
488 device = btrfs_find_device(root, devid, NULL, NULL); 483 device = btrfs_find_device(root, devid, NULL, NULL);
489 if (!device) { 484 if (!device) {
490 printk(KERN_INFO "resizer unable to find device %llu\n", devid); 485 printk(KERN_INFO "resizer unable to find device %llu\n",
486 (unsigned long long)devid);
491 ret = -EINVAL; 487 ret = -EINVAL;
492 goto out_unlock; 488 goto out_unlock;
493 } 489 }
@@ -545,7 +541,6 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
545 541
546out_unlock: 542out_unlock:
547 mutex_unlock(&root->fs_info->volume_mutex); 543 mutex_unlock(&root->fs_info->volume_mutex);
548out:
549 kfree(vol_args); 544 kfree(vol_args);
550 return ret; 545 return ret;
551} 546}
@@ -565,15 +560,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
565 if (root->fs_info->sb->s_flags & MS_RDONLY) 560 if (root->fs_info->sb->s_flags & MS_RDONLY)
566 return -EROFS; 561 return -EROFS;
567 562
568 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); 563 vol_args = memdup_user(arg, sizeof(*vol_args));
569 564 if (IS_ERR(vol_args))
570 if (!vol_args) 565 return PTR_ERR(vol_args);
571 return -ENOMEM;
572
573 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
574 ret = -EFAULT;
575 goto out;
576 }
577 566
578 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 567 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
579 namelen = strlen(vol_args->name); 568 namelen = strlen(vol_args->name);
@@ -675,19 +664,13 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
675 if (!capable(CAP_SYS_ADMIN)) 664 if (!capable(CAP_SYS_ADMIN))
676 return -EPERM; 665 return -EPERM;
677 666
678 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); 667 vol_args = memdup_user(arg, sizeof(*vol_args));
668 if (IS_ERR(vol_args))
669 return PTR_ERR(vol_args);
679 670
680 if (!vol_args)
681 return -ENOMEM;
682
683 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
684 ret = -EFAULT;
685 goto out;
686 }
687 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 671 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
688 ret = btrfs_init_new_device(root, vol_args->name); 672 ret = btrfs_init_new_device(root, vol_args->name);
689 673
690out:
691 kfree(vol_args); 674 kfree(vol_args);
692 return ret; 675 return ret;
693} 676}
@@ -703,19 +686,13 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
703 if (root->fs_info->sb->s_flags & MS_RDONLY) 686 if (root->fs_info->sb->s_flags & MS_RDONLY)
704 return -EROFS; 687 return -EROFS;
705 688
706 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); 689 vol_args = memdup_user(arg, sizeof(*vol_args));
690 if (IS_ERR(vol_args))
691 return PTR_ERR(vol_args);
707 692
708 if (!vol_args)
709 return -ENOMEM;
710
711 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
712 ret = -EFAULT;
713 goto out;
714 }
715 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 693 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
716 ret = btrfs_rm_device(root, vol_args->name); 694 ret = btrfs_rm_device(root, vol_args->name);
717 695
718out:
719 kfree(vol_args); 696 kfree(vol_args);
720 return ret; 697 return ret;
721} 698}
@@ -830,7 +807,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
830 BUG_ON(!trans); 807 BUG_ON(!trans);
831 808
832 /* punch hole in destination first */ 809 /* punch hole in destination first */
833 btrfs_drop_extents(trans, root, inode, off, off+len, 0, &hint_byte); 810 btrfs_drop_extents(trans, root, inode, off, off + len,
811 off + len, 0, &hint_byte);
834 812
835 /* clone data */ 813 /* clone data */
836 key.objectid = src->i_ino; 814 key.objectid = src->i_ino;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 53c87b197d70..d6f0806c682f 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -489,7 +489,7 @@ again:
489 /* start IO across the range first to instantiate any delalloc 489 /* start IO across the range first to instantiate any delalloc
490 * extents 490 * extents
491 */ 491 */
492 btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE); 492 btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL);
493 493
494 /* The compression code will leave pages locked but return from 494 /* The compression code will leave pages locked but return from
495 * writepage without setting the page writeback. Starting again 495 * writepage without setting the page writeback. Starting again
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 9744af9d71e9..3536bdb2d7cb 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -68,7 +68,7 @@ enum {
68 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, 68 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
69 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, 69 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
70 Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog, 70 Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog,
71 Opt_flushoncommit, Opt_err, 71 Opt_ratio, Opt_flushoncommit, Opt_err,
72}; 72};
73 73
74static match_table_t tokens = { 74static match_table_t tokens = {
@@ -87,6 +87,7 @@ static match_table_t tokens = {
87 {Opt_noacl, "noacl"}, 87 {Opt_noacl, "noacl"},
88 {Opt_notreelog, "notreelog"}, 88 {Opt_notreelog, "notreelog"},
89 {Opt_flushoncommit, "flushoncommit"}, 89 {Opt_flushoncommit, "flushoncommit"},
90 {Opt_ratio, "metadata_ratio=%d"},
90 {Opt_err, NULL}, 91 {Opt_err, NULL},
91}; 92};
92 93
@@ -195,7 +196,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
195 info->max_extent = max_t(u64, 196 info->max_extent = max_t(u64,
196 info->max_extent, root->sectorsize); 197 info->max_extent, root->sectorsize);
197 printk(KERN_INFO "btrfs: max_extent at %llu\n", 198 printk(KERN_INFO "btrfs: max_extent at %llu\n",
198 info->max_extent); 199 (unsigned long long)info->max_extent);
199 } 200 }
200 break; 201 break;
201 case Opt_max_inline: 202 case Opt_max_inline:
@@ -210,7 +211,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
210 root->sectorsize); 211 root->sectorsize);
211 } 212 }
212 printk(KERN_INFO "btrfs: max_inline at %llu\n", 213 printk(KERN_INFO "btrfs: max_inline at %llu\n",
213 info->max_inline); 214 (unsigned long long)info->max_inline);
214 } 215 }
215 break; 216 break;
216 case Opt_alloc_start: 217 case Opt_alloc_start:
@@ -220,7 +221,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
220 kfree(num); 221 kfree(num);
221 printk(KERN_INFO 222 printk(KERN_INFO
222 "btrfs: allocations start at %llu\n", 223 "btrfs: allocations start at %llu\n",
223 info->alloc_start); 224 (unsigned long long)info->alloc_start);
224 } 225 }
225 break; 226 break;
226 case Opt_noacl: 227 case Opt_noacl:
@@ -234,6 +235,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
234 printk(KERN_INFO "btrfs: turning on flush-on-commit\n"); 235 printk(KERN_INFO "btrfs: turning on flush-on-commit\n");
235 btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT); 236 btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT);
236 break; 237 break;
238 case Opt_ratio:
239 intarg = 0;
240 match_int(&args[0], &intarg);
241 if (intarg) {
242 info->metadata_ratio = intarg;
243 printk(KERN_INFO "btrfs: metadata ratio %d\n",
244 info->metadata_ratio);
245 }
246 break;
237 default: 247 default:
238 break; 248 break;
239 } 249 }
@@ -410,11 +420,14 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
410 if (btrfs_test_opt(root, NOBARRIER)) 420 if (btrfs_test_opt(root, NOBARRIER))
411 seq_puts(seq, ",nobarrier"); 421 seq_puts(seq, ",nobarrier");
412 if (info->max_extent != (u64)-1) 422 if (info->max_extent != (u64)-1)
413 seq_printf(seq, ",max_extent=%llu", info->max_extent); 423 seq_printf(seq, ",max_extent=%llu",
424 (unsigned long long)info->max_extent);
414 if (info->max_inline != 8192 * 1024) 425 if (info->max_inline != 8192 * 1024)
415 seq_printf(seq, ",max_inline=%llu", info->max_inline); 426 seq_printf(seq, ",max_inline=%llu",
427 (unsigned long long)info->max_inline);
416 if (info->alloc_start != 0) 428 if (info->alloc_start != 0)
417 seq_printf(seq, ",alloc_start=%llu", info->alloc_start); 429 seq_printf(seq, ",alloc_start=%llu",
430 (unsigned long long)info->alloc_start);
418 if (info->thread_pool_size != min_t(unsigned long, 431 if (info->thread_pool_size != min_t(unsigned long,
419 num_online_cpus() + 2, 8)) 432 num_online_cpus() + 2, 8))
420 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); 433 seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
@@ -635,14 +648,9 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
635 if (!capable(CAP_SYS_ADMIN)) 648 if (!capable(CAP_SYS_ADMIN))
636 return -EPERM; 649 return -EPERM;
637 650
638 vol = kmalloc(sizeof(*vol), GFP_KERNEL); 651 vol = memdup_user((void __user *)arg, sizeof(*vol));
639 if (!vol) 652 if (IS_ERR(vol))
640 return -ENOMEM; 653 return PTR_ERR(vol);
641
642 if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) {
643 ret = -EFAULT;
644 goto out;
645 }
646 654
647 switch (cmd) { 655 switch (cmd) {
648 case BTRFS_IOC_SCAN_DEV: 656 case BTRFS_IOC_SCAN_DEV:
@@ -650,7 +658,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
650 &btrfs_fs_type, &fs_devices); 658 &btrfs_fs_type, &fs_devices);
651 break; 659 break;
652 } 660 }
653out: 661
654 kfree(vol); 662 kfree(vol);
655 return ret; 663 return ret;
656} 664}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2869b3361eb6..01b143605ec1 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -687,7 +687,13 @@ static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
687 prepare_to_wait(&info->transaction_wait, &wait, 687 prepare_to_wait(&info->transaction_wait, &wait,
688 TASK_UNINTERRUPTIBLE); 688 TASK_UNINTERRUPTIBLE);
689 mutex_unlock(&info->trans_mutex); 689 mutex_unlock(&info->trans_mutex);
690
691 atomic_dec(&info->throttles);
692 wake_up(&info->transaction_throttle);
693
690 schedule(); 694 schedule();
695
696 atomic_inc(&info->throttles);
691 mutex_lock(&info->trans_mutex); 697 mutex_lock(&info->trans_mutex);
692 finish_wait(&info->transaction_wait, &wait); 698 finish_wait(&info->transaction_wait, &wait);
693 } 699 }
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 25f20ea11f27..db5e212e8445 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -536,7 +536,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
536 saved_nbytes = inode_get_bytes(inode); 536 saved_nbytes = inode_get_bytes(inode);
537 /* drop any overlapping extents */ 537 /* drop any overlapping extents */
538 ret = btrfs_drop_extents(trans, root, inode, 538 ret = btrfs_drop_extents(trans, root, inode,
539 start, extent_end, start, &alloc_hint); 539 start, extent_end, extent_end, start, &alloc_hint);
540 BUG_ON(ret); 540 BUG_ON(ret);
541 541
542 if (found_type == BTRFS_FILE_EXTENT_REG || 542 if (found_type == BTRFS_FILE_EXTENT_REG ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e0913e469728..5f01dad4b696 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -125,6 +125,20 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
125 return NULL; 125 return NULL;
126} 126}
127 127
128static void requeue_list(struct btrfs_pending_bios *pending_bios,
129 struct bio *head, struct bio *tail)
130{
131
132 struct bio *old_head;
133
134 old_head = pending_bios->head;
135 pending_bios->head = head;
136 if (pending_bios->tail)
137 tail->bi_next = old_head;
138 else
139 pending_bios->tail = tail;
140}
141
128/* 142/*
129 * we try to collect pending bios for a device so we don't get a large 143 * we try to collect pending bios for a device so we don't get a large
130 * number of procs sending bios down to the same device. This greatly 144 * number of procs sending bios down to the same device. This greatly
@@ -141,10 +155,12 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
141 struct bio *pending; 155 struct bio *pending;
142 struct backing_dev_info *bdi; 156 struct backing_dev_info *bdi;
143 struct btrfs_fs_info *fs_info; 157 struct btrfs_fs_info *fs_info;
158 struct btrfs_pending_bios *pending_bios;
144 struct bio *tail; 159 struct bio *tail;
145 struct bio *cur; 160 struct bio *cur;
146 int again = 0; 161 int again = 0;
147 unsigned long num_run = 0; 162 unsigned long num_run;
163 unsigned long num_sync_run;
148 unsigned long limit; 164 unsigned long limit;
149 unsigned long last_waited = 0; 165 unsigned long last_waited = 0;
150 166
@@ -153,20 +169,30 @@ static noinline int run_scheduled_bios(struct btrfs_device *device)
153 limit = btrfs_async_submit_limit(fs_info); 169 limit = btrfs_async_submit_limit(fs_info);
154 limit = limit * 2 / 3; 170 limit = limit * 2 / 3;
155 171
172 /* we want to make sure that every time we switch from the sync
173 * list to the normal list, we unplug
174 */
175 num_sync_run = 0;
176
156loop: 177loop:
157 spin_lock(&device->io_lock); 178 spin_lock(&device->io_lock);
179 num_run = 0;
158 180
159loop_lock: 181loop_lock:
182
160 /* take all the bios off the list at once and process them 183 /* take all the bios off the list at once and process them
161 * later on (without the lock held). But, remember the 184 * later on (without the lock held). But, remember the
162 * tail and other pointers so the bios can be properly reinserted 185 * tail and other pointers so the bios can be properly reinserted
163 * into the list if we hit congestion 186 * into the list if we hit congestion
164 */ 187 */
165 pending = device->pending_bios; 188 if (device->pending_sync_bios.head)
166 tail = device->pending_bio_tail; 189 pending_bios = &device->pending_sync_bios;
190 else
191 pending_bios = &device->pending_bios;
192
193 pending = pending_bios->head;
194 tail = pending_bios->tail;
167 WARN_ON(pending && !tail); 195 WARN_ON(pending && !tail);
168 device->pending_bios = NULL;
169 device->pending_bio_tail = NULL;
170 196
171 /* 197 /*
172 * if pending was null this time around, no bios need processing 198 * if pending was null this time around, no bios need processing
@@ -176,16 +202,41 @@ loop_lock:
176 * device->running_pending is used to synchronize with the 202 * device->running_pending is used to synchronize with the
177 * schedule_bio code. 203 * schedule_bio code.
178 */ 204 */
179 if (pending) { 205 if (device->pending_sync_bios.head == NULL &&
180 again = 1; 206 device->pending_bios.head == NULL) {
181 device->running_pending = 1;
182 } else {
183 again = 0; 207 again = 0;
184 device->running_pending = 0; 208 device->running_pending = 0;
209 } else {
210 again = 1;
211 device->running_pending = 1;
185 } 212 }
213
214 pending_bios->head = NULL;
215 pending_bios->tail = NULL;
216
186 spin_unlock(&device->io_lock); 217 spin_unlock(&device->io_lock);
187 218
219 /*
220 * if we're doing the regular priority list, make sure we unplug
221 * for any high prio bios we've sent down
222 */
223 if (pending_bios == &device->pending_bios && num_sync_run > 0) {
224 num_sync_run = 0;
225 blk_run_backing_dev(bdi, NULL);
226 }
227
188 while (pending) { 228 while (pending) {
229
230 rmb();
231 if (pending_bios != &device->pending_sync_bios &&
232 device->pending_sync_bios.head &&
233 num_run > 16) {
234 cond_resched();
235 spin_lock(&device->io_lock);
236 requeue_list(pending_bios, pending, tail);
237 goto loop_lock;
238 }
239
189 cur = pending; 240 cur = pending;
190 pending = pending->bi_next; 241 pending = pending->bi_next;
191 cur->bi_next = NULL; 242 cur->bi_next = NULL;
@@ -196,10 +247,18 @@ loop_lock:
196 wake_up(&fs_info->async_submit_wait); 247 wake_up(&fs_info->async_submit_wait);
197 248
198 BUG_ON(atomic_read(&cur->bi_cnt) == 0); 249 BUG_ON(atomic_read(&cur->bi_cnt) == 0);
199 bio_get(cur);
200 submit_bio(cur->bi_rw, cur); 250 submit_bio(cur->bi_rw, cur);
201 bio_put(cur);
202 num_run++; 251 num_run++;
252 if (bio_sync(cur))
253 num_sync_run++;
254
255 if (need_resched()) {
256 if (num_sync_run) {
257 blk_run_backing_dev(bdi, NULL);
258 num_sync_run = 0;
259 }
260 cond_resched();
261 }
203 262
204 /* 263 /*
205 * we made progress, there is more work to do and the bdi 264 * we made progress, there is more work to do and the bdi
@@ -208,7 +267,6 @@ loop_lock:
208 */ 267 */
209 if (pending && bdi_write_congested(bdi) && num_run > 16 && 268 if (pending && bdi_write_congested(bdi) && num_run > 16 &&
210 fs_info->fs_devices->open_devices > 1) { 269 fs_info->fs_devices->open_devices > 1) {
211 struct bio *old_head;
212 struct io_context *ioc; 270 struct io_context *ioc;
213 271
214 ioc = current->io_context; 272 ioc = current->io_context;
@@ -233,17 +291,17 @@ loop_lock:
233 * against it before looping 291 * against it before looping
234 */ 292 */
235 last_waited = ioc->last_waited; 293 last_waited = ioc->last_waited;
294 if (need_resched()) {
295 if (num_sync_run) {
296 blk_run_backing_dev(bdi, NULL);
297 num_sync_run = 0;
298 }
299 cond_resched();
300 }
236 continue; 301 continue;
237 } 302 }
238 spin_lock(&device->io_lock); 303 spin_lock(&device->io_lock);
239 304 requeue_list(pending_bios, pending, tail);
240 old_head = device->pending_bios;
241 device->pending_bios = pending;
242 if (device->pending_bio_tail)
243 tail->bi_next = old_head;
244 else
245 device->pending_bio_tail = tail;
246
247 device->running_pending = 1; 305 device->running_pending = 1;
248 306
249 spin_unlock(&device->io_lock); 307 spin_unlock(&device->io_lock);
@@ -251,11 +309,18 @@ loop_lock:
251 goto done; 309 goto done;
252 } 310 }
253 } 311 }
312
313 if (num_sync_run) {
314 num_sync_run = 0;
315 blk_run_backing_dev(bdi, NULL);
316 }
317
318 cond_resched();
254 if (again) 319 if (again)
255 goto loop; 320 goto loop;
256 321
257 spin_lock(&device->io_lock); 322 spin_lock(&device->io_lock);
258 if (device->pending_bios) 323 if (device->pending_bios.head || device->pending_sync_bios.head)
259 goto loop_lock; 324 goto loop_lock;
260 spin_unlock(&device->io_lock); 325 spin_unlock(&device->io_lock);
261 326
@@ -1478,7 +1543,7 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
1478 btrfs_set_device_io_align(leaf, dev_item, device->io_align); 1543 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
1479 btrfs_set_device_io_width(leaf, dev_item, device->io_width); 1544 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
1480 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); 1545 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
1481 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); 1546 btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes);
1482 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); 1547 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
1483 btrfs_mark_buffer_dirty(leaf); 1548 btrfs_mark_buffer_dirty(leaf);
1484 1549
@@ -1875,14 +1940,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1875 device->total_bytes = new_size; 1940 device->total_bytes = new_size;
1876 if (device->writeable) 1941 if (device->writeable)
1877 device->fs_devices->total_rw_bytes -= diff; 1942 device->fs_devices->total_rw_bytes -= diff;
1878 ret = btrfs_update_device(trans, device);
1879 if (ret) {
1880 unlock_chunks(root);
1881 btrfs_end_transaction(trans, root);
1882 goto done;
1883 }
1884 WARN_ON(diff > old_total);
1885 btrfs_set_super_total_bytes(super_copy, old_total - diff);
1886 unlock_chunks(root); 1943 unlock_chunks(root);
1887 btrfs_end_transaction(trans, root); 1944 btrfs_end_transaction(trans, root);
1888 1945
@@ -1914,7 +1971,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1914 length = btrfs_dev_extent_length(l, dev_extent); 1971 length = btrfs_dev_extent_length(l, dev_extent);
1915 1972
1916 if (key.offset + length <= new_size) 1973 if (key.offset + length <= new_size)
1917 goto done; 1974 break;
1918 1975
1919 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); 1976 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
1920 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); 1977 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
@@ -1927,6 +1984,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1927 goto done; 1984 goto done;
1928 } 1985 }
1929 1986
1987 /* Shrinking succeeded, else we would be at "done". */
1988 trans = btrfs_start_transaction(root, 1);
1989 if (!trans) {
1990 ret = -ENOMEM;
1991 goto done;
1992 }
1993 lock_chunks(root);
1994
1995 device->disk_total_bytes = new_size;
1996 /* Now btrfs_update_device() will change the on-disk size. */
1997 ret = btrfs_update_device(trans, device);
1998 if (ret) {
1999 unlock_chunks(root);
2000 btrfs_end_transaction(trans, root);
2001 goto done;
2002 }
2003 WARN_ON(diff > old_total);
2004 btrfs_set_super_total_bytes(super_copy, old_total - diff);
2005 unlock_chunks(root);
2006 btrfs_end_transaction(trans, root);
1930done: 2007done:
1931 btrfs_free_path(path); 2008 btrfs_free_path(path);
1932 return ret; 2009 return ret;
@@ -2497,7 +2574,7 @@ again:
2497 max_errors = 1; 2574 max_errors = 1;
2498 } 2575 }
2499 } 2576 }
2500 if (multi_ret && rw == WRITE && 2577 if (multi_ret && (rw & (1 << BIO_RW)) &&
2501 stripes_allocated < stripes_required) { 2578 stripes_allocated < stripes_required) {
2502 stripes_allocated = map->num_stripes; 2579 stripes_allocated = map->num_stripes;
2503 free_extent_map(em); 2580 free_extent_map(em);
@@ -2762,6 +2839,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
2762 int rw, struct bio *bio) 2839 int rw, struct bio *bio)
2763{ 2840{
2764 int should_queue = 1; 2841 int should_queue = 1;
2842 struct btrfs_pending_bios *pending_bios;
2765 2843
2766 /* don't bother with additional async steps for reads, right now */ 2844 /* don't bother with additional async steps for reads, right now */
2767 if (!(rw & (1 << BIO_RW))) { 2845 if (!(rw & (1 << BIO_RW))) {
@@ -2783,13 +2861,17 @@ static noinline int schedule_bio(struct btrfs_root *root,
2783 bio->bi_rw |= rw; 2861 bio->bi_rw |= rw;
2784 2862
2785 spin_lock(&device->io_lock); 2863 spin_lock(&device->io_lock);
2864 if (bio_sync(bio))
2865 pending_bios = &device->pending_sync_bios;
2866 else
2867 pending_bios = &device->pending_bios;
2786 2868
2787 if (device->pending_bio_tail) 2869 if (pending_bios->tail)
2788 device->pending_bio_tail->bi_next = bio; 2870 pending_bios->tail->bi_next = bio;
2789 2871
2790 device->pending_bio_tail = bio; 2872 pending_bios->tail = bio;
2791 if (!device->pending_bios) 2873 if (!pending_bios->head)
2792 device->pending_bios = bio; 2874 pending_bios->head = bio;
2793 if (device->running_pending) 2875 if (device->running_pending)
2794 should_queue = 0; 2876 should_queue = 0;
2795 2877
@@ -3006,7 +3088,8 @@ static int fill_device_from_item(struct extent_buffer *leaf,
3006 unsigned long ptr; 3088 unsigned long ptr;
3007 3089
3008 device->devid = btrfs_device_id(leaf, dev_item); 3090 device->devid = btrfs_device_id(leaf, dev_item);
3009 device->total_bytes = btrfs_device_total_bytes(leaf, dev_item); 3091 device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
3092 device->total_bytes = device->disk_total_bytes;
3010 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); 3093 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
3011 device->type = btrfs_device_type(leaf, dev_item); 3094 device->type = btrfs_device_type(leaf, dev_item);
3012 device->io_align = btrfs_device_io_align(leaf, dev_item); 3095 device->io_align = btrfs_device_io_align(leaf, dev_item);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2185de72ff7d..5c3ff6d02fd7 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -23,13 +23,22 @@
23#include "async-thread.h" 23#include "async-thread.h"
24 24
25struct buffer_head; 25struct buffer_head;
26struct btrfs_pending_bios {
27 struct bio *head;
28 struct bio *tail;
29};
30
26struct btrfs_device { 31struct btrfs_device {
27 struct list_head dev_list; 32 struct list_head dev_list;
28 struct list_head dev_alloc_list; 33 struct list_head dev_alloc_list;
29 struct btrfs_fs_devices *fs_devices; 34 struct btrfs_fs_devices *fs_devices;
30 struct btrfs_root *dev_root; 35 struct btrfs_root *dev_root;
31 struct bio *pending_bios; 36
32 struct bio *pending_bio_tail; 37 /* regular prio bios */
38 struct btrfs_pending_bios pending_bios;
39 /* WRITE_SYNC bios */
40 struct btrfs_pending_bios pending_sync_bios;
41
33 int running_pending; 42 int running_pending;
34 u64 generation; 43 u64 generation;
35 44
@@ -52,6 +61,9 @@ struct btrfs_device {
52 /* size of the device */ 61 /* size of the device */
53 u64 total_bytes; 62 u64 total_bytes;
54 63
64 /* size of the disk */
65 u64 disk_total_bytes;
66
55 /* bytes used */ 67 /* bytes used */
56 u64 bytes_used; 68 u64 bytes_used;
57 69
diff --git a/fs/compat.c b/fs/compat.c
index 3f84d5f15889..681ed81e6be0 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -181,22 +181,24 @@ asmlinkage long compat_sys_newstat(char __user * filename,
181 struct compat_stat __user *statbuf) 181 struct compat_stat __user *statbuf)
182{ 182{
183 struct kstat stat; 183 struct kstat stat;
184 int error = vfs_stat_fd(AT_FDCWD, filename, &stat); 184 int error;
185 185
186 if (!error) 186 error = vfs_stat(filename, &stat);
187 error = cp_compat_stat(&stat, statbuf); 187 if (error)
188 return error; 188 return error;
189 return cp_compat_stat(&stat, statbuf);
189} 190}
190 191
191asmlinkage long compat_sys_newlstat(char __user * filename, 192asmlinkage long compat_sys_newlstat(char __user * filename,
192 struct compat_stat __user *statbuf) 193 struct compat_stat __user *statbuf)
193{ 194{
194 struct kstat stat; 195 struct kstat stat;
195 int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); 196 int error;
196 197
197 if (!error) 198 error = vfs_lstat(filename, &stat);
198 error = cp_compat_stat(&stat, statbuf); 199 if (error)
199 return error; 200 return error;
201 return cp_compat_stat(&stat, statbuf);
200} 202}
201 203
202#ifndef __ARCH_WANT_STAT64 204#ifndef __ARCH_WANT_STAT64
@@ -204,21 +206,12 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user *filename,
204 struct compat_stat __user *statbuf, int flag) 206 struct compat_stat __user *statbuf, int flag)
205{ 207{
206 struct kstat stat; 208 struct kstat stat;
207 int error = -EINVAL; 209 int error;
208
209 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
210 goto out;
211
212 if (flag & AT_SYMLINK_NOFOLLOW)
213 error = vfs_lstat_fd(dfd, filename, &stat);
214 else
215 error = vfs_stat_fd(dfd, filename, &stat);
216
217 if (!error)
218 error = cp_compat_stat(&stat, statbuf);
219 210
220out: 211 error = vfs_fstatat(dfd, filename, &stat, flag);
221 return error; 212 if (error)
213 return error;
214 return cp_compat_stat(&stat, statbuf);
222} 215}
223#endif 216#endif
224 217
@@ -1483,6 +1476,7 @@ int compat_do_execve(char * filename,
1483 struct linux_binprm *bprm; 1476 struct linux_binprm *bprm;
1484 struct file *file; 1477 struct file *file;
1485 struct files_struct *displaced; 1478 struct files_struct *displaced;
1479 bool clear_in_exec;
1486 int retval; 1480 int retval;
1487 1481
1488 retval = unshare_files(&displaced); 1482 retval = unshare_files(&displaced);
@@ -1505,8 +1499,9 @@ int compat_do_execve(char * filename,
1505 goto out_unlock; 1499 goto out_unlock;
1506 1500
1507 retval = check_unsafe_exec(bprm); 1501 retval = check_unsafe_exec(bprm);
1508 if (retval) 1502 if (retval < 0)
1509 goto out_unlock; 1503 goto out_unlock;
1504 clear_in_exec = retval;
1510 1505
1511 file = open_exec(filename); 1506 file = open_exec(filename);
1512 retval = PTR_ERR(file); 1507 retval = PTR_ERR(file);
@@ -1553,9 +1548,7 @@ int compat_do_execve(char * filename,
1553 goto out; 1548 goto out;
1554 1549
1555 /* execve succeeded */ 1550 /* execve succeeded */
1556 write_lock(&current->fs->lock);
1557 current->fs->in_exec = 0; 1551 current->fs->in_exec = 0;
1558 write_unlock(&current->fs->lock);
1559 current->in_execve = 0; 1552 current->in_execve = 0;
1560 mutex_unlock(&current->cred_exec_mutex); 1553 mutex_unlock(&current->cred_exec_mutex);
1561 acct_update_integrals(current); 1554 acct_update_integrals(current);
@@ -1575,9 +1568,8 @@ out_file:
1575 } 1568 }
1576 1569
1577out_unmark: 1570out_unmark:
1578 write_lock(&current->fs->lock); 1571 if (clear_in_exec)
1579 current->fs->in_exec = 0; 1572 current->fs->in_exec = 0;
1580 write_unlock(&current->fs->lock);
1581 1573
1582out_unlock: 1574out_unlock:
1583 current->in_execve = 0; 1575 current->in_execve = 0;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 3e87ce443ea2..b83f6bcfa51a 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -58,7 +58,6 @@
58#include <linux/i2c.h> 58#include <linux/i2c.h>
59#include <linux/i2c-dev.h> 59#include <linux/i2c-dev.h>
60#include <linux/atalk.h> 60#include <linux/atalk.h>
61#include <linux/loop.h>
62 61
63#include <net/bluetooth/bluetooth.h> 62#include <net/bluetooth/bluetooth.h>
64#include <net/bluetooth/hci.h> 63#include <net/bluetooth/hci.h>
@@ -68,6 +67,7 @@
68#include <linux/gigaset_dev.h> 67#include <linux/gigaset_dev.h>
69 68
70#ifdef CONFIG_BLOCK 69#ifdef CONFIG_BLOCK
70#include <linux/loop.h>
71#include <scsi/scsi.h> 71#include <scsi/scsi.h>
72#include <scsi/scsi_ioctl.h> 72#include <scsi/scsi_ioctl.h>
73#include <scsi/sg.h> 73#include <scsi/sg.h>
@@ -2660,6 +2660,8 @@ HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl)
2660HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl) 2660HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl)
2661/* block stuff */ 2661/* block stuff */
2662#ifdef CONFIG_BLOCK 2662#ifdef CONFIG_BLOCK
2663/* loop */
2664IGNORE_IOCTL(LOOP_CLR_FD)
2663/* Raw devices */ 2665/* Raw devices */
2664HANDLE_IOCTL(RAW_SETBIND, raw_ioctl) 2666HANDLE_IOCTL(RAW_SETBIND, raw_ioctl)
2665HANDLE_IOCTL(RAW_GETBIND, raw_ioctl) 2667HANDLE_IOCTL(RAW_GETBIND, raw_ioctl)
@@ -2728,9 +2730,6 @@ HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans)
2728IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32) 2730IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32)
2729IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32) 2731IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32)
2730 2732
2731/* loop */
2732IGNORE_IOCTL(LOOP_CLR_FD)
2733
2734#ifdef CONFIG_SPARC 2733#ifdef CONFIG_SPARC
2735/* Sparc framebuffers, handled in sbusfb_compat_ioctl() */ 2734/* Sparc framebuffers, handled in sbusfb_compat_ioctl() */
2736IGNORE_IOCTL(FBIOGTYPE) 2735IGNORE_IOCTL(FBIOGTYPE)
diff --git a/fs/dcache.c b/fs/dcache.c
index 761d30be2683..1fcffebfb44f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2149,7 +2149,6 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2149 int result; 2149 int result;
2150 unsigned long seq; 2150 unsigned long seq;
2151 2151
2152 /* FIXME: This is old behavior, needed? Please check callers. */
2153 if (new_dentry == old_dentry) 2152 if (new_dentry == old_dentry)
2154 return 1; 2153 return 1;
2155 2154
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 8b65f289ee00..b91851f1cda3 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -483,15 +483,7 @@ int ecryptfs_encrypt_page(struct page *page)
483 ecryptfs_inode = page->mapping->host; 483 ecryptfs_inode = page->mapping->host;
484 crypt_stat = 484 crypt_stat =
485 &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); 485 &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat);
486 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { 486 BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED));
487 rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page,
488 0, PAGE_CACHE_SIZE);
489 if (rc)
490 printk(KERN_ERR "%s: Error attempting to copy "
491 "page at index [%ld]\n", __func__,
492 page->index);
493 goto out;
494 }
495 enc_extent_page = alloc_page(GFP_USER); 487 enc_extent_page = alloc_page(GFP_USER);
496 if (!enc_extent_page) { 488 if (!enc_extent_page) {
497 rc = -ENOMEM; 489 rc = -ENOMEM;
@@ -620,16 +612,7 @@ int ecryptfs_decrypt_page(struct page *page)
620 ecryptfs_inode = page->mapping->host; 612 ecryptfs_inode = page->mapping->host;
621 crypt_stat = 613 crypt_stat =
622 &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); 614 &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat);
623 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { 615 BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED));
624 rc = ecryptfs_read_lower_page_segment(page, page->index, 0,
625 PAGE_CACHE_SIZE,
626 ecryptfs_inode);
627 if (rc)
628 printk(KERN_ERR "%s: Error attempting to copy "
629 "page at index [%ld]\n", __func__,
630 page->index);
631 goto out;
632 }
633 enc_extent_page = alloc_page(GFP_USER); 616 enc_extent_page = alloc_page(GFP_USER);
634 if (!enc_extent_page) { 617 if (!enc_extent_page) {
635 rc = -ENOMEM; 618 rc = -ENOMEM;
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 064c5820e4e5..00b30a2d5466 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -269,6 +269,7 @@ struct ecryptfs_crypt_stat {
269#define ECRYPTFS_ENCRYPT_FILENAMES 0x00000800 269#define ECRYPTFS_ENCRYPT_FILENAMES 0x00000800
270#define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00001000 270#define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00001000
271#define ECRYPTFS_ENCFN_USE_FEK 0x00002000 271#define ECRYPTFS_ENCFN_USE_FEK 0x00002000
272#define ECRYPTFS_UNLINK_SIGS 0x00004000
272 u32 flags; 273 u32 flags;
273 unsigned int file_version; 274 unsigned int file_version;
274 size_t iv_bytes; 275 size_t iv_bytes;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 55b3145b8072..2f0945d63297 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -379,9 +379,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
379 goto out_d_drop; 379 goto out_d_drop;
380 } 380 }
381 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); 381 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
382 mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
382 lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name, 383 lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
383 lower_dir_dentry, 384 lower_dir_dentry,
384 ecryptfs_dentry->d_name.len); 385 ecryptfs_dentry->d_name.len);
386 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
385 if (IS_ERR(lower_dentry)) { 387 if (IS_ERR(lower_dentry)) {
386 rc = PTR_ERR(lower_dentry); 388 rc = PTR_ERR(lower_dentry);
387 printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " 389 printk(KERN_ERR "%s: lookup_one_len() returned [%d] on "
@@ -406,9 +408,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
406 "filename; rc = [%d]\n", __func__, rc); 408 "filename; rc = [%d]\n", __func__, rc);
407 goto out_d_drop; 409 goto out_d_drop;
408 } 410 }
411 mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
409 lower_dentry = lookup_one_len(encrypted_and_encoded_name, 412 lower_dentry = lookup_one_len(encrypted_and_encoded_name,
410 lower_dir_dentry, 413 lower_dir_dentry,
411 encrypted_and_encoded_name_size - 1); 414 encrypted_and_encoded_name_size - 1);
415 mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
412 if (IS_ERR(lower_dentry)) { 416 if (IS_ERR(lower_dentry)) {
413 rc = PTR_ERR(lower_dentry); 417 rc = PTR_ERR(lower_dentry);
414 printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " 418 printk(KERN_ERR "%s: lookup_one_len() returned [%d] on "
@@ -636,8 +640,9 @@ static int
636ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) 640ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
637{ 641{
638 char *lower_buf; 642 char *lower_buf;
643 size_t lower_bufsiz;
639 struct dentry *lower_dentry; 644 struct dentry *lower_dentry;
640 struct ecryptfs_crypt_stat *crypt_stat; 645 struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
641 char *plaintext_name; 646 char *plaintext_name;
642 size_t plaintext_name_size; 647 size_t plaintext_name_size;
643 mm_segment_t old_fs; 648 mm_segment_t old_fs;
@@ -648,12 +653,21 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
648 rc = -EINVAL; 653 rc = -EINVAL;
649 goto out; 654 goto out;
650 } 655 }
651 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; 656 mount_crypt_stat = &ecryptfs_superblock_to_private(
657 dentry->d_sb)->mount_crypt_stat;
658 /*
659 * If the lower filename is encrypted, it will result in a significantly
660 * longer name. If needed, truncate the name after decode and decrypt.
661 */
662 if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)
663 lower_bufsiz = PATH_MAX;
664 else
665 lower_bufsiz = bufsiz;
652 /* Released in this function */ 666 /* Released in this function */
653 lower_buf = kmalloc(bufsiz, GFP_KERNEL); 667 lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL);
654 if (lower_buf == NULL) { 668 if (lower_buf == NULL) {
655 printk(KERN_ERR "%s: Out of memory whilst attempting to " 669 printk(KERN_ERR "%s: Out of memory whilst attempting to "
656 "kmalloc [%d] bytes\n", __func__, bufsiz); 670 "kmalloc [%zd] bytes\n", __func__, lower_bufsiz);
657 rc = -ENOMEM; 671 rc = -ENOMEM;
658 goto out; 672 goto out;
659 } 673 }
@@ -661,7 +675,7 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
661 set_fs(get_ds()); 675 set_fs(get_ds());
662 rc = lower_dentry->d_inode->i_op->readlink(lower_dentry, 676 rc = lower_dentry->d_inode->i_op->readlink(lower_dentry,
663 (char __user *)lower_buf, 677 (char __user *)lower_buf,
664 bufsiz); 678 lower_bufsiz);
665 set_fs(old_fs); 679 set_fs(old_fs);
666 if (rc >= 0) { 680 if (rc >= 0) {
667 rc = ecryptfs_decode_and_decrypt_filename(&plaintext_name, 681 rc = ecryptfs_decode_and_decrypt_filename(&plaintext_name,
@@ -674,7 +688,9 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
674 rc); 688 rc);
675 goto out_free_lower_buf; 689 goto out_free_lower_buf;
676 } 690 }
677 rc = copy_to_user(buf, plaintext_name, plaintext_name_size); 691 /* Check for bufsiz <= 0 done in sys_readlinkat() */
692 rc = copy_to_user(buf, plaintext_name,
693 min((size_t) bufsiz, plaintext_name_size));
678 if (rc) 694 if (rc)
679 rc = -EFAULT; 695 rc = -EFAULT;
680 else 696 else
@@ -814,6 +830,13 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
814 size_t num_zeros = (PAGE_CACHE_SIZE 830 size_t num_zeros = (PAGE_CACHE_SIZE
815 - (new_length & ~PAGE_CACHE_MASK)); 831 - (new_length & ~PAGE_CACHE_MASK));
816 832
833 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
834 rc = vmtruncate(inode, new_length);
835 if (rc)
836 goto out_free;
837 rc = vmtruncate(lower_dentry->d_inode, new_length);
838 goto out_free;
839 }
817 if (num_zeros) { 840 if (num_zeros) {
818 char *zeros_virt; 841 char *zeros_virt;
819 842
@@ -915,8 +938,6 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
915 } 938 }
916 rc = 0; 939 rc = 0;
917 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); 940 crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
918 mutex_unlock(&crypt_stat->cs_mutex);
919 goto out;
920 } 941 }
921 } 942 }
922 mutex_unlock(&crypt_stat->cs_mutex); 943 mutex_unlock(&crypt_stat->cs_mutex);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index aed56c25539b..ccabd5faa04d 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -190,14 +190,14 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
190 init_special_inode(inode, lower_inode->i_mode, 190 init_special_inode(inode, lower_inode->i_mode,
191 lower_inode->i_rdev); 191 lower_inode->i_rdev);
192 dentry->d_op = &ecryptfs_dops; 192 dentry->d_op = &ecryptfs_dops;
193 if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD)
194 d_add(dentry, inode);
195 else
196 d_instantiate(dentry, inode);
197 fsstack_copy_attr_all(inode, lower_inode, NULL); 193 fsstack_copy_attr_all(inode, lower_inode, NULL);
198 /* This size will be overwritten for real files w/ headers and 194 /* This size will be overwritten for real files w/ headers and
199 * other metadata */ 195 * other metadata */
200 fsstack_copy_inode_size(inode, lower_inode); 196 fsstack_copy_inode_size(inode, lower_inode);
197 if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD)
198 d_add(dentry, inode);
199 else
200 d_instantiate(dentry, inode);
201out: 201out:
202 return rc; 202 return rc;
203} 203}
@@ -208,7 +208,7 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig,
208 ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, 208 ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata,
209 ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig, 209 ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig,
210 ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes, 210 ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes,
211 ecryptfs_opt_err }; 211 ecryptfs_opt_unlink_sigs, ecryptfs_opt_err };
212 212
213static const match_table_t tokens = { 213static const match_table_t tokens = {
214 {ecryptfs_opt_sig, "sig=%s"}, 214 {ecryptfs_opt_sig, "sig=%s"},
@@ -222,6 +222,7 @@ static const match_table_t tokens = {
222 {ecryptfs_opt_fnek_sig, "ecryptfs_fnek_sig=%s"}, 222 {ecryptfs_opt_fnek_sig, "ecryptfs_fnek_sig=%s"},
223 {ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"}, 223 {ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"},
224 {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"}, 224 {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"},
225 {ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"},
225 {ecryptfs_opt_err, NULL} 226 {ecryptfs_opt_err, NULL}
226}; 227};
227 228
@@ -402,6 +403,9 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
402 fn_cipher_key_bytes; 403 fn_cipher_key_bytes;
403 fn_cipher_key_bytes_set = 1; 404 fn_cipher_key_bytes_set = 1;
404 break; 405 break;
406 case ecryptfs_opt_unlink_sigs:
407 mount_crypt_stat->flags |= ECRYPTFS_UNLINK_SIGS;
408 break;
405 case ecryptfs_opt_err: 409 case ecryptfs_opt_err:
406 default: 410 default:
407 printk(KERN_WARNING 411 printk(KERN_WARNING
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 295e7fa56755..f1c17e87c5fb 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -133,45 +133,6 @@ out:
133 return rc; 133 return rc;
134} 134}
135 135
136static int
137ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type,
138 struct ecryptfs_msg_ctx **msg_ctx);
139
140/**
141 * ecryptfs_send_raw_message
142 * @msg_type: Message type
143 * @daemon: Daemon struct for recipient of message
144 *
145 * A raw message is one that does not include an ecryptfs_message
146 * struct. It simply has a type.
147 *
148 * Must be called with ecryptfs_daemon_hash_mux held.
149 *
150 * Returns zero on success; non-zero otherwise
151 */
152static int ecryptfs_send_raw_message(u8 msg_type,
153 struct ecryptfs_daemon *daemon)
154{
155 struct ecryptfs_msg_ctx *msg_ctx;
156 int rc;
157
158 rc = ecryptfs_send_message_locked(NULL, 0, msg_type, &msg_ctx);
159 if (rc) {
160 printk(KERN_ERR "%s: Error whilst attempting to send "
161 "message to ecryptfsd; rc = [%d]\n", __func__, rc);
162 goto out;
163 }
164 /* Raw messages are logically context-free (e.g., no
165 * reply is expected), so we set the state of the
166 * ecryptfs_msg_ctx object to indicate that it should
167 * be freed as soon as the message is sent. */
168 mutex_lock(&msg_ctx->mux);
169 msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY;
170 mutex_unlock(&msg_ctx->mux);
171out:
172 return rc;
173}
174
175/** 136/**
176 * ecryptfs_spawn_daemon - Create and initialize a new daemon struct 137 * ecryptfs_spawn_daemon - Create and initialize a new daemon struct
177 * @daemon: Pointer to set to newly allocated daemon struct 138 * @daemon: Pointer to set to newly allocated daemon struct
@@ -212,49 +173,6 @@ out:
212} 173}
213 174
214/** 175/**
215 * ecryptfs_process_helo
216 * @euid: The user ID owner of the message
217 * @user_ns: The namespace in which @euid applies
218 * @pid: The process ID for the userspace program that sent the
219 * message
220 *
221 * Adds the euid and pid values to the daemon euid hash. If an euid
222 * already has a daemon pid registered, the daemon will be
223 * unregistered before the new daemon is put into the hash list.
224 * Returns zero after adding a new daemon to the hash list;
225 * non-zero otherwise.
226 */
227int ecryptfs_process_helo(uid_t euid, struct user_namespace *user_ns,
228 struct pid *pid)
229{
230 struct ecryptfs_daemon *new_daemon;
231 struct ecryptfs_daemon *old_daemon;
232 int rc;
233
234 mutex_lock(&ecryptfs_daemon_hash_mux);
235 rc = ecryptfs_find_daemon_by_euid(&old_daemon, euid, user_ns);
236 if (rc != 0) {
237 printk(KERN_WARNING "Received request from user [%d] "
238 "to register daemon [0x%p]; unregistering daemon "
239 "[0x%p]\n", euid, pid, old_daemon->pid);
240 rc = ecryptfs_send_raw_message(ECRYPTFS_MSG_QUIT, old_daemon);
241 if (rc)
242 printk(KERN_WARNING "Failed to send QUIT "
243 "message to daemon [0x%p]; rc = [%d]\n",
244 old_daemon->pid, rc);
245 hlist_del(&old_daemon->euid_chain);
246 kfree(old_daemon);
247 }
248 rc = ecryptfs_spawn_daemon(&new_daemon, euid, user_ns, pid);
249 if (rc)
250 printk(KERN_ERR "%s: The gods are displeased with this attempt "
251 "to create a new daemon object for euid [%d]; pid "
252 "[0x%p]; rc = [%d]\n", __func__, euid, pid, rc);
253 mutex_unlock(&ecryptfs_daemon_hash_mux);
254 return rc;
255}
256
257/**
258 * ecryptfs_exorcise_daemon - Destroy the daemon struct 176 * ecryptfs_exorcise_daemon - Destroy the daemon struct
259 * 177 *
260 * Must be called ceremoniously while in possession of 178 * Must be called ceremoniously while in possession of
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index a67fea655f49..4ec8f61ccf5a 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -193,26 +193,20 @@ int ecryptfs_send_miscdev(char *data, size_t data_size,
193 int rc = 0; 193 int rc = 0;
194 194
195 mutex_lock(&msg_ctx->mux); 195 mutex_lock(&msg_ctx->mux);
196 if (data) { 196 msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size),
197 msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size), 197 GFP_KERNEL);
198 GFP_KERNEL); 198 if (!msg_ctx->msg) {
199 if (!msg_ctx->msg) { 199 rc = -ENOMEM;
200 rc = -ENOMEM; 200 printk(KERN_ERR "%s: Out of memory whilst attempting "
201 printk(KERN_ERR "%s: Out of memory whilst attempting " 201 "to kmalloc(%zd, GFP_KERNEL)\n", __func__,
202 "to kmalloc(%zd, GFP_KERNEL)\n", __func__, 202 (sizeof(*msg_ctx->msg) + data_size));
203 (sizeof(*msg_ctx->msg) + data_size)); 203 goto out_unlock;
204 goto out_unlock; 204 }
205 }
206 } else
207 msg_ctx->msg = NULL;
208 msg_ctx->msg->index = msg_ctx->index; 205 msg_ctx->msg->index = msg_ctx->index;
209 msg_ctx->msg->data_len = data_size; 206 msg_ctx->msg->data_len = data_size;
210 msg_ctx->type = msg_type; 207 msg_ctx->type = msg_type;
211 if (data) { 208 memcpy(msg_ctx->msg->data, data, data_size);
212 memcpy(msg_ctx->msg->data, data, data_size); 209 msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size);
213 msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size);
214 } else
215 msg_ctx->msg_size = 0;
216 mutex_lock(&daemon->mux); 210 mutex_lock(&daemon->mux);
217 list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue); 211 list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue);
218 daemon->num_queued_msg_ctx++; 212 daemon->num_queued_msg_ctx++;
@@ -418,18 +412,13 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
418 412
419 if (count == 0) 413 if (count == 0)
420 goto out; 414 goto out;
421 data = kmalloc(count, GFP_KERNEL); 415
422 if (!data) { 416 data = memdup_user(buf, count);
423 printk(KERN_ERR "%s: Out of memory whilst attempting to " 417 if (IS_ERR(data)) {
424 "kmalloc([%zd], GFP_KERNEL)\n", __func__, count); 418 printk(KERN_ERR "%s: memdup_user returned error [%ld]\n",
419 __func__, PTR_ERR(data));
425 goto out; 420 goto out;
426 } 421 }
427 rc = copy_from_user(data, buf, count);
428 if (rc) {
429 printk(KERN_ERR "%s: copy_from_user returned error [%d]\n",
430 __func__, rc);
431 goto out_free;
432 }
433 sz = count; 422 sz = count;
434 i = 0; 423 i = 0;
435 switch (data[i++]) { 424 switch (data[i++]) {
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 46cec2b69796..5c6bab9786e3 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -449,6 +449,7 @@ int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode)
449 struct ecryptfs_crypt_stat *crypt_stat; 449 struct ecryptfs_crypt_stat *crypt_stat;
450 450
451 crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; 451 crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
452 BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED));
452 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) 453 if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
453 return ecryptfs_write_inode_size_to_xattr(ecryptfs_inode); 454 return ecryptfs_write_inode_size_to_xattr(ecryptfs_inode);
454 else 455 else
@@ -490,6 +491,16 @@ static int ecryptfs_write_end(struct file *file,
490 ecryptfs_printk(KERN_DEBUG, "Not a new file\n"); 491 ecryptfs_printk(KERN_DEBUG, "Not a new file\n");
491 ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" 492 ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
492 "(page w/ index = [0x%.16x], to = [%d])\n", index, to); 493 "(page w/ index = [0x%.16x], to = [%d])\n", index, to);
494 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
495 rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page, 0,
496 to);
497 if (!rc) {
498 rc = copied;
499 fsstack_copy_inode_size(ecryptfs_inode,
500 ecryptfs_inode_to_lower(ecryptfs_inode));
501 }
502 goto out;
503 }
493 /* Fills in zeros if 'to' goes beyond inode size */ 504 /* Fills in zeros if 'to' goes beyond inode size */
494 rc = fill_zeros_to_end_of_page(page, to); 505 rc = fill_zeros_to_end_of_page(page, to);
495 if (rc) { 506 if (rc) {
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 75c2ea9fee35..a137c6ea2fee 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -117,13 +117,15 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
117 size_t size) 117 size_t size)
118{ 118{
119 struct page *ecryptfs_page; 119 struct page *ecryptfs_page;
120 struct ecryptfs_crypt_stat *crypt_stat;
121 struct inode *ecryptfs_inode = ecryptfs_file->f_dentry->d_inode;
120 char *ecryptfs_page_virt; 122 char *ecryptfs_page_virt;
121 loff_t ecryptfs_file_size = 123 loff_t ecryptfs_file_size = i_size_read(ecryptfs_inode);
122 i_size_read(ecryptfs_file->f_dentry->d_inode);
123 loff_t data_offset = 0; 124 loff_t data_offset = 0;
124 loff_t pos; 125 loff_t pos;
125 int rc = 0; 126 int rc = 0;
126 127
128 crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
127 /* 129 /*
128 * if we are writing beyond current size, then start pos 130 * if we are writing beyond current size, then start pos
129 * at the current size - we'll fill in zeros from there. 131 * at the current size - we'll fill in zeros from there.
@@ -184,7 +186,13 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
184 flush_dcache_page(ecryptfs_page); 186 flush_dcache_page(ecryptfs_page);
185 SetPageUptodate(ecryptfs_page); 187 SetPageUptodate(ecryptfs_page);
186 unlock_page(ecryptfs_page); 188 unlock_page(ecryptfs_page);
187 rc = ecryptfs_encrypt_page(ecryptfs_page); 189 if (crypt_stat->flags & ECRYPTFS_ENCRYPTED)
190 rc = ecryptfs_encrypt_page(ecryptfs_page);
191 else
192 rc = ecryptfs_write_lower_page_segment(ecryptfs_inode,
193 ecryptfs_page,
194 start_offset_in_page,
195 data_offset);
188 page_cache_release(ecryptfs_page); 196 page_cache_release(ecryptfs_page);
189 if (rc) { 197 if (rc) {
190 printk(KERN_ERR "%s: Error encrypting " 198 printk(KERN_ERR "%s: Error encrypting "
@@ -194,14 +202,16 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset,
194 pos += num_bytes; 202 pos += num_bytes;
195 } 203 }
196 if ((offset + size) > ecryptfs_file_size) { 204 if ((offset + size) > ecryptfs_file_size) {
197 i_size_write(ecryptfs_file->f_dentry->d_inode, (offset + size)); 205 i_size_write(ecryptfs_inode, (offset + size));
198 rc = ecryptfs_write_inode_size_to_metadata( 206 if (crypt_stat->flags & ECRYPTFS_ENCRYPTED) {
199 ecryptfs_file->f_dentry->d_inode); 207 rc = ecryptfs_write_inode_size_to_metadata(
200 if (rc) { 208 ecryptfs_inode);
201 printk(KERN_ERR "Problem with " 209 if (rc) {
202 "ecryptfs_write_inode_size_to_metadata; " 210 printk(KERN_ERR "Problem with "
203 "rc = [%d]\n", rc); 211 "ecryptfs_write_inode_size_to_metadata; "
204 goto out; 212 "rc = [%d]\n", rc);
213 goto out;
214 }
205 } 215 }
206 } 216 }
207out: 217out:
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index c27ac2b358a1..fa4c7e7d15d9 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -170,7 +170,10 @@ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
170 list_for_each_entry(walker, 170 list_for_each_entry(walker,
171 &mount_crypt_stat->global_auth_tok_list, 171 &mount_crypt_stat->global_auth_tok_list,
172 mount_crypt_stat_list) { 172 mount_crypt_stat_list) {
173 seq_printf(m, ",ecryptfs_sig=%s", walker->sig); 173 if (walker->flags & ECRYPTFS_AUTH_TOK_FNEK)
174 seq_printf(m, ",ecryptfs_fnek_sig=%s", walker->sig);
175 else
176 seq_printf(m, ",ecryptfs_sig=%s", walker->sig);
174 } 177 }
175 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); 178 mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex);
176 179
@@ -186,6 +189,8 @@ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
186 seq_printf(m, ",ecryptfs_xattr_metadata"); 189 seq_printf(m, ",ecryptfs_xattr_metadata");
187 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) 190 if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
188 seq_printf(m, ",ecryptfs_encrypted_view"); 191 seq_printf(m, ",ecryptfs_encrypted_view");
192 if (mount_crypt_stat->flags & ECRYPTFS_UNLINK_SIGS)
193 seq_printf(m, ",ecryptfs_unlink_sigs");
189 194
190 return 0; 195 return 0;
191} 196}
diff --git a/fs/exec.c b/fs/exec.c
index 052a961e41aa..a3a8ce83940f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1060,7 +1060,6 @@ EXPORT_SYMBOL(install_exec_creds);
1060int check_unsafe_exec(struct linux_binprm *bprm) 1060int check_unsafe_exec(struct linux_binprm *bprm)
1061{ 1061{
1062 struct task_struct *p = current, *t; 1062 struct task_struct *p = current, *t;
1063 unsigned long flags;
1064 unsigned n_fs; 1063 unsigned n_fs;
1065 int res = 0; 1064 int res = 0;
1066 1065
@@ -1068,21 +1067,22 @@ int check_unsafe_exec(struct linux_binprm *bprm)
1068 1067
1069 n_fs = 1; 1068 n_fs = 1;
1070 write_lock(&p->fs->lock); 1069 write_lock(&p->fs->lock);
1071 lock_task_sighand(p, &flags); 1070 rcu_read_lock();
1072 for (t = next_thread(p); t != p; t = next_thread(t)) { 1071 for (t = next_thread(p); t != p; t = next_thread(t)) {
1073 if (t->fs == p->fs) 1072 if (t->fs == p->fs)
1074 n_fs++; 1073 n_fs++;
1075 } 1074 }
1075 rcu_read_unlock();
1076 1076
1077 if (p->fs->users > n_fs) { 1077 if (p->fs->users > n_fs) {
1078 bprm->unsafe |= LSM_UNSAFE_SHARE; 1078 bprm->unsafe |= LSM_UNSAFE_SHARE;
1079 } else { 1079 } else {
1080 if (p->fs->in_exec) 1080 res = -EAGAIN;
1081 res = -EAGAIN; 1081 if (!p->fs->in_exec) {
1082 p->fs->in_exec = 1; 1082 p->fs->in_exec = 1;
1083 res = 1;
1084 }
1083 } 1085 }
1084
1085 unlock_task_sighand(p, &flags);
1086 write_unlock(&p->fs->lock); 1086 write_unlock(&p->fs->lock);
1087 1087
1088 return res; 1088 return res;
@@ -1284,6 +1284,7 @@ int do_execve(char * filename,
1284 struct linux_binprm *bprm; 1284 struct linux_binprm *bprm;
1285 struct file *file; 1285 struct file *file;
1286 struct files_struct *displaced; 1286 struct files_struct *displaced;
1287 bool clear_in_exec;
1287 int retval; 1288 int retval;
1288 1289
1289 retval = unshare_files(&displaced); 1290 retval = unshare_files(&displaced);
@@ -1306,8 +1307,9 @@ int do_execve(char * filename,
1306 goto out_unlock; 1307 goto out_unlock;
1307 1308
1308 retval = check_unsafe_exec(bprm); 1309 retval = check_unsafe_exec(bprm);
1309 if (retval) 1310 if (retval < 0)
1310 goto out_unlock; 1311 goto out_unlock;
1312 clear_in_exec = retval;
1311 1313
1312 file = open_exec(filename); 1314 file = open_exec(filename);
1313 retval = PTR_ERR(file); 1315 retval = PTR_ERR(file);
@@ -1355,9 +1357,7 @@ int do_execve(char * filename,
1355 goto out; 1357 goto out;
1356 1358
1357 /* execve succeeded */ 1359 /* execve succeeded */
1358 write_lock(&current->fs->lock);
1359 current->fs->in_exec = 0; 1360 current->fs->in_exec = 0;
1360 write_unlock(&current->fs->lock);
1361 current->in_execve = 0; 1361 current->in_execve = 0;
1362 mutex_unlock(&current->cred_exec_mutex); 1362 mutex_unlock(&current->cred_exec_mutex);
1363 acct_update_integrals(current); 1363 acct_update_integrals(current);
@@ -1377,9 +1377,8 @@ out_file:
1377 } 1377 }
1378 1378
1379out_unmark: 1379out_unmark:
1380 write_lock(&current->fs->lock); 1380 if (clear_in_exec)
1381 current->fs->in_exec = 0; 1381 current->fs->in_exec = 0;
1382 write_unlock(&current->fs->lock);
1383 1382
1384out_unlock: 1383out_unlock:
1385 current->in_execve = 0; 1384 current->in_execve = 0;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index f983225266dc..5c4afe652245 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1395,8 +1395,10 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
1395 blk++; 1395 blk++;
1396 } 1396 }
1397out: 1397out:
1398 if (len == towrite) 1398 if (len == towrite) {
1399 mutex_unlock(&inode->i_mutex);
1399 return err; 1400 return err;
1401 }
1400 if (inode->i_size < off+len-towrite) 1402 if (inode->i_size < off+len-towrite)
1401 i_size_write(inode, off+len-towrite); 1403 i_size_write(inode, off+len-towrite);
1402 inode->i_version++; 1404 inode->i_version++;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2a1cb0979768..e40332158340 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -326,11 +326,14 @@ ext4_ext_max_entries(struct inode *inode, int depth)
326 326
327static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) 327static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
328{ 328{
329 ext4_fsblk_t block = ext_pblock(ext); 329 ext4_fsblk_t block = ext_pblock(ext), valid_block;
330 int len = ext4_ext_get_actual_len(ext); 330 int len = ext4_ext_get_actual_len(ext);
331 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; 331 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
332 if (unlikely(block < le32_to_cpu(es->s_first_data_block) || 332
333 ((block + len) > ext4_blocks_count(es)))) 333 valid_block = le32_to_cpu(es->s_first_data_block) +
334 EXT4_SB(inode->i_sb)->s_gdb_count;
335 if (unlikely(block <= valid_block ||
336 ((block + len) > ext4_blocks_count(es))))
334 return 0; 337 return 0;
335 else 338 else
336 return 1; 339 return 1;
@@ -339,10 +342,13 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
339static int ext4_valid_extent_idx(struct inode *inode, 342static int ext4_valid_extent_idx(struct inode *inode,
340 struct ext4_extent_idx *ext_idx) 343 struct ext4_extent_idx *ext_idx)
341{ 344{
342 ext4_fsblk_t block = idx_pblock(ext_idx); 345 ext4_fsblk_t block = idx_pblock(ext_idx), valid_block;
343 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; 346 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
344 if (unlikely(block < le32_to_cpu(es->s_first_data_block) || 347
345 (block >= ext4_blocks_count(es)))) 348 valid_block = le32_to_cpu(es->s_first_data_block) +
349 EXT4_SB(inode->i_sb)->s_gdb_count;
350 if (unlikely(block <= valid_block ||
351 (block >= ext4_blocks_count(es))))
346 return 0; 352 return 0;
347 else 353 else
348 return 1; 354 return 1;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 47b84e8df568..f18e0a08a6b5 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -585,6 +585,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
585fallback: 585fallback:
586 ngroups = sbi->s_groups_count; 586 ngroups = sbi->s_groups_count;
587 avefreei = freei / ngroups; 587 avefreei = freei / ngroups;
588fallback_retry:
588 parent_group = EXT4_I(parent)->i_block_group; 589 parent_group = EXT4_I(parent)->i_block_group;
589 for (i = 0; i < ngroups; i++) { 590 for (i = 0; i < ngroups; i++) {
590 grp = (parent_group + i) % ngroups; 591 grp = (parent_group + i) % ngroups;
@@ -602,7 +603,7 @@ fallback:
602 * filesystems the above test can fail to find any blockgroups 603 * filesystems the above test can fail to find any blockgroups
603 */ 604 */
604 avefreei = 0; 605 avefreei = 0;
605 goto fallback; 606 goto fallback_retry;
606 } 607 }
607 608
608 return -1; 609 return -1;
@@ -831,11 +832,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
831 ret2 = find_group_flex(sb, dir, &group); 832 ret2 = find_group_flex(sb, dir, &group);
832 if (ret2 == -1) { 833 if (ret2 == -1) {
833 ret2 = find_group_other(sb, dir, &group, mode); 834 ret2 = find_group_other(sb, dir, &group, mode);
834 if (ret2 == 0 && once) 835 if (ret2 == 0 && once) {
835 once = 0; 836 once = 0;
836 printk(KERN_NOTICE "ext4: find_group_flex " 837 printk(KERN_NOTICE "ext4: find_group_flex "
837 "failed, fallback succeeded dir %lu\n", 838 "failed, fallback succeeded dir %lu\n",
838 dir->i_ino); 839 dir->i_ino);
840 }
839 } 841 }
840 goto got_group; 842 goto got_group;
841 } 843 }
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c6bd6ced3bb7..e91f978c7f12 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4357,11 +4357,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4357 ei->i_flags = le32_to_cpu(raw_inode->i_flags); 4357 ei->i_flags = le32_to_cpu(raw_inode->i_flags);
4358 inode->i_blocks = ext4_inode_blocks(raw_inode, ei); 4358 inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
4359 ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); 4359 ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
4360 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 4360 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT))
4361 cpu_to_le32(EXT4_OS_HURD)) {
4362 ei->i_file_acl |= 4361 ei->i_file_acl |=
4363 ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; 4362 ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
4364 }
4365 inode->i_size = ext4_isize(raw_inode); 4363 inode->i_size = ext4_isize(raw_inode);
4366 ei->i_disksize = inode->i_size; 4364 ei->i_disksize = inode->i_size;
4367 inode->i_generation = le32_to_cpu(raw_inode->i_generation); 4365 inode->i_generation = le32_to_cpu(raw_inode->i_generation);
@@ -4409,9 +4407,23 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4409 (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; 4407 (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
4410 } 4408 }
4411 4409
4412 if (ei->i_flags & EXT4_EXTENTS_FL) { 4410 ret = 0;
4413 /* Validate extent which is part of inode */ 4411 if (ei->i_file_acl &&
4414 ret = ext4_ext_check_inode(inode); 4412 ((ei->i_file_acl <
4413 (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
4414 EXT4_SB(sb)->s_gdb_count)) ||
4415 (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
4416 ext4_error(sb, __func__,
4417 "bad extended attribute block %llu in inode #%lu",
4418 ei->i_file_acl, inode->i_ino);
4419 ret = -EIO;
4420 goto bad_inode;
4421 } else if (ei->i_flags & EXT4_EXTENTS_FL) {
4422 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
4423 (S_ISLNK(inode->i_mode) &&
4424 !ext4_inode_is_fast_symlink(inode)))
4425 /* Validate extent which is part of inode */
4426 ret = ext4_ext_check_inode(inode);
4415 } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 4427 } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
4416 (S_ISLNK(inode->i_mode) && 4428 (S_ISLNK(inode->i_mode) &&
4417 !ext4_inode_is_fast_symlink(inode))) { 4429 !ext4_inode_is_fast_symlink(inode))) {
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 1aa70260e6d1..a24c58e181db 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -199,7 +199,7 @@ SYSCALL_DEFINE3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2)
199 return retval; 199 return retval;
200} 200}
201 201
202int get_filesystem_list(char * buf) 202int __init get_filesystem_list(char *buf)
203{ 203{
204 int len = 0; 204 int len = 0;
205 struct file_system_type * tmp; 205 struct file_system_type * tmp;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index bf23a62aa925..70f87f43afa2 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -156,6 +156,12 @@ static void inode_go_sync(struct gfs2_glock *gl)
156 error = filemap_fdatawait(metamapping); 156 error = filemap_fdatawait(metamapping);
157 mapping_set_error(metamapping, error); 157 mapping_set_error(metamapping, error);
158 gfs2_ail_empty_gl(gl); 158 gfs2_ail_empty_gl(gl);
159 /*
160 * Writeback of the data mapping may cause the dirty flag to be set
161 * so we have to clear it again here.
162 */
163 smp_mb__before_clear_bit();
164 clear_bit(GLF_DIRTY, &gl->gl_flags);
159} 165}
160 166
161/** 167/**
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 101caf3ee861..5d82e91887e3 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -413,7 +413,9 @@ out_unlock:
413 gfs2_glock_dq(&gh); 413 gfs2_glock_dq(&gh);
414out: 414out:
415 gfs2_holder_uninit(&gh); 415 gfs2_holder_uninit(&gh);
416 if (ret) 416 if (ret == -ENOMEM)
417 ret = VM_FAULT_OOM;
418 else if (ret)
417 ret = VM_FAULT_SIGBUS; 419 ret = VM_FAULT_SIGBUS;
418 return ret; 420 return ret;
419} 421}
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index f03d024038ea..565038243fa2 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -212,8 +212,7 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
212 if (tmp == 0) 212 if (tmp == 0)
213 return BFITNOENT; 213 return BFITNOENT;
214 ptr--; 214 ptr--;
215 bit = fls64(tmp); 215 bit = __ffs64(tmp);
216 bit--; /* fls64 always adds one to the bit count */
217 bit /= 2; /* two bits per entry in the bitmap */ 216 bit /= 2; /* two bits per entry in the bitmap */
218 return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit; 217 return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit;
219} 218}
@@ -1445,10 +1444,12 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1445u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) 1444u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n)
1446{ 1445{
1447 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1446 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1447 struct buffer_head *dibh;
1448 struct gfs2_alloc *al = ip->i_alloc; 1448 struct gfs2_alloc *al = ip->i_alloc;
1449 struct gfs2_rgrpd *rgd = al->al_rgd; 1449 struct gfs2_rgrpd *rgd = al->al_rgd;
1450 u32 goal, blk; 1450 u32 goal, blk;
1451 u64 block; 1451 u64 block;
1452 int error;
1452 1453
1453 if (rgrp_contains_block(rgd, ip->i_goal)) 1454 if (rgrp_contains_block(rgd, ip->i_goal))
1454 goal = ip->i_goal - rgd->rd_data0; 1455 goal = ip->i_goal - rgd->rd_data0;
@@ -1461,7 +1462,13 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n)
1461 rgd->rd_last_alloc = blk; 1462 rgd->rd_last_alloc = blk;
1462 block = rgd->rd_data0 + blk; 1463 block = rgd->rd_data0 + blk;
1463 ip->i_goal = block; 1464 ip->i_goal = block;
1464 1465 error = gfs2_meta_inode_buffer(ip, &dibh);
1466 if (error == 0) {
1467 struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
1468 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1469 di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal);
1470 brelse(dibh);
1471 }
1465 gfs2_assert_withdraw(sdp, rgd->rd_free >= *n); 1472 gfs2_assert_withdraw(sdp, rgd->rd_free >= *n);
1466 rgd->rd_free -= *n; 1473 rgd->rd_free -= *n;
1467 1474
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 23a3c76711e0..153d9681192b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -26,7 +26,6 @@
26#include <linux/pagevec.h> 26#include <linux/pagevec.h>
27#include <linux/parser.h> 27#include <linux/parser.h>
28#include <linux/mman.h> 28#include <linux/mman.h>
29#include <linux/quotaops.h>
30#include <linux/slab.h> 29#include <linux/slab.h>
31#include <linux/dnotify.h> 30#include <linux/dnotify.h>
32#include <linux/statfs.h> 31#include <linux/statfs.h>
@@ -842,7 +841,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
842bad_val: 841bad_val:
843 printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n", 842 printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n",
844 args[0].from, p); 843 args[0].from, p);
845 return 1; 844 return -EINVAL;
846} 845}
847 846
848static int 847static int
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index a8e8513a78a9..06560c520f49 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -502,7 +502,7 @@ void journal_commit_transaction(journal_t *journal)
502 err = 0; 502 err = 0;
503 } 503 }
504 504
505 journal_write_revoke_records(journal, commit_transaction); 505 journal_write_revoke_records(journal, commit_transaction, write_op);
506 506
507 /* 507 /*
508 * If we found any dirty or locked buffers, then we should have 508 * If we found any dirty or locked buffers, then we should have
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 3e9afc2a91d2..da6cd9bdaabc 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -86,6 +86,7 @@
86#include <linux/slab.h> 86#include <linux/slab.h>
87#include <linux/list.h> 87#include <linux/list.h>
88#include <linux/init.h> 88#include <linux/init.h>
89#include <linux/bio.h>
89#endif 90#endif
90#include <linux/log2.h> 91#include <linux/log2.h>
91 92
@@ -118,8 +119,8 @@ struct jbd_revoke_table_s
118#ifdef __KERNEL__ 119#ifdef __KERNEL__
119static void write_one_revoke_record(journal_t *, transaction_t *, 120static void write_one_revoke_record(journal_t *, transaction_t *,
120 struct journal_head **, int *, 121 struct journal_head **, int *,
121 struct jbd_revoke_record_s *); 122 struct jbd_revoke_record_s *, int);
122static void flush_descriptor(journal_t *, struct journal_head *, int); 123static void flush_descriptor(journal_t *, struct journal_head *, int, int);
123#endif 124#endif
124 125
125/* Utility functions to maintain the revoke table */ 126/* Utility functions to maintain the revoke table */
@@ -500,7 +501,7 @@ void journal_switch_revoke_table(journal_t *journal)
500 * revoke hash, deleting the entries as we go. 501 * revoke hash, deleting the entries as we go.
501 */ 502 */
502void journal_write_revoke_records(journal_t *journal, 503void journal_write_revoke_records(journal_t *journal,
503 transaction_t *transaction) 504 transaction_t *transaction, int write_op)
504{ 505{
505 struct journal_head *descriptor; 506 struct journal_head *descriptor;
506 struct jbd_revoke_record_s *record; 507 struct jbd_revoke_record_s *record;
@@ -524,14 +525,14 @@ void journal_write_revoke_records(journal_t *journal,
524 hash_list->next; 525 hash_list->next;
525 write_one_revoke_record(journal, transaction, 526 write_one_revoke_record(journal, transaction,
526 &descriptor, &offset, 527 &descriptor, &offset,
527 record); 528 record, write_op);
528 count++; 529 count++;
529 list_del(&record->hash); 530 list_del(&record->hash);
530 kmem_cache_free(revoke_record_cache, record); 531 kmem_cache_free(revoke_record_cache, record);
531 } 532 }
532 } 533 }
533 if (descriptor) 534 if (descriptor)
534 flush_descriptor(journal, descriptor, offset); 535 flush_descriptor(journal, descriptor, offset, write_op);
535 jbd_debug(1, "Wrote %d revoke records\n", count); 536 jbd_debug(1, "Wrote %d revoke records\n", count);
536} 537}
537 538
@@ -544,7 +545,8 @@ static void write_one_revoke_record(journal_t *journal,
544 transaction_t *transaction, 545 transaction_t *transaction,
545 struct journal_head **descriptorp, 546 struct journal_head **descriptorp,
546 int *offsetp, 547 int *offsetp,
547 struct jbd_revoke_record_s *record) 548 struct jbd_revoke_record_s *record,
549 int write_op)
548{ 550{
549 struct journal_head *descriptor; 551 struct journal_head *descriptor;
550 int offset; 552 int offset;
@@ -563,7 +565,7 @@ static void write_one_revoke_record(journal_t *journal,
563 /* Make sure we have a descriptor with space left for the record */ 565 /* Make sure we have a descriptor with space left for the record */
564 if (descriptor) { 566 if (descriptor) {
565 if (offset == journal->j_blocksize) { 567 if (offset == journal->j_blocksize) {
566 flush_descriptor(journal, descriptor, offset); 568 flush_descriptor(journal, descriptor, offset, write_op);
567 descriptor = NULL; 569 descriptor = NULL;
568 } 570 }
569 } 571 }
@@ -600,7 +602,7 @@ static void write_one_revoke_record(journal_t *journal,
600 602
601static void flush_descriptor(journal_t *journal, 603static void flush_descriptor(journal_t *journal,
602 struct journal_head *descriptor, 604 struct journal_head *descriptor,
603 int offset) 605 int offset, int write_op)
604{ 606{
605 journal_revoke_header_t *header; 607 journal_revoke_header_t *header;
606 struct buffer_head *bh = jh2bh(descriptor); 608 struct buffer_head *bh = jh2bh(descriptor);
@@ -615,7 +617,7 @@ static void flush_descriptor(journal_t *journal,
615 set_buffer_jwrite(bh); 617 set_buffer_jwrite(bh);
616 BUFFER_TRACE(bh, "write"); 618 BUFFER_TRACE(bh, "write");
617 set_buffer_dirty(bh); 619 set_buffer_dirty(bh);
618 ll_rw_block(SWRITE, 1, &bh); 620 ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh);
619} 621}
620#endif 622#endif
621 623
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 073c8c3df7cd..0b7d3b8226fd 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -506,7 +506,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
506 if (err) 506 if (err)
507 jbd2_journal_abort(journal, err); 507 jbd2_journal_abort(journal, err);
508 508
509 jbd2_journal_write_revoke_records(journal, commit_transaction); 509 jbd2_journal_write_revoke_records(journal, commit_transaction,
510 write_op);
510 511
511 jbd_debug(3, "JBD: commit phase 2\n"); 512 jbd_debug(3, "JBD: commit phase 2\n");
512 513
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index bbe6d592d8b3..a360b06af2e3 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -86,6 +86,7 @@
86#include <linux/slab.h> 86#include <linux/slab.h>
87#include <linux/list.h> 87#include <linux/list.h>
88#include <linux/init.h> 88#include <linux/init.h>
89#include <linux/bio.h>
89#endif 90#endif
90#include <linux/log2.h> 91#include <linux/log2.h>
91 92
@@ -118,8 +119,8 @@ struct jbd2_revoke_table_s
118#ifdef __KERNEL__ 119#ifdef __KERNEL__
119static void write_one_revoke_record(journal_t *, transaction_t *, 120static void write_one_revoke_record(journal_t *, transaction_t *,
120 struct journal_head **, int *, 121 struct journal_head **, int *,
121 struct jbd2_revoke_record_s *); 122 struct jbd2_revoke_record_s *, int);
122static void flush_descriptor(journal_t *, struct journal_head *, int); 123static void flush_descriptor(journal_t *, struct journal_head *, int, int);
123#endif 124#endif
124 125
125/* Utility functions to maintain the revoke table */ 126/* Utility functions to maintain the revoke table */
@@ -499,7 +500,8 @@ void jbd2_journal_switch_revoke_table(journal_t *journal)
499 * revoke hash, deleting the entries as we go. 500 * revoke hash, deleting the entries as we go.
500 */ 501 */
501void jbd2_journal_write_revoke_records(journal_t *journal, 502void jbd2_journal_write_revoke_records(journal_t *journal,
502 transaction_t *transaction) 503 transaction_t *transaction,
504 int write_op)
503{ 505{
504 struct journal_head *descriptor; 506 struct journal_head *descriptor;
505 struct jbd2_revoke_record_s *record; 507 struct jbd2_revoke_record_s *record;
@@ -523,14 +525,14 @@ void jbd2_journal_write_revoke_records(journal_t *journal,
523 hash_list->next; 525 hash_list->next;
524 write_one_revoke_record(journal, transaction, 526 write_one_revoke_record(journal, transaction,
525 &descriptor, &offset, 527 &descriptor, &offset,
526 record); 528 record, write_op);
527 count++; 529 count++;
528 list_del(&record->hash); 530 list_del(&record->hash);
529 kmem_cache_free(jbd2_revoke_record_cache, record); 531 kmem_cache_free(jbd2_revoke_record_cache, record);
530 } 532 }
531 } 533 }
532 if (descriptor) 534 if (descriptor)
533 flush_descriptor(journal, descriptor, offset); 535 flush_descriptor(journal, descriptor, offset, write_op);
534 jbd_debug(1, "Wrote %d revoke records\n", count); 536 jbd_debug(1, "Wrote %d revoke records\n", count);
535} 537}
536 538
@@ -543,7 +545,8 @@ static void write_one_revoke_record(journal_t *journal,
543 transaction_t *transaction, 545 transaction_t *transaction,
544 struct journal_head **descriptorp, 546 struct journal_head **descriptorp,
545 int *offsetp, 547 int *offsetp,
546 struct jbd2_revoke_record_s *record) 548 struct jbd2_revoke_record_s *record,
549 int write_op)
547{ 550{
548 struct journal_head *descriptor; 551 struct journal_head *descriptor;
549 int offset; 552 int offset;
@@ -562,7 +565,7 @@ static void write_one_revoke_record(journal_t *journal,
562 /* Make sure we have a descriptor with space left for the record */ 565 /* Make sure we have a descriptor with space left for the record */
563 if (descriptor) { 566 if (descriptor) {
564 if (offset == journal->j_blocksize) { 567 if (offset == journal->j_blocksize) {
565 flush_descriptor(journal, descriptor, offset); 568 flush_descriptor(journal, descriptor, offset, write_op);
566 descriptor = NULL; 569 descriptor = NULL;
567 } 570 }
568 } 571 }
@@ -607,7 +610,7 @@ static void write_one_revoke_record(journal_t *journal,
607 610
608static void flush_descriptor(journal_t *journal, 611static void flush_descriptor(journal_t *journal,
609 struct journal_head *descriptor, 612 struct journal_head *descriptor,
610 int offset) 613 int offset, int write_op)
611{ 614{
612 jbd2_journal_revoke_header_t *header; 615 jbd2_journal_revoke_header_t *header;
613 struct buffer_head *bh = jh2bh(descriptor); 616 struct buffer_head *bh = jh2bh(descriptor);
@@ -622,7 +625,7 @@ static void flush_descriptor(journal_t *journal,
622 set_buffer_jwrite(bh); 625 set_buffer_jwrite(bh);
623 BUFFER_TRACE(bh, "write"); 626 BUFFER_TRACE(bh, "write");
624 set_buffer_dirty(bh); 627 set_buffer_dirty(bh);
625 ll_rw_block(SWRITE, 1, &bh); 628 ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh);
626} 629}
627#endif 630#endif
628 631
diff --git a/fs/namei.c b/fs/namei.c
index b8433ebfae05..78f253cd2d4f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1248,6 +1248,8 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1248 int err; 1248 int err;
1249 struct qstr this; 1249 struct qstr this;
1250 1250
1251 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
1252
1251 err = __lookup_one_len(name, &this, base, len); 1253 err = __lookup_one_len(name, &this, base, len);
1252 if (err) 1254 if (err)
1253 return ERR_PTR(err); 1255 return ERR_PTR(err);
diff --git a/fs/namespace.c b/fs/namespace.c
index d9138f81ec10..41196209a906 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1377,7 +1377,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1377 if (parent_path) { 1377 if (parent_path) {
1378 detach_mnt(source_mnt, parent_path); 1378 detach_mnt(source_mnt, parent_path);
1379 attach_mnt(source_mnt, path); 1379 attach_mnt(source_mnt, path);
1380 touch_mnt_namespace(current->nsproxy->mnt_ns); 1380 touch_mnt_namespace(parent_path->mnt->mnt_ns);
1381 } else { 1381 } else {
1382 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); 1382 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
1383 commit_tree(source_mnt); 1383 commit_tree(source_mnt);
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index f54360f50a9c..fa038df63ac8 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -660,13 +660,10 @@ outrel:
660 if (user.object_name_len > NCP_OBJECT_NAME_MAX_LEN) 660 if (user.object_name_len > NCP_OBJECT_NAME_MAX_LEN)
661 return -ENOMEM; 661 return -ENOMEM;
662 if (user.object_name_len) { 662 if (user.object_name_len) {
663 newname = kmalloc(user.object_name_len, GFP_USER); 663 newname = memdup_user(user.object_name,
664 if (!newname) 664 user.object_name_len);
665 return -ENOMEM; 665 if (IS_ERR(newname))
666 if (copy_from_user(newname, user.object_name, user.object_name_len)) { 666 return PTR_ERR(newname);
667 kfree(newname);
668 return -EFAULT;
669 }
670 } else { 667 } else {
671 newname = NULL; 668 newname = NULL;
672 } 669 }
@@ -760,13 +757,9 @@ outrel:
760 if (user.len > NCP_PRIVATE_DATA_MAX_LEN) 757 if (user.len > NCP_PRIVATE_DATA_MAX_LEN)
761 return -ENOMEM; 758 return -ENOMEM;
762 if (user.len) { 759 if (user.len) {
763 new = kmalloc(user.len, GFP_USER); 760 new = memdup_user(user.data, user.len);
764 if (!new) 761 if (IS_ERR(new))
765 return -ENOMEM; 762 return PTR_ERR(new);
766 if (copy_from_user(new, user.data, user.len)) {
767 kfree(new);
768 return -EFAULT;
769 }
770 } else { 763 } else {
771 new = NULL; 764 new = NULL;
772 } 765 }
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index e6a1932c7110..35869a4921f1 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -713,7 +713,8 @@ nfs3_xdr_setaclargs(struct rpc_rqst *req, __be32 *p,
713 if (args->npages != 0) 713 if (args->npages != 0)
714 xdr_encode_pages(buf, args->pages, 0, args->len); 714 xdr_encode_pages(buf, args->pages, 0, args->len);
715 else 715 else
716 req->rq_slen += args->len; 716 req->rq_slen = xdr_adjust_iovec(req->rq_svec,
717 p + XDR_QUADLEN(args->len));
717 718
718 err = nfsacl_encode(buf, base, args->inode, 719 err = nfsacl_encode(buf, base, args->inode,
719 (args->mask & NFS_ACL) ? 720 (args->mask & NFS_ACL) ?
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 3444c0052a87..5275097a7565 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -229,21 +229,23 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
229 goto out; 229 goto out;
230 status = vfs_readdir(filp, nfsd4_build_namelist, &names); 230 status = vfs_readdir(filp, nfsd4_build_namelist, &names);
231 fput(filp); 231 fput(filp);
232 mutex_lock(&dir->d_inode->i_mutex);
232 while (!list_empty(&names)) { 233 while (!list_empty(&names)) {
233 entry = list_entry(names.next, struct name_list, list); 234 entry = list_entry(names.next, struct name_list, list);
234 235
235 dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); 236 dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
236 if (IS_ERR(dentry)) { 237 if (IS_ERR(dentry)) {
237 status = PTR_ERR(dentry); 238 status = PTR_ERR(dentry);
238 goto out; 239 break;
239 } 240 }
240 status = f(dir, dentry); 241 status = f(dir, dentry);
241 dput(dentry); 242 dput(dentry);
242 if (status) 243 if (status)
243 goto out; 244 break;
244 list_del(&entry->list); 245 list_del(&entry->list);
245 kfree(entry); 246 kfree(entry);
246 } 247 }
248 mutex_unlock(&dir->d_inode->i_mutex);
247out: 249out:
248 while (!list_empty(&names)) { 250 while (!list_empty(&names)) {
249 entry = list_entry(names.next, struct name_list, list); 251 entry = list_entry(names.next, struct name_list, list);
@@ -255,36 +257,6 @@ out:
255} 257}
256 258
257static int 259static int
258nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry)
259{
260 int status;
261
262 if (!S_ISREG(dir->d_inode->i_mode)) {
263 printk("nfsd4: non-file found in client recovery directory\n");
264 return -EINVAL;
265 }
266 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
267 status = vfs_unlink(dir->d_inode, dentry);
268 mutex_unlock(&dir->d_inode->i_mutex);
269 return status;
270}
271
272static int
273nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
274{
275 int status;
276
277 /* For now this directory should already be empty, but we empty it of
278 * any regular files anyway, just in case the directory was created by
279 * a kernel from the future.... */
280 nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
281 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
282 status = vfs_rmdir(dir->d_inode, dentry);
283 mutex_unlock(&dir->d_inode->i_mutex);
284 return status;
285}
286
287static int
288nfsd4_unlink_clid_dir(char *name, int namlen) 260nfsd4_unlink_clid_dir(char *name, int namlen)
289{ 261{
290 struct dentry *dentry; 262 struct dentry *dentry;
@@ -294,18 +266,18 @@ nfsd4_unlink_clid_dir(char *name, int namlen)
294 266
295 mutex_lock(&rec_dir.dentry->d_inode->i_mutex); 267 mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
296 dentry = lookup_one_len(name, rec_dir.dentry, namlen); 268 dentry = lookup_one_len(name, rec_dir.dentry, namlen);
297 mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
298 if (IS_ERR(dentry)) { 269 if (IS_ERR(dentry)) {
299 status = PTR_ERR(dentry); 270 status = PTR_ERR(dentry);
300 return status; 271 goto out_unlock;
301 } 272 }
302 status = -ENOENT; 273 status = -ENOENT;
303 if (!dentry->d_inode) 274 if (!dentry->d_inode)
304 goto out; 275 goto out;
305 276 status = vfs_rmdir(rec_dir.dentry->d_inode, dentry);
306 status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry);
307out: 277out:
308 dput(dentry); 278 dput(dentry);
279out_unlock:
280 mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
309 return status; 281 return status;
310} 282}
311 283
@@ -348,7 +320,7 @@ purge_old(struct dentry *parent, struct dentry *child)
348 if (nfs4_has_reclaimed_state(child->d_name.name, false)) 320 if (nfs4_has_reclaimed_state(child->d_name.name, false))
349 return 0; 321 return 0;
350 322
351 status = nfsd4_clear_clid_dir(parent, child); 323 status = vfs_rmdir(parent->d_inode, child);
352 if (status) 324 if (status)
353 printk("failed to remove client recovery directory %s\n", 325 printk("failed to remove client recovery directory %s\n",
354 child->d_name.name); 326 child->d_name.name);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ab93fcfef254..6c68ffd6b4bb 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -116,10 +116,15 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
116 } 116 }
117 if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { 117 if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
118 /* successfully crossed mount point */ 118 /* successfully crossed mount point */
119 exp_put(exp); 119 /*
120 *expp = exp2; 120 * This is subtle: dentry is *not* under mnt at this point.
121 * The only reason we are safe is that original mnt is pinned
122 * down by exp, so we should dput before putting exp.
123 */
121 dput(dentry); 124 dput(dentry);
122 *dpp = mounts; 125 *dpp = mounts;
126 exp_put(exp);
127 *expp = exp2;
123 } else { 128 } else {
124 exp_put(exp2); 129 exp_put(exp2);
125 dput(mounts); 130 dput(mounts);
@@ -1885,8 +1890,8 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen,
1885 return 0; 1890 return 0;
1886} 1891}
1887 1892
1888static int nfsd_buffered_readdir(struct file *file, filldir_t func, 1893static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func,
1889 struct readdir_cd *cdp, loff_t *offsetp) 1894 struct readdir_cd *cdp, loff_t *offsetp)
1890{ 1895{
1891 struct readdir_data buf; 1896 struct readdir_data buf;
1892 struct buffered_dirent *de; 1897 struct buffered_dirent *de;
@@ -1896,11 +1901,12 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func,
1896 1901
1897 buf.dirent = (void *)__get_free_page(GFP_KERNEL); 1902 buf.dirent = (void *)__get_free_page(GFP_KERNEL);
1898 if (!buf.dirent) 1903 if (!buf.dirent)
1899 return -ENOMEM; 1904 return nfserrno(-ENOMEM);
1900 1905
1901 offset = *offsetp; 1906 offset = *offsetp;
1902 1907
1903 while (1) { 1908 while (1) {
1909 struct inode *dir_inode = file->f_path.dentry->d_inode;
1904 unsigned int reclen; 1910 unsigned int reclen;
1905 1911
1906 cdp->err = nfserr_eof; /* will be cleared on successful read */ 1912 cdp->err = nfserr_eof; /* will be cleared on successful read */
@@ -1919,26 +1925,38 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func,
1919 if (!size) 1925 if (!size)
1920 break; 1926 break;
1921 1927
1928 /*
1929 * Various filldir functions may end up calling back into
1930 * lookup_one_len() and the file system's ->lookup() method.
1931 * These expect i_mutex to be held, as it would within readdir.
1932 */
1933 host_err = mutex_lock_killable(&dir_inode->i_mutex);
1934 if (host_err)
1935 break;
1936
1922 de = (struct buffered_dirent *)buf.dirent; 1937 de = (struct buffered_dirent *)buf.dirent;
1923 while (size > 0) { 1938 while (size > 0) {
1924 offset = de->offset; 1939 offset = de->offset;
1925 1940
1926 if (func(cdp, de->name, de->namlen, de->offset, 1941 if (func(cdp, de->name, de->namlen, de->offset,
1927 de->ino, de->d_type)) 1942 de->ino, de->d_type))
1928 goto done; 1943 break;
1929 1944
1930 if (cdp->err != nfs_ok) 1945 if (cdp->err != nfs_ok)
1931 goto done; 1946 break;
1932 1947
1933 reclen = ALIGN(sizeof(*de) + de->namlen, 1948 reclen = ALIGN(sizeof(*de) + de->namlen,
1934 sizeof(u64)); 1949 sizeof(u64));
1935 size -= reclen; 1950 size -= reclen;
1936 de = (struct buffered_dirent *)((char *)de + reclen); 1951 de = (struct buffered_dirent *)((char *)de + reclen);
1937 } 1952 }
1953 mutex_unlock(&dir_inode->i_mutex);
1954 if (size > 0) /* We bailed out early */
1955 break;
1956
1938 offset = vfs_llseek(file, 0, SEEK_CUR); 1957 offset = vfs_llseek(file, 0, SEEK_CUR);
1939 } 1958 }
1940 1959
1941 done:
1942 free_page((unsigned long)(buf.dirent)); 1960 free_page((unsigned long)(buf.dirent));
1943 1961
1944 if (host_err) 1962 if (host_err)
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index f75efa22df5e..81e4eb60972e 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -18,6 +18,9 @@
18#ifndef arch_irq_stat 18#ifndef arch_irq_stat
19#define arch_irq_stat() 0 19#define arch_irq_stat() 0
20#endif 20#endif
21#ifndef arch_idle_time
22#define arch_idle_time(cpu) 0
23#endif
21 24
22static int show_stat(struct seq_file *p, void *v) 25static int show_stat(struct seq_file *p, void *v)
23{ 26{
@@ -40,6 +43,7 @@ static int show_stat(struct seq_file *p, void *v)
40 nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); 43 nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice);
41 system = cputime64_add(system, kstat_cpu(i).cpustat.system); 44 system = cputime64_add(system, kstat_cpu(i).cpustat.system);
42 idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle); 45 idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle);
46 idle = cputime64_add(idle, arch_idle_time(i));
43 iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait); 47 iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait);
44 irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); 48 irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq);
45 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); 49 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
@@ -69,6 +73,7 @@ static int show_stat(struct seq_file *p, void *v)
69 nice = kstat_cpu(i).cpustat.nice; 73 nice = kstat_cpu(i).cpustat.nice;
70 system = kstat_cpu(i).cpustat.system; 74 system = kstat_cpu(i).cpustat.system;
71 idle = kstat_cpu(i).cpustat.idle; 75 idle = kstat_cpu(i).cpustat.idle;
76 idle = cputime64_add(idle, arch_idle_time(i));
72 iowait = kstat_cpu(i).cpustat.iowait; 77 iowait = kstat_cpu(i).cpustat.iowait;
73 irq = kstat_cpu(i).cpustat.irq; 78 irq = kstat_cpu(i).cpustat.irq;
74 softirq = kstat_cpu(i).cpustat.softirq; 79 softirq = kstat_cpu(i).cpustat.softirq;
diff --git a/fs/quota/Makefile b/fs/quota/Makefile
index 385a0831cc99..68d4f6dc0578 100644
--- a/fs/quota/Makefile
+++ b/fs/quota/Makefile
@@ -1,12 +1,3 @@
1#
2# Makefile for the Linux filesystems.
3#
4# 14 Sep 2000, Christoph Hellwig <hch@infradead.org>
5# Rewritten to use lists instead of if-statements.
6#
7
8obj-y :=
9
10obj-$(CONFIG_QUOTA) += dquot.o 1obj-$(CONFIG_QUOTA) += dquot.o
11obj-$(CONFIG_QFMT_V1) += quota_v1.o 2obj-$(CONFIG_QFMT_V1) += quota_v1.o
12obj-$(CONFIG_QFMT_V2) += quota_v2.o 3obj-$(CONFIG_QFMT_V2) += quota_v2.o
diff --git a/fs/romfs/internal.h b/fs/romfs/internal.h
index 06044a9dc62d..95217b830118 100644
--- a/fs/romfs/internal.h
+++ b/fs/romfs/internal.h
@@ -43,5 +43,5 @@ extern int romfs_dev_read(struct super_block *sb, unsigned long pos,
43 void *buf, size_t buflen); 43 void *buf, size_t buflen);
44extern ssize_t romfs_dev_strnlen(struct super_block *sb, 44extern ssize_t romfs_dev_strnlen(struct super_block *sb,
45 unsigned long pos, size_t maxlen); 45 unsigned long pos, size_t maxlen);
46extern int romfs_dev_strncmp(struct super_block *sb, unsigned long pos, 46extern int romfs_dev_strcmp(struct super_block *sb, unsigned long pos,
47 const char *str, size_t size); 47 const char *str, size_t size);
diff --git a/fs/romfs/storage.c b/fs/romfs/storage.c
index 7e3e1e12a081..b3208adf8e71 100644
--- a/fs/romfs/storage.c
+++ b/fs/romfs/storage.c
@@ -67,26 +67,35 @@ static ssize_t romfs_mtd_strnlen(struct super_block *sb,
67 * compare a string to one in a romfs image on MTD 67 * compare a string to one in a romfs image on MTD
68 * - return 1 if matched, 0 if differ, -ve if error 68 * - return 1 if matched, 0 if differ, -ve if error
69 */ 69 */
70static int romfs_mtd_strncmp(struct super_block *sb, unsigned long pos, 70static int romfs_mtd_strcmp(struct super_block *sb, unsigned long pos,
71 const char *str, size_t size) 71 const char *str, size_t size)
72{ 72{
73 u_char buf[16]; 73 u_char buf[17];
74 size_t len, segment; 74 size_t len, segment;
75 int ret; 75 int ret;
76 76
77 /* scan the string up to 16 bytes at a time */ 77 /* scan the string up to 16 bytes at a time, and attempt to grab the
78 * trailing NUL whilst we're at it */
79 buf[0] = 0xff;
80
78 while (size > 0) { 81 while (size > 0) {
79 segment = min_t(size_t, size, 16); 82 segment = min_t(size_t, size + 1, 17);
80 ret = ROMFS_MTD_READ(sb, pos, segment, &len, buf); 83 ret = ROMFS_MTD_READ(sb, pos, segment, &len, buf);
81 if (ret < 0) 84 if (ret < 0)
82 return ret; 85 return ret;
86 len--;
83 if (memcmp(buf, str, len) != 0) 87 if (memcmp(buf, str, len) != 0)
84 return 0; 88 return 0;
89 buf[0] = buf[len];
85 size -= len; 90 size -= len;
86 pos += len; 91 pos += len;
87 str += len; 92 str += len;
88 } 93 }
89 94
95 /* check the trailing NUL was */
96 if (buf[0])
97 return 0;
98
90 return 1; 99 return 1;
91} 100}
92#endif /* CONFIG_ROMFS_ON_MTD */ 101#endif /* CONFIG_ROMFS_ON_MTD */
@@ -111,6 +120,7 @@ static int romfs_blk_read(struct super_block *sb, unsigned long pos,
111 return -EIO; 120 return -EIO;
112 memcpy(buf, bh->b_data + offset, segment); 121 memcpy(buf, bh->b_data + offset, segment);
113 brelse(bh); 122 brelse(bh);
123 buf += segment;
114 buflen -= segment; 124 buflen -= segment;
115 pos += segment; 125 pos += segment;
116 } 126 }
@@ -154,28 +164,48 @@ static ssize_t romfs_blk_strnlen(struct super_block *sb,
154 * compare a string to one in a romfs image on a block device 164 * compare a string to one in a romfs image on a block device
155 * - return 1 if matched, 0 if differ, -ve if error 165 * - return 1 if matched, 0 if differ, -ve if error
156 */ 166 */
157static int romfs_blk_strncmp(struct super_block *sb, unsigned long pos, 167static int romfs_blk_strcmp(struct super_block *sb, unsigned long pos,
158 const char *str, size_t size) 168 const char *str, size_t size)
159{ 169{
160 struct buffer_head *bh; 170 struct buffer_head *bh;
161 unsigned long offset; 171 unsigned long offset;
162 size_t segment; 172 size_t segment;
163 bool x; 173 bool matched, terminated = false;
164 174
165 /* scan the string up to 16 bytes at a time */ 175 /* compare string up to a block at a time */
166 while (size > 0) { 176 while (size > 0) {
167 offset = pos & (ROMBSIZE - 1); 177 offset = pos & (ROMBSIZE - 1);
168 segment = min_t(size_t, size, ROMBSIZE - offset); 178 segment = min_t(size_t, size, ROMBSIZE - offset);
169 bh = sb_bread(sb, pos >> ROMBSBITS); 179 bh = sb_bread(sb, pos >> ROMBSBITS);
170 if (!bh) 180 if (!bh)
171 return -EIO; 181 return -EIO;
172 x = (memcmp(bh->b_data + offset, str, segment) != 0); 182 matched = (memcmp(bh->b_data + offset, str, segment) == 0);
173 brelse(bh); 183
174 if (x)
175 return 0;
176 size -= segment; 184 size -= segment;
177 pos += segment; 185 pos += segment;
178 str += segment; 186 str += segment;
187 if (matched && size == 0 && offset + segment < ROMBSIZE) {
188 if (!bh->b_data[offset + segment])
189 terminated = true;
190 else
191 matched = false;
192 }
193 brelse(bh);
194 if (!matched)
195 return 0;
196 }
197
198 if (!terminated) {
199 /* the terminating NUL must be on the first byte of the next
200 * block */
201 BUG_ON((pos & (ROMBSIZE - 1)) != 0);
202 bh = sb_bread(sb, pos >> ROMBSBITS);
203 if (!bh)
204 return -EIO;
205 matched = !bh->b_data[0];
206 brelse(bh);
207 if (!matched)
208 return 0;
179 } 209 }
180 210
181 return 1; 211 return 1;
@@ -234,10 +264,12 @@ ssize_t romfs_dev_strnlen(struct super_block *sb,
234 264
235/* 265/*
236 * compare a string to one in romfs 266 * compare a string to one in romfs
267 * - the string to be compared to, str, may not be NUL-terminated; instead the
268 * string is of the specified size
237 * - return 1 if matched, 0 if differ, -ve if error 269 * - return 1 if matched, 0 if differ, -ve if error
238 */ 270 */
239int romfs_dev_strncmp(struct super_block *sb, unsigned long pos, 271int romfs_dev_strcmp(struct super_block *sb, unsigned long pos,
240 const char *str, size_t size) 272 const char *str, size_t size)
241{ 273{
242 size_t limit; 274 size_t limit;
243 275
@@ -246,16 +278,16 @@ int romfs_dev_strncmp(struct super_block *sb, unsigned long pos,
246 return -EIO; 278 return -EIO;
247 if (size > ROMFS_MAXFN) 279 if (size > ROMFS_MAXFN)
248 return -ENAMETOOLONG; 280 return -ENAMETOOLONG;
249 if (size > limit - pos) 281 if (size + 1 > limit - pos)
250 return -EIO; 282 return -EIO;
251 283
252#ifdef CONFIG_ROMFS_ON_MTD 284#ifdef CONFIG_ROMFS_ON_MTD
253 if (sb->s_mtd) 285 if (sb->s_mtd)
254 return romfs_mtd_strncmp(sb, pos, str, size); 286 return romfs_mtd_strcmp(sb, pos, str, size);
255#endif 287#endif
256#ifdef CONFIG_ROMFS_ON_BLOCK 288#ifdef CONFIG_ROMFS_ON_BLOCK
257 if (sb->s_bdev) 289 if (sb->s_bdev)
258 return romfs_blk_strncmp(sb, pos, str, size); 290 return romfs_blk_strcmp(sb, pos, str, size);
259#endif 291#endif
260 return -EIO; 292 return -EIO;
261} 293}
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 10ca7d984a8b..c53b5ef8a02f 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -240,8 +240,8 @@ static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry,
240 goto error; 240 goto error;
241 241
242 /* try to match the first 16 bytes of name */ 242 /* try to match the first 16 bytes of name */
243 ret = romfs_dev_strncmp(dir->i_sb, offset + ROMFH_SIZE, name, 243 ret = romfs_dev_strcmp(dir->i_sb, offset + ROMFH_SIZE, name,
244 len); 244 len);
245 if (ret < 0) 245 if (ret < 0)
246 goto error; 246 goto error;
247 if (ret == 1) 247 if (ret == 1)
diff --git a/fs/stat.c b/fs/stat.c
index 2db740a0cfb5..075694e31d8b 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -55,59 +55,54 @@ int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
55 55
56EXPORT_SYMBOL(vfs_getattr); 56EXPORT_SYMBOL(vfs_getattr);
57 57
58int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat) 58int vfs_fstat(unsigned int fd, struct kstat *stat)
59{ 59{
60 struct path path; 60 struct file *f = fget(fd);
61 int error; 61 int error = -EBADF;
62 62
63 error = user_path_at(dfd, name, LOOKUP_FOLLOW, &path); 63 if (f) {
64 if (!error) { 64 error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat);
65 error = vfs_getattr(path.mnt, path.dentry, stat); 65 fput(f);
66 path_put(&path);
67 } 66 }
68 return error; 67 return error;
69} 68}
69EXPORT_SYMBOL(vfs_fstat);
70 70
71int vfs_stat(char __user *name, struct kstat *stat) 71int vfs_fstatat(int dfd, char __user *filename, struct kstat *stat, int flag)
72{ 72{
73 return vfs_stat_fd(AT_FDCWD, name, stat); 73 struct path path;
74} 74 int error = -EINVAL;
75 int lookup_flags = 0;
75 76
76EXPORT_SYMBOL(vfs_stat); 77 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
78 goto out;
77 79
78int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat) 80 if (!(flag & AT_SYMLINK_NOFOLLOW))
79{ 81 lookup_flags |= LOOKUP_FOLLOW;
80 struct path path;
81 int error;
82 82
83 error = user_path_at(dfd, name, 0, &path); 83 error = user_path_at(dfd, filename, lookup_flags, &path);
84 if (!error) { 84 if (error)
85 error = vfs_getattr(path.mnt, path.dentry, stat); 85 goto out;
86 path_put(&path); 86
87 } 87 error = vfs_getattr(path.mnt, path.dentry, stat);
88 path_put(&path);
89out:
88 return error; 90 return error;
89} 91}
92EXPORT_SYMBOL(vfs_fstatat);
90 93
91int vfs_lstat(char __user *name, struct kstat *stat) 94int vfs_stat(char __user *name, struct kstat *stat)
92{ 95{
93 return vfs_lstat_fd(AT_FDCWD, name, stat); 96 return vfs_fstatat(AT_FDCWD, name, stat, 0);
94} 97}
98EXPORT_SYMBOL(vfs_stat);
95 99
96EXPORT_SYMBOL(vfs_lstat); 100int vfs_lstat(char __user *name, struct kstat *stat)
97
98int vfs_fstat(unsigned int fd, struct kstat *stat)
99{ 101{
100 struct file *f = fget(fd); 102 return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW);
101 int error = -EBADF;
102
103 if (f) {
104 error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat);
105 fput(f);
106 }
107 return error;
108} 103}
104EXPORT_SYMBOL(vfs_lstat);
109 105
110EXPORT_SYMBOL(vfs_fstat);
111 106
112#ifdef __ARCH_WANT_OLD_STAT 107#ifdef __ARCH_WANT_OLD_STAT
113 108
@@ -155,23 +150,25 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta
155SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) 150SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user *, statbuf)
156{ 151{
157 struct kstat stat; 152 struct kstat stat;
158 int error = vfs_stat_fd(AT_FDCWD, filename, &stat); 153 int error;
159 154
160 if (!error) 155 error = vfs_stat(filename, &stat);
161 error = cp_old_stat(&stat, statbuf); 156 if (error)
157 return error;
162 158
163 return error; 159 return cp_old_stat(&stat, statbuf);
164} 160}
165 161
166SYSCALL_DEFINE2(lstat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) 162SYSCALL_DEFINE2(lstat, char __user *, filename, struct __old_kernel_stat __user *, statbuf)
167{ 163{
168 struct kstat stat; 164 struct kstat stat;
169 int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); 165 int error;
170 166
171 if (!error) 167 error = vfs_lstat(filename, &stat);
172 error = cp_old_stat(&stat, statbuf); 168 if (error)
169 return error;
173 170
174 return error; 171 return cp_old_stat(&stat, statbuf);
175} 172}
176 173
177SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf) 174SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf)
@@ -240,23 +237,23 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
240SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf) 237SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf)
241{ 238{
242 struct kstat stat; 239 struct kstat stat;
243 int error = vfs_stat_fd(AT_FDCWD, filename, &stat); 240 int error = vfs_stat(filename, &stat);
244
245 if (!error)
246 error = cp_new_stat(&stat, statbuf);
247 241
248 return error; 242 if (error)
243 return error;
244 return cp_new_stat(&stat, statbuf);
249} 245}
250 246
251SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf) 247SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf)
252{ 248{
253 struct kstat stat; 249 struct kstat stat;
254 int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); 250 int error;
255 251
256 if (!error) 252 error = vfs_lstat(filename, &stat);
257 error = cp_new_stat(&stat, statbuf); 253 if (error)
254 return error;
258 255
259 return error; 256 return cp_new_stat(&stat, statbuf);
260} 257}
261 258
262#if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) 259#if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT)
@@ -264,21 +261,12 @@ SYSCALL_DEFINE4(newfstatat, int, dfd, char __user *, filename,
264 struct stat __user *, statbuf, int, flag) 261 struct stat __user *, statbuf, int, flag)
265{ 262{
266 struct kstat stat; 263 struct kstat stat;
267 int error = -EINVAL; 264 int error;
268
269 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
270 goto out;
271
272 if (flag & AT_SYMLINK_NOFOLLOW)
273 error = vfs_lstat_fd(dfd, filename, &stat);
274 else
275 error = vfs_stat_fd(dfd, filename, &stat);
276
277 if (!error)
278 error = cp_new_stat(&stat, statbuf);
279 265
280out: 266 error = vfs_fstatat(dfd, filename, &stat, flag);
281 return error; 267 if (error)
268 return error;
269 return cp_new_stat(&stat, statbuf);
282} 270}
283#endif 271#endif
284 272
@@ -404,21 +392,12 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename,
404 struct stat64 __user *, statbuf, int, flag) 392 struct stat64 __user *, statbuf, int, flag)
405{ 393{
406 struct kstat stat; 394 struct kstat stat;
407 int error = -EINVAL; 395 int error;
408
409 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
410 goto out;
411
412 if (flag & AT_SYMLINK_NOFOLLOW)
413 error = vfs_lstat_fd(dfd, filename, &stat);
414 else
415 error = vfs_stat_fd(dfd, filename, &stat);
416
417 if (!error)
418 error = cp_new_stat64(&stat, statbuf);
419 396
420out: 397 error = vfs_fstatat(dfd, filename, &stat, flag);
421 return error; 398 if (error)
399 return error;
400 return cp_new_stat64(&stat, statbuf);
422} 401}
423#endif /* __ARCH_WANT_STAT64 */ 402#endif /* __ARCH_WANT_STAT64 */
424 403
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 93e0c0281d45..9345806c8853 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -157,14 +157,9 @@ static ssize_t write(struct file *file, const char __user *userbuf,
157 count = size - offs; 157 count = size - offs;
158 } 158 }
159 159
160 temp = kmalloc(count, GFP_KERNEL); 160 temp = memdup_user(userbuf, count);
161 if (!temp) 161 if (IS_ERR(temp))
162 return -ENOMEM; 162 return PTR_ERR(temp);
163
164 if (copy_from_user(temp, userbuf, count)) {
165 count = -EFAULT;
166 goto out_free;
167 }
168 163
169 mutex_lock(&bb->mutex); 164 mutex_lock(&bb->mutex);
170 165
@@ -176,8 +171,6 @@ static ssize_t write(struct file *file, const char __user *userbuf,
176 if (count > 0) 171 if (count > 0)
177 *off = offs + count; 172 *off = offs + count;
178 173
179out_free:
180 kfree(temp);
181 return count; 174 return count;
182} 175}
183 176
diff --git a/fs/xattr.c b/fs/xattr.c
index 197c4fcac032..d51b8f9db921 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -237,13 +237,9 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
237 if (size) { 237 if (size) {
238 if (size > XATTR_SIZE_MAX) 238 if (size > XATTR_SIZE_MAX)
239 return -E2BIG; 239 return -E2BIG;
240 kvalue = kmalloc(size, GFP_KERNEL); 240 kvalue = memdup_user(value, size);
241 if (!kvalue) 241 if (IS_ERR(kvalue))
242 return -ENOMEM; 242 return PTR_ERR(kvalue);
243 if (copy_from_user(kvalue, value, size)) {
244 kfree(kvalue);
245 return -EFAULT;
246 }
247 } 243 }
248 244
249 error = vfs_setxattr(d, kname, kvalue, size, flags); 245 error = vfs_setxattr(d, kname, kvalue, size, flags);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index d0b499418a7d..34eaab608e6e 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -489,17 +489,12 @@ xfs_attrmulti_attr_set(
489 if (len > XATTR_SIZE_MAX) 489 if (len > XATTR_SIZE_MAX)
490 return EINVAL; 490 return EINVAL;
491 491
492 kbuf = kmalloc(len, GFP_KERNEL); 492 kbuf = memdup_user(ubuf, len);
493 if (!kbuf) 493 if (IS_ERR(kbuf))
494 return ENOMEM; 494 return PTR_ERR(kbuf);
495
496 if (copy_from_user(kbuf, ubuf, len))
497 goto out_kfree;
498 495
499 error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); 496 error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
500 497
501 out_kfree:
502 kfree(kbuf);
503 return error; 498 return error;
504} 499}
505 500
@@ -540,20 +535,16 @@ xfs_attrmulti_by_handle(
540 if (!size || size > 16 * PAGE_SIZE) 535 if (!size || size > 16 * PAGE_SIZE)
541 goto out_dput; 536 goto out_dput;
542 537
543 error = ENOMEM; 538 ops = memdup_user(am_hreq.ops, size);
544 ops = kmalloc(size, GFP_KERNEL); 539 if (IS_ERR(ops)) {
545 if (!ops) 540 error = PTR_ERR(ops);
546 goto out_dput; 541 goto out_dput;
547 542 }
548 error = EFAULT;
549 if (copy_from_user(ops, am_hreq.ops, size))
550 goto out_kfree_ops;
551 543
552 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); 544 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
553 if (!attr_name) 545 if (!attr_name)
554 goto out_kfree_ops; 546 goto out_kfree_ops;
555 547
556
557 error = 0; 548 error = 0;
558 for (i = 0; i < am_hreq.opcount; i++) { 549 for (i = 0; i < am_hreq.opcount; i++) {
559 ops[i].am_error = strncpy_from_user(attr_name, 550 ops[i].am_error = strncpy_from_user(attr_name,
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index c70c4e3db790..0882d166239a 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -427,20 +427,16 @@ xfs_compat_attrmulti_by_handle(
427 if (!size || size > 16 * PAGE_SIZE) 427 if (!size || size > 16 * PAGE_SIZE)
428 goto out_dput; 428 goto out_dput;
429 429
430 error = ENOMEM; 430 ops = memdup_user(compat_ptr(am_hreq.ops), size);
431 ops = kmalloc(size, GFP_KERNEL); 431 if (IS_ERR(ops)) {
432 if (!ops) 432 error = PTR_ERR(ops);
433 goto out_dput; 433 goto out_dput;
434 434 }
435 error = EFAULT;
436 if (copy_from_user(ops, compat_ptr(am_hreq.ops), size))
437 goto out_kfree_ops;
438 435
439 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); 436 attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
440 if (!attr_name) 437 if (!attr_name)
441 goto out_kfree_ops; 438 goto out_kfree_ops;
442 439
443
444 error = 0; 440 error = 0;
445 for (i = 0; i < am_hreq.opcount; i++) { 441 for (i = 0; i < am_hreq.opcount; i++) {
446 ops[i].am_error = strncpy_from_user(attr_name, 442 ops[i].am_error = strncpy_from_user(attr_name,