diff options
author | Steve French <sfrench@us.ibm.com> | 2009-04-30 11:36:52 -0400 |
---|---|---|
committer | Steve French <sfrench@us.ibm.com> | 2009-04-30 11:36:52 -0400 |
commit | 912bc6ae3de99c7bada4577d4341ace4ee59b5be (patch) | |
tree | 28fd1a4bb9e4b05aa833285b46df169f12c0e24d /fs | |
parent | d37dc42ab6f040b8f0f2962ab219c5b2accf748d (diff) | |
parent | 091438dd5668396328a3419abcbc6591159eb8d1 (diff) |
Merge branch 'master' of /pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'fs')
66 files changed, 1047 insertions, 1009 deletions
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c index bf8c8af98004..4eb4d8dfb2f1 100644 --- a/fs/autofs/dirhash.c +++ b/fs/autofs/dirhash.c | |||
@@ -39,10 +39,12 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, | |||
39 | { | 39 | { |
40 | struct autofs_dirhash *dh = &sbi->dirhash; | 40 | struct autofs_dirhash *dh = &sbi->dirhash; |
41 | struct autofs_dir_ent *ent; | 41 | struct autofs_dir_ent *ent; |
42 | struct dentry *dentry; | ||
43 | unsigned long timeout = sbi->exp_timeout; | 42 | unsigned long timeout = sbi->exp_timeout; |
44 | 43 | ||
45 | while (1) { | 44 | while (1) { |
45 | struct path path; | ||
46 | int umount_ok; | ||
47 | |||
46 | if ( list_empty(&dh->expiry_head) || sbi->catatonic ) | 48 | if ( list_empty(&dh->expiry_head) || sbi->catatonic ) |
47 | return NULL; /* No entries */ | 49 | return NULL; /* No entries */ |
48 | /* We keep the list sorted by last_usage and want old stuff */ | 50 | /* We keep the list sorted by last_usage and want old stuff */ |
@@ -57,17 +59,17 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, | |||
57 | return ent; /* Symlinks are always expirable */ | 59 | return ent; /* Symlinks are always expirable */ |
58 | 60 | ||
59 | /* Get the dentry for the autofs subdirectory */ | 61 | /* Get the dentry for the autofs subdirectory */ |
60 | dentry = ent->dentry; | 62 | path.dentry = ent->dentry; |
61 | 63 | ||
62 | if ( !dentry ) { | 64 | if (!path.dentry) { |
63 | /* Should only happen in catatonic mode */ | 65 | /* Should only happen in catatonic mode */ |
64 | printk("autofs: dentry == NULL but inode range is directory, entry %s\n", ent->name); | 66 | printk("autofs: dentry == NULL but inode range is directory, entry %s\n", ent->name); |
65 | autofs_delete_usage(ent); | 67 | autofs_delete_usage(ent); |
66 | continue; | 68 | continue; |
67 | } | 69 | } |
68 | 70 | ||
69 | if ( !dentry->d_inode ) { | 71 | if (!path.dentry->d_inode) { |
70 | dput(dentry); | 72 | dput(path.dentry); |
71 | printk("autofs: negative dentry on expiry queue: %s\n", | 73 | printk("autofs: negative dentry on expiry queue: %s\n", |
72 | ent->name); | 74 | ent->name); |
73 | autofs_delete_usage(ent); | 75 | autofs_delete_usage(ent); |
@@ -76,29 +78,29 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, | |||
76 | 78 | ||
77 | /* Make sure entry is mounted and unused; note that dentry will | 79 | /* Make sure entry is mounted and unused; note that dentry will |
78 | point to the mounted-on-top root. */ | 80 | point to the mounted-on-top root. */ |
79 | if (!S_ISDIR(dentry->d_inode->i_mode)||!d_mountpoint(dentry)) { | 81 | if (!S_ISDIR(path.dentry->d_inode->i_mode) || |
82 | !d_mountpoint(path.dentry)) { | ||
80 | DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); | 83 | DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); |
81 | continue; | 84 | continue; |
82 | } | 85 | } |
83 | mntget(mnt); | 86 | path.mnt = mnt; |
84 | dget(dentry); | 87 | path_get(&path); |
85 | if (!follow_down(&mnt, &dentry)) { | 88 | if (!follow_down(&path.mnt, &path.dentry)) { |
86 | dput(dentry); | 89 | path_put(&path); |
87 | mntput(mnt); | ||
88 | DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); | 90 | DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); |
89 | continue; | 91 | continue; |
90 | } | 92 | } |
91 | while (d_mountpoint(dentry) && follow_down(&mnt, &dentry)) | 93 | while (d_mountpoint(path.dentry) && |
94 | follow_down(&path.mnt, &path.dentry)) | ||
92 | ; | 95 | ; |
93 | dput(dentry); | 96 | umount_ok = may_umount(path.mnt); |
97 | path_put(&path); | ||
94 | 98 | ||
95 | if ( may_umount(mnt) ) { | 99 | if (umount_ok) { |
96 | mntput(mnt); | ||
97 | DPRINTK(("autofs: signaling expire on %s\n", ent->name)); | 100 | DPRINTK(("autofs: signaling expire on %s\n", ent->name)); |
98 | return ent; /* Expirable! */ | 101 | return ent; /* Expirable! */ |
99 | } | 102 | } |
100 | DPRINTK(("autofs: didn't expire due to may_umount: %s\n", ent->name)); | 103 | DPRINTK(("autofs: didn't expire due to may_umount: %s\n", ent->name)); |
101 | mntput(mnt); | ||
102 | } | 104 | } |
103 | return NULL; /* No expirable entries */ | 105 | return NULL; /* No expirable entries */ |
104 | } | 106 | } |
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 9e5ae8a4f5c8..84168c0dcc2d 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c | |||
@@ -54,11 +54,10 @@ static int check_name(const char *name) | |||
54 | * Check a string doesn't overrun the chunk of | 54 | * Check a string doesn't overrun the chunk of |
55 | * memory we copied from user land. | 55 | * memory we copied from user land. |
56 | */ | 56 | */ |
57 | static int invalid_str(char *str, void *end) | 57 | static int invalid_str(char *str, size_t size) |
58 | { | 58 | { |
59 | while ((void *) str <= end) | 59 | if (memchr(str, 0, size)) |
60 | if (!*str++) | 60 | return 0; |
61 | return 0; | ||
62 | return -EINVAL; | 61 | return -EINVAL; |
63 | } | 62 | } |
64 | 63 | ||
@@ -138,8 +137,7 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) | |||
138 | } | 137 | } |
139 | 138 | ||
140 | if (param->size > sizeof(*param)) { | 139 | if (param->size > sizeof(*param)) { |
141 | err = invalid_str(param->path, | 140 | err = invalid_str(param->path, param->size - sizeof(*param)); |
142 | (void *) ((size_t) param + param->size)); | ||
143 | if (err) { | 141 | if (err) { |
144 | AUTOFS_WARN( | 142 | AUTOFS_WARN( |
145 | "path string terminator missing for cmd(0x%08x)", | 143 | "path string terminator missing for cmd(0x%08x)", |
@@ -488,7 +486,7 @@ static int autofs_dev_ioctl_requester(struct file *fp, | |||
488 | } | 486 | } |
489 | 487 | ||
490 | path = param->path; | 488 | path = param->path; |
491 | devid = sbi->sb->s_dev; | 489 | devid = new_encode_dev(sbi->sb->s_dev); |
492 | 490 | ||
493 | param->requester.uid = param->requester.gid = -1; | 491 | param->requester.uid = param->requester.gid = -1; |
494 | 492 | ||
@@ -175,14 +175,6 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, | |||
175 | struct bio_vec *bvl; | 175 | struct bio_vec *bvl; |
176 | 176 | ||
177 | /* | 177 | /* |
178 | * If 'bs' is given, lookup the pool and do the mempool alloc. | ||
179 | * If not, this is a bio_kmalloc() allocation and just do a | ||
180 | * kzalloc() for the exact number of vecs right away. | ||
181 | */ | ||
182 | if (!bs) | ||
183 | bvl = kmalloc(nr * sizeof(struct bio_vec), gfp_mask); | ||
184 | |||
185 | /* | ||
186 | * see comment near bvec_array define! | 178 | * see comment near bvec_array define! |
187 | */ | 179 | */ |
188 | switch (nr) { | 180 | switch (nr) { |
@@ -260,21 +252,6 @@ void bio_free(struct bio *bio, struct bio_set *bs) | |||
260 | mempool_free(p, bs->bio_pool); | 252 | mempool_free(p, bs->bio_pool); |
261 | } | 253 | } |
262 | 254 | ||
263 | /* | ||
264 | * default destructor for a bio allocated with bio_alloc_bioset() | ||
265 | */ | ||
266 | static void bio_fs_destructor(struct bio *bio) | ||
267 | { | ||
268 | bio_free(bio, fs_bio_set); | ||
269 | } | ||
270 | |||
271 | static void bio_kmalloc_destructor(struct bio *bio) | ||
272 | { | ||
273 | if (bio_has_allocated_vec(bio)) | ||
274 | kfree(bio->bi_io_vec); | ||
275 | kfree(bio); | ||
276 | } | ||
277 | |||
278 | void bio_init(struct bio *bio) | 255 | void bio_init(struct bio *bio) |
279 | { | 256 | { |
280 | memset(bio, 0, sizeof(*bio)); | 257 | memset(bio, 0, sizeof(*bio)); |
@@ -301,21 +278,15 @@ void bio_init(struct bio *bio) | |||
301 | **/ | 278 | **/ |
302 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) | 279 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) |
303 | { | 280 | { |
281 | unsigned long idx = BIO_POOL_NONE; | ||
304 | struct bio_vec *bvl = NULL; | 282 | struct bio_vec *bvl = NULL; |
305 | struct bio *bio = NULL; | 283 | struct bio *bio; |
306 | unsigned long idx = 0; | 284 | void *p; |
307 | void *p = NULL; | 285 | |
308 | 286 | p = mempool_alloc(bs->bio_pool, gfp_mask); | |
309 | if (bs) { | 287 | if (unlikely(!p)) |
310 | p = mempool_alloc(bs->bio_pool, gfp_mask); | 288 | return NULL; |
311 | if (!p) | 289 | bio = p + bs->front_pad; |
312 | goto err; | ||
313 | bio = p + bs->front_pad; | ||
314 | } else { | ||
315 | bio = kmalloc(sizeof(*bio), gfp_mask); | ||
316 | if (!bio) | ||
317 | goto err; | ||
318 | } | ||
319 | 290 | ||
320 | bio_init(bio); | 291 | bio_init(bio); |
321 | 292 | ||
@@ -332,22 +303,50 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) | |||
332 | 303 | ||
333 | nr_iovecs = bvec_nr_vecs(idx); | 304 | nr_iovecs = bvec_nr_vecs(idx); |
334 | } | 305 | } |
306 | out_set: | ||
335 | bio->bi_flags |= idx << BIO_POOL_OFFSET; | 307 | bio->bi_flags |= idx << BIO_POOL_OFFSET; |
336 | bio->bi_max_vecs = nr_iovecs; | 308 | bio->bi_max_vecs = nr_iovecs; |
337 | out_set: | ||
338 | bio->bi_io_vec = bvl; | 309 | bio->bi_io_vec = bvl; |
339 | |||
340 | return bio; | 310 | return bio; |
341 | 311 | ||
342 | err_free: | 312 | err_free: |
343 | if (bs) | 313 | mempool_free(p, bs->bio_pool); |
344 | mempool_free(p, bs->bio_pool); | ||
345 | else | ||
346 | kfree(bio); | ||
347 | err: | ||
348 | return NULL; | 314 | return NULL; |
349 | } | 315 | } |
350 | 316 | ||
317 | static void bio_fs_destructor(struct bio *bio) | ||
318 | { | ||
319 | bio_free(bio, fs_bio_set); | ||
320 | } | ||
321 | |||
322 | /** | ||
323 | * bio_alloc - allocate a new bio, memory pool backed | ||
324 | * @gfp_mask: allocation mask to use | ||
325 | * @nr_iovecs: number of iovecs | ||
326 | * | ||
327 | * Allocate a new bio with @nr_iovecs bvecs. If @gfp_mask | ||
328 | * contains __GFP_WAIT, the allocation is guaranteed to succeed. | ||
329 | * | ||
330 | * RETURNS: | ||
331 | * Pointer to new bio on success, NULL on failure. | ||
332 | */ | ||
333 | struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) | ||
334 | { | ||
335 | struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); | ||
336 | |||
337 | if (bio) | ||
338 | bio->bi_destructor = bio_fs_destructor; | ||
339 | |||
340 | return bio; | ||
341 | } | ||
342 | |||
343 | static void bio_kmalloc_destructor(struct bio *bio) | ||
344 | { | ||
345 | if (bio_integrity(bio)) | ||
346 | bio_integrity_free(bio); | ||
347 | kfree(bio); | ||
348 | } | ||
349 | |||
351 | /** | 350 | /** |
352 | * bio_alloc - allocate a bio for I/O | 351 | * bio_alloc - allocate a bio for I/O |
353 | * @gfp_mask: the GFP_ mask given to the slab allocator | 352 | * @gfp_mask: the GFP_ mask given to the slab allocator |
@@ -366,29 +365,20 @@ err: | |||
366 | * do so can cause livelocks under memory pressure. | 365 | * do so can cause livelocks under memory pressure. |
367 | * | 366 | * |
368 | **/ | 367 | **/ |
369 | struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) | ||
370 | { | ||
371 | struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); | ||
372 | |||
373 | if (bio) | ||
374 | bio->bi_destructor = bio_fs_destructor; | ||
375 | |||
376 | return bio; | ||
377 | } | ||
378 | |||
379 | /* | ||
380 | * Like bio_alloc(), but doesn't use a mempool backing. This means that | ||
381 | * it CAN fail, but while bio_alloc() can only be used for allocations | ||
382 | * that have a short (finite) life span, bio_kmalloc() should be used | ||
383 | * for more permanent bio allocations (like allocating some bio's for | ||
384 | * initalization or setup purposes). | ||
385 | */ | ||
386 | struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) | 368 | struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) |
387 | { | 369 | { |
388 | struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); | 370 | struct bio *bio; |
389 | 371 | ||
390 | if (bio) | 372 | bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec), |
391 | bio->bi_destructor = bio_kmalloc_destructor; | 373 | gfp_mask); |
374 | if (unlikely(!bio)) | ||
375 | return NULL; | ||
376 | |||
377 | bio_init(bio); | ||
378 | bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET; | ||
379 | bio->bi_max_vecs = nr_iovecs; | ||
380 | bio->bi_io_vec = bio->bi_inline_vecs; | ||
381 | bio->bi_destructor = bio_kmalloc_destructor; | ||
392 | 382 | ||
393 | return bio; | 383 | return bio; |
394 | } | 384 | } |
@@ -827,12 +817,15 @@ struct bio *bio_copy_user_iov(struct request_queue *q, | |||
827 | len += iov[i].iov_len; | 817 | len += iov[i].iov_len; |
828 | } | 818 | } |
829 | 819 | ||
820 | if (offset) | ||
821 | nr_pages++; | ||
822 | |||
830 | bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask); | 823 | bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask); |
831 | if (!bmd) | 824 | if (!bmd) |
832 | return ERR_PTR(-ENOMEM); | 825 | return ERR_PTR(-ENOMEM); |
833 | 826 | ||
834 | ret = -ENOMEM; | 827 | ret = -ENOMEM; |
835 | bio = bio_alloc(gfp_mask, nr_pages); | 828 | bio = bio_kmalloc(gfp_mask, nr_pages); |
836 | if (!bio) | 829 | if (!bio) |
837 | goto out_bmd; | 830 | goto out_bmd; |
838 | 831 | ||
@@ -956,7 +949,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, | |||
956 | if (!nr_pages) | 949 | if (!nr_pages) |
957 | return ERR_PTR(-EINVAL); | 950 | return ERR_PTR(-EINVAL); |
958 | 951 | ||
959 | bio = bio_alloc(gfp_mask, nr_pages); | 952 | bio = bio_kmalloc(gfp_mask, nr_pages); |
960 | if (!bio) | 953 | if (!bio) |
961 | return ERR_PTR(-ENOMEM); | 954 | return ERR_PTR(-ENOMEM); |
962 | 955 | ||
@@ -1140,7 +1133,7 @@ static struct bio *__bio_map_kern(struct request_queue *q, void *data, | |||
1140 | int offset, i; | 1133 | int offset, i; |
1141 | struct bio *bio; | 1134 | struct bio *bio; |
1142 | 1135 | ||
1143 | bio = bio_alloc(gfp_mask, nr_pages); | 1136 | bio = bio_kmalloc(gfp_mask, nr_pages); |
1144 | if (!bio) | 1137 | if (!bio) |
1145 | return ERR_PTR(-ENOMEM); | 1138 | return ERR_PTR(-ENOMEM); |
1146 | 1139 | ||
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 9adf5e4f7e96..94212844a9bc 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile | |||
@@ -1,25 +1,10 @@ | |||
1 | ifneq ($(KERNELRELEASE),) | ||
2 | # kbuild part of makefile | ||
3 | 1 | ||
4 | obj-$(CONFIG_BTRFS_FS) := btrfs.o | 2 | obj-$(CONFIG_BTRFS_FS) := btrfs.o |
5 | btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ | 3 | |
4 | btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ | ||
6 | file-item.o inode-item.o inode-map.o disk-io.o \ | 5 | file-item.o inode-item.o inode-map.o disk-io.o \ |
7 | transaction.o inode.o file.o tree-defrag.o \ | 6 | transaction.o inode.o file.o tree-defrag.o \ |
8 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ | 7 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ |
9 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ | 8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ |
10 | ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \ | 9 | ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \ |
11 | compression.o delayed-ref.o | 10 | compression.o delayed-ref.o |
12 | else | ||
13 | |||
14 | # Normal Makefile | ||
15 | |||
16 | KERNELDIR := /lib/modules/`uname -r`/build | ||
17 | all: | ||
18 | $(MAKE) -C $(KERNELDIR) M=`pwd` CONFIG_BTRFS_FS=m modules | ||
19 | |||
20 | modules_install: | ||
21 | $(MAKE) -C $(KERNELDIR) M=`pwd` modules_install | ||
22 | clean: | ||
23 | $(MAKE) -C $(KERNELDIR) M=`pwd` clean | ||
24 | |||
25 | endif | ||
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 7fdd184a528d..cbba000dccbe 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c | |||
@@ -60,15 +60,20 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) | |||
60 | return ERR_PTR(-EINVAL); | 60 | return ERR_PTR(-EINVAL); |
61 | } | 61 | } |
62 | 62 | ||
63 | /* Handle the cached NULL acl case without locking */ | ||
64 | acl = ACCESS_ONCE(*p_acl); | ||
65 | if (!acl) | ||
66 | return acl; | ||
67 | |||
63 | spin_lock(&inode->i_lock); | 68 | spin_lock(&inode->i_lock); |
64 | if (*p_acl != BTRFS_ACL_NOT_CACHED) | 69 | acl = *p_acl; |
65 | acl = posix_acl_dup(*p_acl); | 70 | if (acl != BTRFS_ACL_NOT_CACHED) |
71 | acl = posix_acl_dup(acl); | ||
66 | spin_unlock(&inode->i_lock); | 72 | spin_unlock(&inode->i_lock); |
67 | 73 | ||
68 | if (acl) | 74 | if (acl != BTRFS_ACL_NOT_CACHED) |
69 | return acl; | 75 | return acl; |
70 | 76 | ||
71 | |||
72 | size = __btrfs_getxattr(inode, name, "", 0); | 77 | size = __btrfs_getxattr(inode, name, "", 0); |
73 | if (size > 0) { | 78 | if (size > 0) { |
74 | value = kzalloc(size, GFP_NOFS); | 79 | value = kzalloc(size, GFP_NOFS); |
@@ -80,9 +85,12 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) | |||
80 | btrfs_update_cached_acl(inode, p_acl, acl); | 85 | btrfs_update_cached_acl(inode, p_acl, acl); |
81 | } | 86 | } |
82 | kfree(value); | 87 | kfree(value); |
83 | } else if (size == -ENOENT) { | 88 | } else if (size == -ENOENT || size == -ENODATA || size == 0) { |
89 | /* FIXME, who returns -ENOENT? I think nobody */ | ||
84 | acl = NULL; | 90 | acl = NULL; |
85 | btrfs_update_cached_acl(inode, p_acl, acl); | 91 | btrfs_update_cached_acl(inode, p_acl, acl); |
92 | } else { | ||
93 | acl = ERR_PTR(-EIO); | ||
86 | } | 94 | } |
87 | 95 | ||
88 | return acl; | 96 | return acl; |
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 51bfdfc8fcda..502c3d61de62 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #define WORK_QUEUED_BIT 0 | 25 | #define WORK_QUEUED_BIT 0 |
26 | #define WORK_DONE_BIT 1 | 26 | #define WORK_DONE_BIT 1 |
27 | #define WORK_ORDER_DONE_BIT 2 | 27 | #define WORK_ORDER_DONE_BIT 2 |
28 | #define WORK_HIGH_PRIO_BIT 3 | ||
28 | 29 | ||
29 | /* | 30 | /* |
30 | * container for the kthread task pointer and the list of pending work | 31 | * container for the kthread task pointer and the list of pending work |
@@ -36,6 +37,7 @@ struct btrfs_worker_thread { | |||
36 | 37 | ||
37 | /* list of struct btrfs_work that are waiting for service */ | 38 | /* list of struct btrfs_work that are waiting for service */ |
38 | struct list_head pending; | 39 | struct list_head pending; |
40 | struct list_head prio_pending; | ||
39 | 41 | ||
40 | /* list of worker threads from struct btrfs_workers */ | 42 | /* list of worker threads from struct btrfs_workers */ |
41 | struct list_head worker_list; | 43 | struct list_head worker_list; |
@@ -103,10 +105,16 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers, | |||
103 | 105 | ||
104 | spin_lock_irqsave(&workers->lock, flags); | 106 | spin_lock_irqsave(&workers->lock, flags); |
105 | 107 | ||
106 | while (!list_empty(&workers->order_list)) { | 108 | while (1) { |
107 | work = list_entry(workers->order_list.next, | 109 | if (!list_empty(&workers->prio_order_list)) { |
108 | struct btrfs_work, order_list); | 110 | work = list_entry(workers->prio_order_list.next, |
109 | 111 | struct btrfs_work, order_list); | |
112 | } else if (!list_empty(&workers->order_list)) { | ||
113 | work = list_entry(workers->order_list.next, | ||
114 | struct btrfs_work, order_list); | ||
115 | } else { | ||
116 | break; | ||
117 | } | ||
110 | if (!test_bit(WORK_DONE_BIT, &work->flags)) | 118 | if (!test_bit(WORK_DONE_BIT, &work->flags)) |
111 | break; | 119 | break; |
112 | 120 | ||
@@ -143,8 +151,14 @@ static int worker_loop(void *arg) | |||
143 | do { | 151 | do { |
144 | spin_lock_irq(&worker->lock); | 152 | spin_lock_irq(&worker->lock); |
145 | again_locked: | 153 | again_locked: |
146 | while (!list_empty(&worker->pending)) { | 154 | while (1) { |
147 | cur = worker->pending.next; | 155 | if (!list_empty(&worker->prio_pending)) |
156 | cur = worker->prio_pending.next; | ||
157 | else if (!list_empty(&worker->pending)) | ||
158 | cur = worker->pending.next; | ||
159 | else | ||
160 | break; | ||
161 | |||
148 | work = list_entry(cur, struct btrfs_work, list); | 162 | work = list_entry(cur, struct btrfs_work, list); |
149 | list_del(&work->list); | 163 | list_del(&work->list); |
150 | clear_bit(WORK_QUEUED_BIT, &work->flags); | 164 | clear_bit(WORK_QUEUED_BIT, &work->flags); |
@@ -163,7 +177,6 @@ again_locked: | |||
163 | 177 | ||
164 | spin_lock_irq(&worker->lock); | 178 | spin_lock_irq(&worker->lock); |
165 | check_idle_worker(worker); | 179 | check_idle_worker(worker); |
166 | |||
167 | } | 180 | } |
168 | if (freezing(current)) { | 181 | if (freezing(current)) { |
169 | worker->working = 0; | 182 | worker->working = 0; |
@@ -178,7 +191,8 @@ again_locked: | |||
178 | * jump_in? | 191 | * jump_in? |
179 | */ | 192 | */ |
180 | smp_mb(); | 193 | smp_mb(); |
181 | if (!list_empty(&worker->pending)) | 194 | if (!list_empty(&worker->pending) || |
195 | !list_empty(&worker->prio_pending)) | ||
182 | continue; | 196 | continue; |
183 | 197 | ||
184 | /* | 198 | /* |
@@ -191,7 +205,8 @@ again_locked: | |||
191 | */ | 205 | */ |
192 | schedule_timeout(1); | 206 | schedule_timeout(1); |
193 | smp_mb(); | 207 | smp_mb(); |
194 | if (!list_empty(&worker->pending)) | 208 | if (!list_empty(&worker->pending) || |
209 | !list_empty(&worker->prio_pending)) | ||
195 | continue; | 210 | continue; |
196 | 211 | ||
197 | if (kthread_should_stop()) | 212 | if (kthread_should_stop()) |
@@ -200,7 +215,8 @@ again_locked: | |||
200 | /* still no more work?, sleep for real */ | 215 | /* still no more work?, sleep for real */ |
201 | spin_lock_irq(&worker->lock); | 216 | spin_lock_irq(&worker->lock); |
202 | set_current_state(TASK_INTERRUPTIBLE); | 217 | set_current_state(TASK_INTERRUPTIBLE); |
203 | if (!list_empty(&worker->pending)) | 218 | if (!list_empty(&worker->pending) || |
219 | !list_empty(&worker->prio_pending)) | ||
204 | goto again_locked; | 220 | goto again_locked; |
205 | 221 | ||
206 | /* | 222 | /* |
@@ -248,6 +264,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max) | |||
248 | INIT_LIST_HEAD(&workers->worker_list); | 264 | INIT_LIST_HEAD(&workers->worker_list); |
249 | INIT_LIST_HEAD(&workers->idle_list); | 265 | INIT_LIST_HEAD(&workers->idle_list); |
250 | INIT_LIST_HEAD(&workers->order_list); | 266 | INIT_LIST_HEAD(&workers->order_list); |
267 | INIT_LIST_HEAD(&workers->prio_order_list); | ||
251 | spin_lock_init(&workers->lock); | 268 | spin_lock_init(&workers->lock); |
252 | workers->max_workers = max; | 269 | workers->max_workers = max; |
253 | workers->idle_thresh = 32; | 270 | workers->idle_thresh = 32; |
@@ -273,6 +290,7 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) | |||
273 | } | 290 | } |
274 | 291 | ||
275 | INIT_LIST_HEAD(&worker->pending); | 292 | INIT_LIST_HEAD(&worker->pending); |
293 | INIT_LIST_HEAD(&worker->prio_pending); | ||
276 | INIT_LIST_HEAD(&worker->worker_list); | 294 | INIT_LIST_HEAD(&worker->worker_list); |
277 | spin_lock_init(&worker->lock); | 295 | spin_lock_init(&worker->lock); |
278 | atomic_set(&worker->num_pending, 0); | 296 | atomic_set(&worker->num_pending, 0); |
@@ -396,7 +414,10 @@ int btrfs_requeue_work(struct btrfs_work *work) | |||
396 | goto out; | 414 | goto out; |
397 | 415 | ||
398 | spin_lock_irqsave(&worker->lock, flags); | 416 | spin_lock_irqsave(&worker->lock, flags); |
399 | list_add_tail(&work->list, &worker->pending); | 417 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) |
418 | list_add_tail(&work->list, &worker->prio_pending); | ||
419 | else | ||
420 | list_add_tail(&work->list, &worker->pending); | ||
400 | atomic_inc(&worker->num_pending); | 421 | atomic_inc(&worker->num_pending); |
401 | 422 | ||
402 | /* by definition we're busy, take ourselves off the idle | 423 | /* by definition we're busy, take ourselves off the idle |
@@ -422,6 +443,11 @@ out: | |||
422 | return 0; | 443 | return 0; |
423 | } | 444 | } |
424 | 445 | ||
446 | void btrfs_set_work_high_prio(struct btrfs_work *work) | ||
447 | { | ||
448 | set_bit(WORK_HIGH_PRIO_BIT, &work->flags); | ||
449 | } | ||
450 | |||
425 | /* | 451 | /* |
426 | * places a struct btrfs_work into the pending queue of one of the kthreads | 452 | * places a struct btrfs_work into the pending queue of one of the kthreads |
427 | */ | 453 | */ |
@@ -438,7 +464,12 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | |||
438 | worker = find_worker(workers); | 464 | worker = find_worker(workers); |
439 | if (workers->ordered) { | 465 | if (workers->ordered) { |
440 | spin_lock_irqsave(&workers->lock, flags); | 466 | spin_lock_irqsave(&workers->lock, flags); |
441 | list_add_tail(&work->order_list, &workers->order_list); | 467 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) { |
468 | list_add_tail(&work->order_list, | ||
469 | &workers->prio_order_list); | ||
470 | } else { | ||
471 | list_add_tail(&work->order_list, &workers->order_list); | ||
472 | } | ||
442 | spin_unlock_irqrestore(&workers->lock, flags); | 473 | spin_unlock_irqrestore(&workers->lock, flags); |
443 | } else { | 474 | } else { |
444 | INIT_LIST_HEAD(&work->order_list); | 475 | INIT_LIST_HEAD(&work->order_list); |
@@ -446,7 +477,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | |||
446 | 477 | ||
447 | spin_lock_irqsave(&worker->lock, flags); | 478 | spin_lock_irqsave(&worker->lock, flags); |
448 | 479 | ||
449 | list_add_tail(&work->list, &worker->pending); | 480 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) |
481 | list_add_tail(&work->list, &worker->prio_pending); | ||
482 | else | ||
483 | list_add_tail(&work->list, &worker->pending); | ||
450 | atomic_inc(&worker->num_pending); | 484 | atomic_inc(&worker->num_pending); |
451 | check_busy_worker(worker); | 485 | check_busy_worker(worker); |
452 | 486 | ||
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 31be4ed8b63e..1b511c109db6 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h | |||
@@ -85,6 +85,7 @@ struct btrfs_workers { | |||
85 | * of work items waiting for completion | 85 | * of work items waiting for completion |
86 | */ | 86 | */ |
87 | struct list_head order_list; | 87 | struct list_head order_list; |
88 | struct list_head prio_order_list; | ||
88 | 89 | ||
89 | /* lock for finding the next worker thread to queue on */ | 90 | /* lock for finding the next worker thread to queue on */ |
90 | spinlock_t lock; | 91 | spinlock_t lock; |
@@ -98,4 +99,5 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers); | |||
98 | int btrfs_stop_workers(struct btrfs_workers *workers); | 99 | int btrfs_stop_workers(struct btrfs_workers *workers); |
99 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max); | 100 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max); |
100 | int btrfs_requeue_work(struct btrfs_work *work); | 101 | int btrfs_requeue_work(struct btrfs_work *work); |
102 | void btrfs_set_work_high_prio(struct btrfs_work *work); | ||
101 | #endif | 103 | #endif |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e5b2533b691a..a99f1c2a710d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -1325,12 +1325,12 @@ static noinline int reada_for_balance(struct btrfs_root *root, | |||
1325 | int ret = 0; | 1325 | int ret = 0; |
1326 | int blocksize; | 1326 | int blocksize; |
1327 | 1327 | ||
1328 | parent = path->nodes[level - 1]; | 1328 | parent = path->nodes[level + 1]; |
1329 | if (!parent) | 1329 | if (!parent) |
1330 | return 0; | 1330 | return 0; |
1331 | 1331 | ||
1332 | nritems = btrfs_header_nritems(parent); | 1332 | nritems = btrfs_header_nritems(parent); |
1333 | slot = path->slots[level]; | 1333 | slot = path->slots[level + 1]; |
1334 | blocksize = btrfs_level_size(root, level); | 1334 | blocksize = btrfs_level_size(root, level); |
1335 | 1335 | ||
1336 | if (slot > 0) { | 1336 | if (slot > 0) { |
@@ -1341,7 +1341,7 @@ static noinline int reada_for_balance(struct btrfs_root *root, | |||
1341 | block1 = 0; | 1341 | block1 = 0; |
1342 | free_extent_buffer(eb); | 1342 | free_extent_buffer(eb); |
1343 | } | 1343 | } |
1344 | if (slot < nritems) { | 1344 | if (slot + 1 < nritems) { |
1345 | block2 = btrfs_node_blockptr(parent, slot + 1); | 1345 | block2 = btrfs_node_blockptr(parent, slot + 1); |
1346 | gen = btrfs_node_ptr_generation(parent, slot + 1); | 1346 | gen = btrfs_node_ptr_generation(parent, slot + 1); |
1347 | eb = btrfs_find_tree_block(root, block2, blocksize); | 1347 | eb = btrfs_find_tree_block(root, block2, blocksize); |
@@ -1351,7 +1351,11 @@ static noinline int reada_for_balance(struct btrfs_root *root, | |||
1351 | } | 1351 | } |
1352 | if (block1 || block2) { | 1352 | if (block1 || block2) { |
1353 | ret = -EAGAIN; | 1353 | ret = -EAGAIN; |
1354 | |||
1355 | /* release the whole path */ | ||
1354 | btrfs_release_path(root, path); | 1356 | btrfs_release_path(root, path); |
1357 | |||
1358 | /* read the blocks */ | ||
1355 | if (block1) | 1359 | if (block1) |
1356 | readahead_tree_block(root, block1, blocksize, 0); | 1360 | readahead_tree_block(root, block1, blocksize, 0); |
1357 | if (block2) | 1361 | if (block2) |
@@ -1361,7 +1365,7 @@ static noinline int reada_for_balance(struct btrfs_root *root, | |||
1361 | eb = read_tree_block(root, block1, blocksize, 0); | 1365 | eb = read_tree_block(root, block1, blocksize, 0); |
1362 | free_extent_buffer(eb); | 1366 | free_extent_buffer(eb); |
1363 | } | 1367 | } |
1364 | if (block1) { | 1368 | if (block2) { |
1365 | eb = read_tree_block(root, block2, blocksize, 0); | 1369 | eb = read_tree_block(root, block2, blocksize, 0); |
1366 | free_extent_buffer(eb); | 1370 | free_extent_buffer(eb); |
1367 | } | 1371 | } |
@@ -1481,12 +1485,15 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
1481 | * of the btree by dropping locks before | 1485 | * of the btree by dropping locks before |
1482 | * we read. | 1486 | * we read. |
1483 | */ | 1487 | */ |
1484 | btrfs_release_path(NULL, p); | 1488 | btrfs_unlock_up_safe(p, level + 1); |
1489 | btrfs_set_path_blocking(p); | ||
1490 | |||
1485 | if (tmp) | 1491 | if (tmp) |
1486 | free_extent_buffer(tmp); | 1492 | free_extent_buffer(tmp); |
1487 | if (p->reada) | 1493 | if (p->reada) |
1488 | reada_for_search(root, p, level, slot, key->objectid); | 1494 | reada_for_search(root, p, level, slot, key->objectid); |
1489 | 1495 | ||
1496 | btrfs_release_path(NULL, p); | ||
1490 | tmp = read_tree_block(root, blocknr, blocksize, gen); | 1497 | tmp = read_tree_block(root, blocknr, blocksize, gen); |
1491 | if (tmp) | 1498 | if (tmp) |
1492 | free_extent_buffer(tmp); | 1499 | free_extent_buffer(tmp); |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ad96495dedc5..4414a5d9983a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -881,6 +881,9 @@ struct btrfs_fs_info { | |||
881 | u64 metadata_alloc_profile; | 881 | u64 metadata_alloc_profile; |
882 | u64 system_alloc_profile; | 882 | u64 system_alloc_profile; |
883 | 883 | ||
884 | unsigned data_chunk_allocations; | ||
885 | unsigned metadata_ratio; | ||
886 | |||
884 | void *bdev_holder; | 887 | void *bdev_holder; |
885 | }; | 888 | }; |
886 | 889 | ||
@@ -2174,7 +2177,8 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode); | |||
2174 | extern struct file_operations btrfs_file_operations; | 2177 | extern struct file_operations btrfs_file_operations; |
2175 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, | 2178 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, |
2176 | struct btrfs_root *root, struct inode *inode, | 2179 | struct btrfs_root *root, struct inode *inode, |
2177 | u64 start, u64 end, u64 inline_limit, u64 *hint_block); | 2180 | u64 start, u64 end, u64 locked_end, |
2181 | u64 inline_limit, u64 *hint_block); | ||
2178 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | 2182 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, |
2179 | struct btrfs_root *root, | 2183 | struct btrfs_root *root, |
2180 | struct inode *inode, u64 start, u64 end); | 2184 | struct inode *inode, u64 start, u64 end); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 92caa8035f36..0ff16d3331da 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -232,10 +232,14 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
232 | memcpy(&found, result, csum_size); | 232 | memcpy(&found, result, csum_size); |
233 | 233 | ||
234 | read_extent_buffer(buf, &val, 0, csum_size); | 234 | read_extent_buffer(buf, &val, 0, csum_size); |
235 | printk(KERN_INFO "btrfs: %s checksum verify failed " | 235 | if (printk_ratelimit()) { |
236 | "on %llu wanted %X found %X level %d\n", | 236 | printk(KERN_INFO "btrfs: %s checksum verify " |
237 | root->fs_info->sb->s_id, | 237 | "failed on %llu wanted %X found %X " |
238 | buf->start, val, found, btrfs_header_level(buf)); | 238 | "level %d\n", |
239 | root->fs_info->sb->s_id, | ||
240 | (unsigned long long)buf->start, val, found, | ||
241 | btrfs_header_level(buf)); | ||
242 | } | ||
239 | if (result != (char *)&inline_result) | 243 | if (result != (char *)&inline_result) |
240 | kfree(result); | 244 | kfree(result); |
241 | return 1; | 245 | return 1; |
@@ -268,10 +272,13 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, | |||
268 | ret = 0; | 272 | ret = 0; |
269 | goto out; | 273 | goto out; |
270 | } | 274 | } |
271 | printk("parent transid verify failed on %llu wanted %llu found %llu\n", | 275 | if (printk_ratelimit()) { |
272 | (unsigned long long)eb->start, | 276 | printk("parent transid verify failed on %llu wanted %llu " |
273 | (unsigned long long)parent_transid, | 277 | "found %llu\n", |
274 | (unsigned long long)btrfs_header_generation(eb)); | 278 | (unsigned long long)eb->start, |
279 | (unsigned long long)parent_transid, | ||
280 | (unsigned long long)btrfs_header_generation(eb)); | ||
281 | } | ||
275 | ret = 1; | 282 | ret = 1; |
276 | clear_extent_buffer_uptodate(io_tree, eb); | 283 | clear_extent_buffer_uptodate(io_tree, eb); |
277 | out: | 284 | out: |
@@ -415,9 +422,12 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
415 | 422 | ||
416 | found_start = btrfs_header_bytenr(eb); | 423 | found_start = btrfs_header_bytenr(eb); |
417 | if (found_start != start) { | 424 | if (found_start != start) { |
418 | printk(KERN_INFO "btrfs bad tree block start %llu %llu\n", | 425 | if (printk_ratelimit()) { |
419 | (unsigned long long)found_start, | 426 | printk(KERN_INFO "btrfs bad tree block start " |
420 | (unsigned long long)eb->start); | 427 | "%llu %llu\n", |
428 | (unsigned long long)found_start, | ||
429 | (unsigned long long)eb->start); | ||
430 | } | ||
421 | ret = -EIO; | 431 | ret = -EIO; |
422 | goto err; | 432 | goto err; |
423 | } | 433 | } |
@@ -429,8 +439,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
429 | goto err; | 439 | goto err; |
430 | } | 440 | } |
431 | if (check_tree_block_fsid(root, eb)) { | 441 | if (check_tree_block_fsid(root, eb)) { |
432 | printk(KERN_INFO "btrfs bad fsid on block %llu\n", | 442 | if (printk_ratelimit()) { |
433 | (unsigned long long)eb->start); | 443 | printk(KERN_INFO "btrfs bad fsid on block %llu\n", |
444 | (unsigned long long)eb->start); | ||
445 | } | ||
434 | ret = -EIO; | 446 | ret = -EIO; |
435 | goto err; | 447 | goto err; |
436 | } | 448 | } |
@@ -579,19 +591,12 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
579 | async->bio_flags = bio_flags; | 591 | async->bio_flags = bio_flags; |
580 | 592 | ||
581 | atomic_inc(&fs_info->nr_async_submits); | 593 | atomic_inc(&fs_info->nr_async_submits); |
594 | |||
595 | if (rw & (1 << BIO_RW_SYNCIO)) | ||
596 | btrfs_set_work_high_prio(&async->work); | ||
597 | |||
582 | btrfs_queue_worker(&fs_info->workers, &async->work); | 598 | btrfs_queue_worker(&fs_info->workers, &async->work); |
583 | #if 0 | ||
584 | int limit = btrfs_async_submit_limit(fs_info); | ||
585 | if (atomic_read(&fs_info->nr_async_submits) > limit) { | ||
586 | wait_event_timeout(fs_info->async_submit_wait, | ||
587 | (atomic_read(&fs_info->nr_async_submits) < limit), | ||
588 | HZ/10); | ||
589 | 599 | ||
590 | wait_event_timeout(fs_info->async_submit_wait, | ||
591 | (atomic_read(&fs_info->nr_async_bios) < limit), | ||
592 | HZ/10); | ||
593 | } | ||
594 | #endif | ||
595 | while (atomic_read(&fs_info->async_submit_draining) && | 600 | while (atomic_read(&fs_info->async_submit_draining) && |
596 | atomic_read(&fs_info->nr_async_submits)) { | 601 | atomic_read(&fs_info->nr_async_submits)) { |
597 | wait_event(fs_info->async_submit_wait, | 602 | wait_event(fs_info->async_submit_wait, |
@@ -656,6 +661,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
656 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, | 661 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, |
657 | mirror_num, 0); | 662 | mirror_num, 0); |
658 | } | 663 | } |
664 | |||
659 | /* | 665 | /* |
660 | * kthread helpers are used to submit writes so that checksumming | 666 | * kthread helpers are used to submit writes so that checksumming |
661 | * can happen in parallel across all CPUs | 667 | * can happen in parallel across all CPUs |
@@ -765,27 +771,6 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) | |||
765 | } | 771 | } |
766 | } | 772 | } |
767 | 773 | ||
768 | #if 0 | ||
769 | static int btree_writepage(struct page *page, struct writeback_control *wbc) | ||
770 | { | ||
771 | struct buffer_head *bh; | ||
772 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | ||
773 | struct buffer_head *head; | ||
774 | if (!page_has_buffers(page)) { | ||
775 | create_empty_buffers(page, root->fs_info->sb->s_blocksize, | ||
776 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | ||
777 | } | ||
778 | head = page_buffers(page); | ||
779 | bh = head; | ||
780 | do { | ||
781 | if (buffer_dirty(bh)) | ||
782 | csum_tree_block(root, bh, 0); | ||
783 | bh = bh->b_this_page; | ||
784 | } while (bh != head); | ||
785 | return block_write_full_page(page, btree_get_block, wbc); | ||
786 | } | ||
787 | #endif | ||
788 | |||
789 | static struct address_space_operations btree_aops = { | 774 | static struct address_space_operations btree_aops = { |
790 | .readpage = btree_readpage, | 775 | .readpage = btree_readpage, |
791 | .writepage = btree_writepage, | 776 | .writepage = btree_writepage, |
@@ -1273,11 +1258,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) | |||
1273 | int ret = 0; | 1258 | int ret = 0; |
1274 | struct btrfs_device *device; | 1259 | struct btrfs_device *device; |
1275 | struct backing_dev_info *bdi; | 1260 | struct backing_dev_info *bdi; |
1276 | #if 0 | 1261 | |
1277 | if ((bdi_bits & (1 << BDI_write_congested)) && | ||
1278 | btrfs_congested_async(info, 0)) | ||
1279 | return 1; | ||
1280 | #endif | ||
1281 | list_for_each_entry(device, &info->fs_devices->devices, dev_list) { | 1262 | list_for_each_entry(device, &info->fs_devices->devices, dev_list) { |
1282 | if (!device->bdev) | 1263 | if (!device->bdev) |
1283 | continue; | 1264 | continue; |
@@ -1599,6 +1580,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1599 | fs_info->btree_inode = new_inode(sb); | 1580 | fs_info->btree_inode = new_inode(sb); |
1600 | fs_info->btree_inode->i_ino = 1; | 1581 | fs_info->btree_inode->i_ino = 1; |
1601 | fs_info->btree_inode->i_nlink = 1; | 1582 | fs_info->btree_inode->i_nlink = 1; |
1583 | fs_info->metadata_ratio = 8; | ||
1602 | 1584 | ||
1603 | fs_info->thread_pool_size = min_t(unsigned long, | 1585 | fs_info->thread_pool_size = min_t(unsigned long, |
1604 | num_online_cpus() + 2, 8); | 1586 | num_online_cpus() + 2, 8); |
@@ -1689,7 +1671,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1689 | if (features) { | 1671 | if (features) { |
1690 | printk(KERN_ERR "BTRFS: couldn't mount because of " | 1672 | printk(KERN_ERR "BTRFS: couldn't mount because of " |
1691 | "unsupported optional features (%Lx).\n", | 1673 | "unsupported optional features (%Lx).\n", |
1692 | features); | 1674 | (unsigned long long)features); |
1693 | err = -EINVAL; | 1675 | err = -EINVAL; |
1694 | goto fail_iput; | 1676 | goto fail_iput; |
1695 | } | 1677 | } |
@@ -1699,7 +1681,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1699 | if (!(sb->s_flags & MS_RDONLY) && features) { | 1681 | if (!(sb->s_flags & MS_RDONLY) && features) { |
1700 | printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " | 1682 | printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " |
1701 | "unsupported option features (%Lx).\n", | 1683 | "unsupported option features (%Lx).\n", |
1702 | features); | 1684 | (unsigned long long)features); |
1703 | err = -EINVAL; | 1685 | err = -EINVAL; |
1704 | goto fail_iput; | 1686 | goto fail_iput; |
1705 | } | 1687 | } |
@@ -2095,10 +2077,10 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2095 | device->barriers = 0; | 2077 | device->barriers = 0; |
2096 | get_bh(bh); | 2078 | get_bh(bh); |
2097 | lock_buffer(bh); | 2079 | lock_buffer(bh); |
2098 | ret = submit_bh(WRITE, bh); | 2080 | ret = submit_bh(WRITE_SYNC, bh); |
2099 | } | 2081 | } |
2100 | } else { | 2082 | } else { |
2101 | ret = submit_bh(WRITE, bh); | 2083 | ret = submit_bh(WRITE_SYNC, bh); |
2102 | } | 2084 | } |
2103 | 2085 | ||
2104 | if (!ret && wait) { | 2086 | if (!ret && wait) { |
@@ -2291,7 +2273,7 @@ int close_ctree(struct btrfs_root *root) | |||
2291 | 2273 | ||
2292 | if (fs_info->delalloc_bytes) { | 2274 | if (fs_info->delalloc_bytes) { |
2293 | printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", | 2275 | printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", |
2294 | fs_info->delalloc_bytes); | 2276 | (unsigned long long)fs_info->delalloc_bytes); |
2295 | } | 2277 | } |
2296 | if (fs_info->total_ref_cache_size) { | 2278 | if (fs_info->total_ref_cache_size) { |
2297 | printk(KERN_INFO "btrfs: at umount reference cache size %llu\n", | 2279 | printk(KERN_INFO "btrfs: at umount reference cache size %llu\n", |
@@ -2328,16 +2310,6 @@ int close_ctree(struct btrfs_root *root) | |||
2328 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2310 | btrfs_stop_workers(&fs_info->endio_write_workers); |
2329 | btrfs_stop_workers(&fs_info->submit_workers); | 2311 | btrfs_stop_workers(&fs_info->submit_workers); |
2330 | 2312 | ||
2331 | #if 0 | ||
2332 | while (!list_empty(&fs_info->hashers)) { | ||
2333 | struct btrfs_hasher *hasher; | ||
2334 | hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher, | ||
2335 | hashers); | ||
2336 | list_del(&hasher->hashers); | ||
2337 | crypto_free_hash(&fs_info->hash_tfm); | ||
2338 | kfree(hasher); | ||
2339 | } | ||
2340 | #endif | ||
2341 | btrfs_close_devices(fs_info->fs_devices); | 2313 | btrfs_close_devices(fs_info->fs_devices); |
2342 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2314 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
2343 | 2315 | ||
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 178df4c67de4..e4966444811b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -1844,10 +1844,14 @@ again: | |||
1844 | printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" | 1844 | printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" |
1845 | ", %llu bytes_used, %llu bytes_reserved, " | 1845 | ", %llu bytes_used, %llu bytes_reserved, " |
1846 | "%llu bytes_pinned, %llu bytes_readonly, %llu may use" | 1846 | "%llu bytes_pinned, %llu bytes_readonly, %llu may use" |
1847 | "%llu total\n", bytes, data_sinfo->bytes_delalloc, | 1847 | "%llu total\n", (unsigned long long)bytes, |
1848 | data_sinfo->bytes_used, data_sinfo->bytes_reserved, | 1848 | (unsigned long long)data_sinfo->bytes_delalloc, |
1849 | data_sinfo->bytes_pinned, data_sinfo->bytes_readonly, | 1849 | (unsigned long long)data_sinfo->bytes_used, |
1850 | data_sinfo->bytes_may_use, data_sinfo->total_bytes); | 1850 | (unsigned long long)data_sinfo->bytes_reserved, |
1851 | (unsigned long long)data_sinfo->bytes_pinned, | ||
1852 | (unsigned long long)data_sinfo->bytes_readonly, | ||
1853 | (unsigned long long)data_sinfo->bytes_may_use, | ||
1854 | (unsigned long long)data_sinfo->total_bytes); | ||
1851 | return -ENOSPC; | 1855 | return -ENOSPC; |
1852 | } | 1856 | } |
1853 | data_sinfo->bytes_may_use += bytes; | 1857 | data_sinfo->bytes_may_use += bytes; |
@@ -1918,15 +1922,29 @@ void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | |||
1918 | spin_unlock(&info->lock); | 1922 | spin_unlock(&info->lock); |
1919 | } | 1923 | } |
1920 | 1924 | ||
1925 | static void force_metadata_allocation(struct btrfs_fs_info *info) | ||
1926 | { | ||
1927 | struct list_head *head = &info->space_info; | ||
1928 | struct btrfs_space_info *found; | ||
1929 | |||
1930 | rcu_read_lock(); | ||
1931 | list_for_each_entry_rcu(found, head, list) { | ||
1932 | if (found->flags & BTRFS_BLOCK_GROUP_METADATA) | ||
1933 | found->force_alloc = 1; | ||
1934 | } | ||
1935 | rcu_read_unlock(); | ||
1936 | } | ||
1937 | |||
1921 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 1938 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
1922 | struct btrfs_root *extent_root, u64 alloc_bytes, | 1939 | struct btrfs_root *extent_root, u64 alloc_bytes, |
1923 | u64 flags, int force) | 1940 | u64 flags, int force) |
1924 | { | 1941 | { |
1925 | struct btrfs_space_info *space_info; | 1942 | struct btrfs_space_info *space_info; |
1943 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | ||
1926 | u64 thresh; | 1944 | u64 thresh; |
1927 | int ret = 0; | 1945 | int ret = 0; |
1928 | 1946 | ||
1929 | mutex_lock(&extent_root->fs_info->chunk_mutex); | 1947 | mutex_lock(&fs_info->chunk_mutex); |
1930 | 1948 | ||
1931 | flags = btrfs_reduce_alloc_profile(extent_root, flags); | 1949 | flags = btrfs_reduce_alloc_profile(extent_root, flags); |
1932 | 1950 | ||
@@ -1958,6 +1976,18 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
1958 | } | 1976 | } |
1959 | spin_unlock(&space_info->lock); | 1977 | spin_unlock(&space_info->lock); |
1960 | 1978 | ||
1979 | /* | ||
1980 | * if we're doing a data chunk, go ahead and make sure that | ||
1981 | * we keep a reasonable number of metadata chunks allocated in the | ||
1982 | * FS as well. | ||
1983 | */ | ||
1984 | if (flags & BTRFS_BLOCK_GROUP_DATA) { | ||
1985 | fs_info->data_chunk_allocations++; | ||
1986 | if (!(fs_info->data_chunk_allocations % | ||
1987 | fs_info->metadata_ratio)) | ||
1988 | force_metadata_allocation(fs_info); | ||
1989 | } | ||
1990 | |||
1961 | ret = btrfs_alloc_chunk(trans, extent_root, flags); | 1991 | ret = btrfs_alloc_chunk(trans, extent_root, flags); |
1962 | if (ret) | 1992 | if (ret) |
1963 | space_info->full = 1; | 1993 | space_info->full = 1; |
@@ -2798,9 +2828,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | |||
2798 | info->bytes_pinned - info->bytes_reserved), | 2828 | info->bytes_pinned - info->bytes_reserved), |
2799 | (info->full) ? "" : "not "); | 2829 | (info->full) ? "" : "not "); |
2800 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," | 2830 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," |
2801 | " may_use=%llu, used=%llu\n", info->total_bytes, | 2831 | " may_use=%llu, used=%llu\n", |
2802 | info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use, | 2832 | (unsigned long long)info->total_bytes, |
2803 | info->bytes_used); | 2833 | (unsigned long long)info->bytes_pinned, |
2834 | (unsigned long long)info->bytes_delalloc, | ||
2835 | (unsigned long long)info->bytes_may_use, | ||
2836 | (unsigned long long)info->bytes_used); | ||
2804 | 2837 | ||
2805 | down_read(&info->groups_sem); | 2838 | down_read(&info->groups_sem); |
2806 | list_for_each_entry(cache, &info->block_groups, list) { | 2839 | list_for_each_entry(cache, &info->block_groups, list) { |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index eb2bee8b7fbf..fe9eb990e443 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -17,12 +17,6 @@ | |||
17 | #include "ctree.h" | 17 | #include "ctree.h" |
18 | #include "btrfs_inode.h" | 18 | #include "btrfs_inode.h" |
19 | 19 | ||
20 | /* temporary define until extent_map moves out of btrfs */ | ||
21 | struct kmem_cache *btrfs_cache_create(const char *name, size_t size, | ||
22 | unsigned long extra_flags, | ||
23 | void (*ctor)(void *, struct kmem_cache *, | ||
24 | unsigned long)); | ||
25 | |||
26 | static struct kmem_cache *extent_state_cache; | 20 | static struct kmem_cache *extent_state_cache; |
27 | static struct kmem_cache *extent_buffer_cache; | 21 | static struct kmem_cache *extent_buffer_cache; |
28 | 22 | ||
@@ -50,20 +44,23 @@ struct extent_page_data { | |||
50 | /* tells writepage not to lock the state bits for this range | 44 | /* tells writepage not to lock the state bits for this range |
51 | * it still does the unlocking | 45 | * it still does the unlocking |
52 | */ | 46 | */ |
53 | int extent_locked; | 47 | unsigned int extent_locked:1; |
48 | |||
49 | /* tells the submit_bio code to use a WRITE_SYNC */ | ||
50 | unsigned int sync_io:1; | ||
54 | }; | 51 | }; |
55 | 52 | ||
56 | int __init extent_io_init(void) | 53 | int __init extent_io_init(void) |
57 | { | 54 | { |
58 | extent_state_cache = btrfs_cache_create("extent_state", | 55 | extent_state_cache = kmem_cache_create("extent_state", |
59 | sizeof(struct extent_state), 0, | 56 | sizeof(struct extent_state), 0, |
60 | NULL); | 57 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
61 | if (!extent_state_cache) | 58 | if (!extent_state_cache) |
62 | return -ENOMEM; | 59 | return -ENOMEM; |
63 | 60 | ||
64 | extent_buffer_cache = btrfs_cache_create("extent_buffers", | 61 | extent_buffer_cache = kmem_cache_create("extent_buffers", |
65 | sizeof(struct extent_buffer), 0, | 62 | sizeof(struct extent_buffer), 0, |
66 | NULL); | 63 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
67 | if (!extent_buffer_cache) | 64 | if (!extent_buffer_cache) |
68 | goto free_state_cache; | 65 | goto free_state_cache; |
69 | return 0; | 66 | return 0; |
@@ -1404,69 +1401,6 @@ out: | |||
1404 | return total_bytes; | 1401 | return total_bytes; |
1405 | } | 1402 | } |
1406 | 1403 | ||
1407 | #if 0 | ||
1408 | /* | ||
1409 | * helper function to lock both pages and extents in the tree. | ||
1410 | * pages must be locked first. | ||
1411 | */ | ||
1412 | static int lock_range(struct extent_io_tree *tree, u64 start, u64 end) | ||
1413 | { | ||
1414 | unsigned long index = start >> PAGE_CACHE_SHIFT; | ||
1415 | unsigned long end_index = end >> PAGE_CACHE_SHIFT; | ||
1416 | struct page *page; | ||
1417 | int err; | ||
1418 | |||
1419 | while (index <= end_index) { | ||
1420 | page = grab_cache_page(tree->mapping, index); | ||
1421 | if (!page) { | ||
1422 | err = -ENOMEM; | ||
1423 | goto failed; | ||
1424 | } | ||
1425 | if (IS_ERR(page)) { | ||
1426 | err = PTR_ERR(page); | ||
1427 | goto failed; | ||
1428 | } | ||
1429 | index++; | ||
1430 | } | ||
1431 | lock_extent(tree, start, end, GFP_NOFS); | ||
1432 | return 0; | ||
1433 | |||
1434 | failed: | ||
1435 | /* | ||
1436 | * we failed above in getting the page at 'index', so we undo here | ||
1437 | * up to but not including the page at 'index' | ||
1438 | */ | ||
1439 | end_index = index; | ||
1440 | index = start >> PAGE_CACHE_SHIFT; | ||
1441 | while (index < end_index) { | ||
1442 | page = find_get_page(tree->mapping, index); | ||
1443 | unlock_page(page); | ||
1444 | page_cache_release(page); | ||
1445 | index++; | ||
1446 | } | ||
1447 | return err; | ||
1448 | } | ||
1449 | |||
1450 | /* | ||
1451 | * helper function to unlock both pages and extents in the tree. | ||
1452 | */ | ||
1453 | static int unlock_range(struct extent_io_tree *tree, u64 start, u64 end) | ||
1454 | { | ||
1455 | unsigned long index = start >> PAGE_CACHE_SHIFT; | ||
1456 | unsigned long end_index = end >> PAGE_CACHE_SHIFT; | ||
1457 | struct page *page; | ||
1458 | |||
1459 | while (index <= end_index) { | ||
1460 | page = find_get_page(tree->mapping, index); | ||
1461 | unlock_page(page); | ||
1462 | page_cache_release(page); | ||
1463 | index++; | ||
1464 | } | ||
1465 | unlock_extent(tree, start, end, GFP_NOFS); | ||
1466 | return 0; | ||
1467 | } | ||
1468 | #endif | ||
1469 | |||
1470 | /* | 1404 | /* |
1471 | * set the private field for a given byte offset in the tree. If there isn't | 1405 | * set the private field for a given byte offset in the tree. If there isn't |
1472 | * an extent_state there already, this does nothing. | 1406 | * an extent_state there already, this does nothing. |
@@ -2101,6 +2035,16 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, | |||
2101 | return ret; | 2035 | return ret; |
2102 | } | 2036 | } |
2103 | 2037 | ||
2038 | static noinline void update_nr_written(struct page *page, | ||
2039 | struct writeback_control *wbc, | ||
2040 | unsigned long nr_written) | ||
2041 | { | ||
2042 | wbc->nr_to_write -= nr_written; | ||
2043 | if (wbc->range_cyclic || (wbc->nr_to_write > 0 && | ||
2044 | wbc->range_start == 0 && wbc->range_end == LLONG_MAX)) | ||
2045 | page->mapping->writeback_index = page->index + nr_written; | ||
2046 | } | ||
2047 | |||
2104 | /* | 2048 | /* |
2105 | * the writepage semantics are similar to regular writepage. extent | 2049 | * the writepage semantics are similar to regular writepage. extent |
2106 | * records are inserted to lock ranges in the tree, and as dirty areas | 2050 | * records are inserted to lock ranges in the tree, and as dirty areas |
@@ -2136,8 +2080,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2136 | u64 delalloc_end; | 2080 | u64 delalloc_end; |
2137 | int page_started; | 2081 | int page_started; |
2138 | int compressed; | 2082 | int compressed; |
2083 | int write_flags; | ||
2139 | unsigned long nr_written = 0; | 2084 | unsigned long nr_written = 0; |
2140 | 2085 | ||
2086 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
2087 | write_flags = WRITE_SYNC_PLUG; | ||
2088 | else | ||
2089 | write_flags = WRITE; | ||
2090 | |||
2141 | WARN_ON(!PageLocked(page)); | 2091 | WARN_ON(!PageLocked(page)); |
2142 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); | 2092 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); |
2143 | if (page->index > end_index || | 2093 | if (page->index > end_index || |
@@ -2164,6 +2114,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2164 | delalloc_end = 0; | 2114 | delalloc_end = 0; |
2165 | page_started = 0; | 2115 | page_started = 0; |
2166 | if (!epd->extent_locked) { | 2116 | if (!epd->extent_locked) { |
2117 | /* | ||
2118 | * make sure the wbc mapping index is at least updated | ||
2119 | * to this page. | ||
2120 | */ | ||
2121 | update_nr_written(page, wbc, 0); | ||
2122 | |||
2167 | while (delalloc_end < page_end) { | 2123 | while (delalloc_end < page_end) { |
2168 | nr_delalloc = find_lock_delalloc_range(inode, tree, | 2124 | nr_delalloc = find_lock_delalloc_range(inode, tree, |
2169 | page, | 2125 | page, |
@@ -2185,7 +2141,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2185 | */ | 2141 | */ |
2186 | if (page_started) { | 2142 | if (page_started) { |
2187 | ret = 0; | 2143 | ret = 0; |
2188 | goto update_nr_written; | 2144 | /* |
2145 | * we've unlocked the page, so we can't update | ||
2146 | * the mapping's writeback index, just update | ||
2147 | * nr_to_write. | ||
2148 | */ | ||
2149 | wbc->nr_to_write -= nr_written; | ||
2150 | goto done_unlocked; | ||
2189 | } | 2151 | } |
2190 | } | 2152 | } |
2191 | lock_extent(tree, start, page_end, GFP_NOFS); | 2153 | lock_extent(tree, start, page_end, GFP_NOFS); |
@@ -2198,13 +2160,18 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2198 | if (ret == -EAGAIN) { | 2160 | if (ret == -EAGAIN) { |
2199 | unlock_extent(tree, start, page_end, GFP_NOFS); | 2161 | unlock_extent(tree, start, page_end, GFP_NOFS); |
2200 | redirty_page_for_writepage(wbc, page); | 2162 | redirty_page_for_writepage(wbc, page); |
2163 | update_nr_written(page, wbc, nr_written); | ||
2201 | unlock_page(page); | 2164 | unlock_page(page); |
2202 | ret = 0; | 2165 | ret = 0; |
2203 | goto update_nr_written; | 2166 | goto done_unlocked; |
2204 | } | 2167 | } |
2205 | } | 2168 | } |
2206 | 2169 | ||
2207 | nr_written++; | 2170 | /* |
2171 | * we don't want to touch the inode after unlocking the page, | ||
2172 | * so we update the mapping writeback index now | ||
2173 | */ | ||
2174 | update_nr_written(page, wbc, nr_written + 1); | ||
2208 | 2175 | ||
2209 | end = page_end; | 2176 | end = page_end; |
2210 | if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) | 2177 | if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) |
@@ -2314,9 +2281,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2314 | (unsigned long long)end); | 2281 | (unsigned long long)end); |
2315 | } | 2282 | } |
2316 | 2283 | ||
2317 | ret = submit_extent_page(WRITE, tree, page, sector, | 2284 | ret = submit_extent_page(write_flags, tree, page, |
2318 | iosize, pg_offset, bdev, | 2285 | sector, iosize, pg_offset, |
2319 | &epd->bio, max_nr, | 2286 | bdev, &epd->bio, max_nr, |
2320 | end_bio_extent_writepage, | 2287 | end_bio_extent_writepage, |
2321 | 0, 0, 0); | 2288 | 0, 0, 0); |
2322 | if (ret) | 2289 | if (ret) |
@@ -2336,11 +2303,8 @@ done: | |||
2336 | unlock_extent(tree, unlock_start, page_end, GFP_NOFS); | 2303 | unlock_extent(tree, unlock_start, page_end, GFP_NOFS); |
2337 | unlock_page(page); | 2304 | unlock_page(page); |
2338 | 2305 | ||
2339 | update_nr_written: | 2306 | done_unlocked: |
2340 | wbc->nr_to_write -= nr_written; | 2307 | |
2341 | if (wbc->range_cyclic || (wbc->nr_to_write > 0 && | ||
2342 | wbc->range_start == 0 && wbc->range_end == LLONG_MAX)) | ||
2343 | page->mapping->writeback_index = page->index + nr_written; | ||
2344 | return 0; | 2308 | return 0; |
2345 | } | 2309 | } |
2346 | 2310 | ||
@@ -2460,15 +2424,23 @@ retry: | |||
2460 | return ret; | 2424 | return ret; |
2461 | } | 2425 | } |
2462 | 2426 | ||
2463 | static noinline void flush_write_bio(void *data) | 2427 | static void flush_epd_write_bio(struct extent_page_data *epd) |
2464 | { | 2428 | { |
2465 | struct extent_page_data *epd = data; | ||
2466 | if (epd->bio) { | 2429 | if (epd->bio) { |
2467 | submit_one_bio(WRITE, epd->bio, 0, 0); | 2430 | if (epd->sync_io) |
2431 | submit_one_bio(WRITE_SYNC, epd->bio, 0, 0); | ||
2432 | else | ||
2433 | submit_one_bio(WRITE, epd->bio, 0, 0); | ||
2468 | epd->bio = NULL; | 2434 | epd->bio = NULL; |
2469 | } | 2435 | } |
2470 | } | 2436 | } |
2471 | 2437 | ||
2438 | static noinline void flush_write_bio(void *data) | ||
2439 | { | ||
2440 | struct extent_page_data *epd = data; | ||
2441 | flush_epd_write_bio(epd); | ||
2442 | } | ||
2443 | |||
2472 | int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | 2444 | int extent_write_full_page(struct extent_io_tree *tree, struct page *page, |
2473 | get_extent_t *get_extent, | 2445 | get_extent_t *get_extent, |
2474 | struct writeback_control *wbc) | 2446 | struct writeback_control *wbc) |
@@ -2480,23 +2452,22 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
2480 | .tree = tree, | 2452 | .tree = tree, |
2481 | .get_extent = get_extent, | 2453 | .get_extent = get_extent, |
2482 | .extent_locked = 0, | 2454 | .extent_locked = 0, |
2455 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | ||
2483 | }; | 2456 | }; |
2484 | struct writeback_control wbc_writepages = { | 2457 | struct writeback_control wbc_writepages = { |
2485 | .bdi = wbc->bdi, | 2458 | .bdi = wbc->bdi, |
2486 | .sync_mode = WB_SYNC_NONE, | 2459 | .sync_mode = wbc->sync_mode, |
2487 | .older_than_this = NULL, | 2460 | .older_than_this = NULL, |
2488 | .nr_to_write = 64, | 2461 | .nr_to_write = 64, |
2489 | .range_start = page_offset(page) + PAGE_CACHE_SIZE, | 2462 | .range_start = page_offset(page) + PAGE_CACHE_SIZE, |
2490 | .range_end = (loff_t)-1, | 2463 | .range_end = (loff_t)-1, |
2491 | }; | 2464 | }; |
2492 | 2465 | ||
2493 | |||
2494 | ret = __extent_writepage(page, wbc, &epd); | 2466 | ret = __extent_writepage(page, wbc, &epd); |
2495 | 2467 | ||
2496 | extent_write_cache_pages(tree, mapping, &wbc_writepages, | 2468 | extent_write_cache_pages(tree, mapping, &wbc_writepages, |
2497 | __extent_writepage, &epd, flush_write_bio); | 2469 | __extent_writepage, &epd, flush_write_bio); |
2498 | if (epd.bio) | 2470 | flush_epd_write_bio(&epd); |
2499 | submit_one_bio(WRITE, epd.bio, 0, 0); | ||
2500 | return ret; | 2471 | return ret; |
2501 | } | 2472 | } |
2502 | 2473 | ||
@@ -2515,6 +2486,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, | |||
2515 | .tree = tree, | 2486 | .tree = tree, |
2516 | .get_extent = get_extent, | 2487 | .get_extent = get_extent, |
2517 | .extent_locked = 1, | 2488 | .extent_locked = 1, |
2489 | .sync_io = mode == WB_SYNC_ALL, | ||
2518 | }; | 2490 | }; |
2519 | struct writeback_control wbc_writepages = { | 2491 | struct writeback_control wbc_writepages = { |
2520 | .bdi = inode->i_mapping->backing_dev_info, | 2492 | .bdi = inode->i_mapping->backing_dev_info, |
@@ -2540,8 +2512,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, | |||
2540 | start += PAGE_CACHE_SIZE; | 2512 | start += PAGE_CACHE_SIZE; |
2541 | } | 2513 | } |
2542 | 2514 | ||
2543 | if (epd.bio) | 2515 | flush_epd_write_bio(&epd); |
2544 | submit_one_bio(WRITE, epd.bio, 0, 0); | ||
2545 | return ret; | 2516 | return ret; |
2546 | } | 2517 | } |
2547 | 2518 | ||
@@ -2556,13 +2527,13 @@ int extent_writepages(struct extent_io_tree *tree, | |||
2556 | .tree = tree, | 2527 | .tree = tree, |
2557 | .get_extent = get_extent, | 2528 | .get_extent = get_extent, |
2558 | .extent_locked = 0, | 2529 | .extent_locked = 0, |
2530 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | ||
2559 | }; | 2531 | }; |
2560 | 2532 | ||
2561 | ret = extent_write_cache_pages(tree, mapping, wbc, | 2533 | ret = extent_write_cache_pages(tree, mapping, wbc, |
2562 | __extent_writepage, &epd, | 2534 | __extent_writepage, &epd, |
2563 | flush_write_bio); | 2535 | flush_write_bio); |
2564 | if (epd.bio) | 2536 | flush_epd_write_bio(&epd); |
2565 | submit_one_bio(WRITE, epd.bio, 0, 0); | ||
2566 | return ret; | 2537 | return ret; |
2567 | } | 2538 | } |
2568 | 2539 | ||
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index b187917b36fa..30c9365861e6 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -6,19 +6,14 @@ | |||
6 | #include <linux/hardirq.h> | 6 | #include <linux/hardirq.h> |
7 | #include "extent_map.h" | 7 | #include "extent_map.h" |
8 | 8 | ||
9 | /* temporary define until extent_map moves out of btrfs */ | ||
10 | struct kmem_cache *btrfs_cache_create(const char *name, size_t size, | ||
11 | unsigned long extra_flags, | ||
12 | void (*ctor)(void *, struct kmem_cache *, | ||
13 | unsigned long)); | ||
14 | 9 | ||
15 | static struct kmem_cache *extent_map_cache; | 10 | static struct kmem_cache *extent_map_cache; |
16 | 11 | ||
17 | int __init extent_map_init(void) | 12 | int __init extent_map_init(void) |
18 | { | 13 | { |
19 | extent_map_cache = btrfs_cache_create("extent_map", | 14 | extent_map_cache = kmem_cache_create("extent_map", |
20 | sizeof(struct extent_map), 0, | 15 | sizeof(struct extent_map), 0, |
21 | NULL); | 16 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
22 | if (!extent_map_cache) | 17 | if (!extent_map_cache) |
23 | return -ENOMEM; | 18 | return -ENOMEM; |
24 | return 0; | 19 | return 0; |
@@ -43,7 +38,6 @@ void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) | |||
43 | tree->map.rb_node = NULL; | 38 | tree->map.rb_node = NULL; |
44 | spin_lock_init(&tree->lock); | 39 | spin_lock_init(&tree->lock); |
45 | } | 40 | } |
46 | EXPORT_SYMBOL(extent_map_tree_init); | ||
47 | 41 | ||
48 | /** | 42 | /** |
49 | * alloc_extent_map - allocate new extent map structure | 43 | * alloc_extent_map - allocate new extent map structure |
@@ -64,7 +58,6 @@ struct extent_map *alloc_extent_map(gfp_t mask) | |||
64 | atomic_set(&em->refs, 1); | 58 | atomic_set(&em->refs, 1); |
65 | return em; | 59 | return em; |
66 | } | 60 | } |
67 | EXPORT_SYMBOL(alloc_extent_map); | ||
68 | 61 | ||
69 | /** | 62 | /** |
70 | * free_extent_map - drop reference count of an extent_map | 63 | * free_extent_map - drop reference count of an extent_map |
@@ -83,7 +76,6 @@ void free_extent_map(struct extent_map *em) | |||
83 | kmem_cache_free(extent_map_cache, em); | 76 | kmem_cache_free(extent_map_cache, em); |
84 | } | 77 | } |
85 | } | 78 | } |
86 | EXPORT_SYMBOL(free_extent_map); | ||
87 | 79 | ||
88 | static struct rb_node *tree_insert(struct rb_root *root, u64 offset, | 80 | static struct rb_node *tree_insert(struct rb_root *root, u64 offset, |
89 | struct rb_node *node) | 81 | struct rb_node *node) |
@@ -264,7 +256,6 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
264 | out: | 256 | out: |
265 | return ret; | 257 | return ret; |
266 | } | 258 | } |
267 | EXPORT_SYMBOL(add_extent_mapping); | ||
268 | 259 | ||
269 | /* simple helper to do math around the end of an extent, handling wrap */ | 260 | /* simple helper to do math around the end of an extent, handling wrap */ |
270 | static u64 range_end(u64 start, u64 len) | 261 | static u64 range_end(u64 start, u64 len) |
@@ -326,7 +317,6 @@ found: | |||
326 | out: | 317 | out: |
327 | return em; | 318 | return em; |
328 | } | 319 | } |
329 | EXPORT_SYMBOL(lookup_extent_mapping); | ||
330 | 320 | ||
331 | /** | 321 | /** |
332 | * remove_extent_mapping - removes an extent_map from the extent tree | 322 | * remove_extent_mapping - removes an extent_map from the extent tree |
@@ -346,4 +336,3 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) | |||
346 | em->in_tree = 0; | 336 | em->in_tree = 0; |
347 | return ret; | 337 | return ret; |
348 | } | 338 | } |
349 | EXPORT_SYMBOL(remove_extent_mapping); | ||
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9c9fb46ccd08..1d51dc38bb49 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -272,83 +272,6 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
272 | return 0; | 272 | return 0; |
273 | } | 273 | } |
274 | 274 | ||
275 | int btrfs_check_file(struct btrfs_root *root, struct inode *inode) | ||
276 | { | ||
277 | return 0; | ||
278 | #if 0 | ||
279 | struct btrfs_path *path; | ||
280 | struct btrfs_key found_key; | ||
281 | struct extent_buffer *leaf; | ||
282 | struct btrfs_file_extent_item *extent; | ||
283 | u64 last_offset = 0; | ||
284 | int nritems; | ||
285 | int slot; | ||
286 | int found_type; | ||
287 | int ret; | ||
288 | int err = 0; | ||
289 | u64 extent_end = 0; | ||
290 | |||
291 | path = btrfs_alloc_path(); | ||
292 | ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino, | ||
293 | last_offset, 0); | ||
294 | while (1) { | ||
295 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
296 | if (path->slots[0] >= nritems) { | ||
297 | ret = btrfs_next_leaf(root, path); | ||
298 | if (ret) | ||
299 | goto out; | ||
300 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
301 | } | ||
302 | slot = path->slots[0]; | ||
303 | leaf = path->nodes[0]; | ||
304 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | ||
305 | if (found_key.objectid != inode->i_ino) | ||
306 | break; | ||
307 | if (found_key.type != BTRFS_EXTENT_DATA_KEY) | ||
308 | goto out; | ||
309 | |||
310 | if (found_key.offset < last_offset) { | ||
311 | WARN_ON(1); | ||
312 | btrfs_print_leaf(root, leaf); | ||
313 | printk(KERN_ERR "inode %lu found offset %llu " | ||
314 | "expected %llu\n", inode->i_ino, | ||
315 | (unsigned long long)found_key.offset, | ||
316 | (unsigned long long)last_offset); | ||
317 | err = 1; | ||
318 | goto out; | ||
319 | } | ||
320 | extent = btrfs_item_ptr(leaf, slot, | ||
321 | struct btrfs_file_extent_item); | ||
322 | found_type = btrfs_file_extent_type(leaf, extent); | ||
323 | if (found_type == BTRFS_FILE_EXTENT_REG) { | ||
324 | extent_end = found_key.offset + | ||
325 | btrfs_file_extent_num_bytes(leaf, extent); | ||
326 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { | ||
327 | struct btrfs_item *item; | ||
328 | item = btrfs_item_nr(leaf, slot); | ||
329 | extent_end = found_key.offset + | ||
330 | btrfs_file_extent_inline_len(leaf, extent); | ||
331 | extent_end = (extent_end + root->sectorsize - 1) & | ||
332 | ~((u64)root->sectorsize - 1); | ||
333 | } | ||
334 | last_offset = extent_end; | ||
335 | path->slots[0]++; | ||
336 | } | ||
337 | if (0 && last_offset < inode->i_size) { | ||
338 | WARN_ON(1); | ||
339 | btrfs_print_leaf(root, leaf); | ||
340 | printk(KERN_ERR "inode %lu found offset %llu size %llu\n", | ||
341 | inode->i_ino, (unsigned long long)last_offset, | ||
342 | (unsigned long long)inode->i_size); | ||
343 | err = 1; | ||
344 | |||
345 | } | ||
346 | out: | ||
347 | btrfs_free_path(path); | ||
348 | return err; | ||
349 | #endif | ||
350 | } | ||
351 | |||
352 | /* | 275 | /* |
353 | * this is very complex, but the basic idea is to drop all extents | 276 | * this is very complex, but the basic idea is to drop all extents |
354 | * in the range start - end. hint_block is filled in with a block number | 277 | * in the range start - end. hint_block is filled in with a block number |
@@ -363,15 +286,16 @@ out: | |||
363 | */ | 286 | */ |
364 | noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, | 287 | noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, |
365 | struct btrfs_root *root, struct inode *inode, | 288 | struct btrfs_root *root, struct inode *inode, |
366 | u64 start, u64 end, u64 inline_limit, u64 *hint_byte) | 289 | u64 start, u64 end, u64 locked_end, |
290 | u64 inline_limit, u64 *hint_byte) | ||
367 | { | 291 | { |
368 | u64 extent_end = 0; | 292 | u64 extent_end = 0; |
369 | u64 locked_end = end; | ||
370 | u64 search_start = start; | 293 | u64 search_start = start; |
371 | u64 leaf_start; | 294 | u64 leaf_start; |
372 | u64 ram_bytes = 0; | 295 | u64 ram_bytes = 0; |
373 | u64 orig_parent = 0; | 296 | u64 orig_parent = 0; |
374 | u64 disk_bytenr = 0; | 297 | u64 disk_bytenr = 0; |
298 | u64 orig_locked_end = locked_end; | ||
375 | u8 compression; | 299 | u8 compression; |
376 | u8 encryption; | 300 | u8 encryption; |
377 | u16 other_encoding = 0; | 301 | u16 other_encoding = 0; |
@@ -684,11 +608,10 @@ next_slot: | |||
684 | } | 608 | } |
685 | out: | 609 | out: |
686 | btrfs_free_path(path); | 610 | btrfs_free_path(path); |
687 | if (locked_end > end) { | 611 | if (locked_end > orig_locked_end) { |
688 | unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, | 612 | unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end, |
689 | GFP_NOFS); | 613 | locked_end - 1, GFP_NOFS); |
690 | } | 614 | } |
691 | btrfs_check_file(root, inode); | ||
692 | return ret; | 615 | return ret; |
693 | } | 616 | } |
694 | 617 | ||
@@ -830,7 +753,7 @@ again: | |||
830 | 753 | ||
831 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); | 754 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); |
832 | BUG_ON(ret); | 755 | BUG_ON(ret); |
833 | goto done; | 756 | goto release; |
834 | } else if (split == start) { | 757 | } else if (split == start) { |
835 | if (locked_end < extent_end) { | 758 | if (locked_end < extent_end) { |
836 | ret = try_lock_extent(&BTRFS_I(inode)->io_tree, | 759 | ret = try_lock_extent(&BTRFS_I(inode)->io_tree, |
@@ -926,6 +849,8 @@ again: | |||
926 | } | 849 | } |
927 | done: | 850 | done: |
928 | btrfs_mark_buffer_dirty(leaf); | 851 | btrfs_mark_buffer_dirty(leaf); |
852 | |||
853 | release: | ||
929 | btrfs_release_path(root, path); | 854 | btrfs_release_path(root, path); |
930 | if (split_end && split == start) { | 855 | if (split_end && split == start) { |
931 | split = end; | 856 | split = end; |
@@ -1131,7 +1056,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
1131 | if (will_write) { | 1056 | if (will_write) { |
1132 | btrfs_fdatawrite_range(inode->i_mapping, pos, | 1057 | btrfs_fdatawrite_range(inode->i_mapping, pos, |
1133 | pos + write_bytes - 1, | 1058 | pos + write_bytes - 1, |
1134 | WB_SYNC_NONE); | 1059 | WB_SYNC_ALL); |
1135 | } else { | 1060 | } else { |
1136 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | 1061 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, |
1137 | num_pages); | 1062 | num_pages); |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 768b9523662d..0bc93657b460 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -332,13 +332,17 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | |||
332 | printk(KERN_ERR "couldn't find space %llu to free\n", | 332 | printk(KERN_ERR "couldn't find space %llu to free\n", |
333 | (unsigned long long)offset); | 333 | (unsigned long long)offset); |
334 | printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n", | 334 | printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n", |
335 | block_group->cached, block_group->key.objectid, | 335 | block_group->cached, |
336 | block_group->key.offset); | 336 | (unsigned long long)block_group->key.objectid, |
337 | (unsigned long long)block_group->key.offset); | ||
337 | btrfs_dump_free_space(block_group, bytes); | 338 | btrfs_dump_free_space(block_group, bytes); |
338 | } else if (info) { | 339 | } else if (info) { |
339 | printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, " | 340 | printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, " |
340 | "but wanted offset=%llu bytes=%llu\n", | 341 | "but wanted offset=%llu bytes=%llu\n", |
341 | info->offset, info->bytes, offset, bytes); | 342 | (unsigned long long)info->offset, |
343 | (unsigned long long)info->bytes, | ||
344 | (unsigned long long)offset, | ||
345 | (unsigned long long)bytes); | ||
342 | } | 346 | } |
343 | WARN_ON(1); | 347 | WARN_ON(1); |
344 | } | 348 | } |
@@ -357,8 +361,9 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | |||
357 | info = rb_entry(n, struct btrfs_free_space, offset_index); | 361 | info = rb_entry(n, struct btrfs_free_space, offset_index); |
358 | if (info->bytes >= bytes) | 362 | if (info->bytes >= bytes) |
359 | count++; | 363 | count++; |
360 | printk(KERN_ERR "entry offset %llu, bytes %llu\n", info->offset, | 364 | printk(KERN_ERR "entry offset %llu, bytes %llu\n", |
361 | info->bytes); | 365 | (unsigned long long)info->offset, |
366 | (unsigned long long)info->bytes); | ||
362 | } | 367 | } |
363 | printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" | 368 | printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" |
364 | "\n", count); | 369 | "\n", count); |
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index cc7334d833c9..9abbced1123d 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -79,7 +79,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, | |||
79 | } | 79 | } |
80 | path = btrfs_alloc_path(); | 80 | path = btrfs_alloc_path(); |
81 | BUG_ON(!path); | 81 | BUG_ON(!path); |
82 | search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); | 82 | search_start = max(search_start, (u64)BTRFS_FIRST_FREE_OBJECTID); |
83 | search_key.objectid = search_start; | 83 | search_key.objectid = search_start; |
84 | search_key.type = 0; | 84 | search_key.type = 0; |
85 | search_key.offset = 0; | 85 | search_key.offset = 0; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a0d1dd492a58..90c23eb28829 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -70,7 +70,6 @@ static struct extent_io_ops btrfs_extent_io_ops; | |||
70 | static struct kmem_cache *btrfs_inode_cachep; | 70 | static struct kmem_cache *btrfs_inode_cachep; |
71 | struct kmem_cache *btrfs_trans_handle_cachep; | 71 | struct kmem_cache *btrfs_trans_handle_cachep; |
72 | struct kmem_cache *btrfs_transaction_cachep; | 72 | struct kmem_cache *btrfs_transaction_cachep; |
73 | struct kmem_cache *btrfs_bit_radix_cachep; | ||
74 | struct kmem_cache *btrfs_path_cachep; | 73 | struct kmem_cache *btrfs_path_cachep; |
75 | 74 | ||
76 | #define S_SHIFT 12 | 75 | #define S_SHIFT 12 |
@@ -234,7 +233,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
234 | } | 233 | } |
235 | 234 | ||
236 | ret = btrfs_drop_extents(trans, root, inode, start, | 235 | ret = btrfs_drop_extents(trans, root, inode, start, |
237 | aligned_end, start, &hint_byte); | 236 | aligned_end, aligned_end, start, &hint_byte); |
238 | BUG_ON(ret); | 237 | BUG_ON(ret); |
239 | 238 | ||
240 | if (isize > actual_end) | 239 | if (isize > actual_end) |
@@ -1439,6 +1438,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1439 | struct inode *inode, u64 file_pos, | 1438 | struct inode *inode, u64 file_pos, |
1440 | u64 disk_bytenr, u64 disk_num_bytes, | 1439 | u64 disk_bytenr, u64 disk_num_bytes, |
1441 | u64 num_bytes, u64 ram_bytes, | 1440 | u64 num_bytes, u64 ram_bytes, |
1441 | u64 locked_end, | ||
1442 | u8 compression, u8 encryption, | 1442 | u8 compression, u8 encryption, |
1443 | u16 other_encoding, int extent_type) | 1443 | u16 other_encoding, int extent_type) |
1444 | { | 1444 | { |
@@ -1455,7 +1455,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1455 | 1455 | ||
1456 | path->leave_spinning = 1; | 1456 | path->leave_spinning = 1; |
1457 | ret = btrfs_drop_extents(trans, root, inode, file_pos, | 1457 | ret = btrfs_drop_extents(trans, root, inode, file_pos, |
1458 | file_pos + num_bytes, file_pos, &hint); | 1458 | file_pos + num_bytes, locked_end, |
1459 | file_pos, &hint); | ||
1459 | BUG_ON(ret); | 1460 | BUG_ON(ret); |
1460 | 1461 | ||
1461 | ins.objectid = inode->i_ino; | 1462 | ins.objectid = inode->i_ino; |
@@ -1590,6 +1591,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1590 | ordered_extent->disk_len, | 1591 | ordered_extent->disk_len, |
1591 | ordered_extent->len, | 1592 | ordered_extent->len, |
1592 | ordered_extent->len, | 1593 | ordered_extent->len, |
1594 | ordered_extent->file_offset + | ||
1595 | ordered_extent->len, | ||
1593 | compressed, 0, 0, | 1596 | compressed, 0, 0, |
1594 | BTRFS_FILE_EXTENT_REG); | 1597 | BTRFS_FILE_EXTENT_REG); |
1595 | BUG_ON(ret); | 1598 | BUG_ON(ret); |
@@ -1819,10 +1822,12 @@ good: | |||
1819 | return 0; | 1822 | return 0; |
1820 | 1823 | ||
1821 | zeroit: | 1824 | zeroit: |
1822 | printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " | 1825 | if (printk_ratelimit()) { |
1823 | "private %llu\n", page->mapping->host->i_ino, | 1826 | printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " |
1824 | (unsigned long long)start, csum, | 1827 | "private %llu\n", page->mapping->host->i_ino, |
1825 | (unsigned long long)private); | 1828 | (unsigned long long)start, csum, |
1829 | (unsigned long long)private); | ||
1830 | } | ||
1826 | memset(kaddr + offset, 1, end - start + 1); | 1831 | memset(kaddr + offset, 1, end - start + 1); |
1827 | flush_dcache_page(page); | 1832 | flush_dcache_page(page); |
1828 | kunmap_atomic(kaddr, KM_USER0); | 1833 | kunmap_atomic(kaddr, KM_USER0); |
@@ -2011,6 +2016,57 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2011 | } | 2016 | } |
2012 | 2017 | ||
2013 | /* | 2018 | /* |
2019 | * very simple check to peek ahead in the leaf looking for xattrs. If we | ||
2020 | * don't find any xattrs, we know there can't be any acls. | ||
2021 | * | ||
2022 | * slot is the slot the inode is in, objectid is the objectid of the inode | ||
2023 | */ | ||
2024 | static noinline int acls_after_inode_item(struct extent_buffer *leaf, | ||
2025 | int slot, u64 objectid) | ||
2026 | { | ||
2027 | u32 nritems = btrfs_header_nritems(leaf); | ||
2028 | struct btrfs_key found_key; | ||
2029 | int scanned = 0; | ||
2030 | |||
2031 | slot++; | ||
2032 | while (slot < nritems) { | ||
2033 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | ||
2034 | |||
2035 | /* we found a different objectid, there must not be acls */ | ||
2036 | if (found_key.objectid != objectid) | ||
2037 | return 0; | ||
2038 | |||
2039 | /* we found an xattr, assume we've got an acl */ | ||
2040 | if (found_key.type == BTRFS_XATTR_ITEM_KEY) | ||
2041 | return 1; | ||
2042 | |||
2043 | /* | ||
2044 | * we found a key greater than an xattr key, there can't | ||
2045 | * be any acls later on | ||
2046 | */ | ||
2047 | if (found_key.type > BTRFS_XATTR_ITEM_KEY) | ||
2048 | return 0; | ||
2049 | |||
2050 | slot++; | ||
2051 | scanned++; | ||
2052 | |||
2053 | /* | ||
2054 | * it goes inode, inode backrefs, xattrs, extents, | ||
2055 | * so if there are a ton of hard links to an inode there can | ||
2056 | * be a lot of backrefs. Don't waste time searching too hard, | ||
2057 | * this is just an optimization | ||
2058 | */ | ||
2059 | if (scanned >= 8) | ||
2060 | break; | ||
2061 | } | ||
2062 | /* we hit the end of the leaf before we found an xattr or | ||
2063 | * something larger than an xattr. We have to assume the inode | ||
2064 | * has acls | ||
2065 | */ | ||
2066 | return 1; | ||
2067 | } | ||
2068 | |||
2069 | /* | ||
2014 | * read an inode from the btree into the in-memory inode | 2070 | * read an inode from the btree into the in-memory inode |
2015 | */ | 2071 | */ |
2016 | void btrfs_read_locked_inode(struct inode *inode) | 2072 | void btrfs_read_locked_inode(struct inode *inode) |
@@ -2021,6 +2077,7 @@ void btrfs_read_locked_inode(struct inode *inode) | |||
2021 | struct btrfs_timespec *tspec; | 2077 | struct btrfs_timespec *tspec; |
2022 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2078 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2023 | struct btrfs_key location; | 2079 | struct btrfs_key location; |
2080 | int maybe_acls; | ||
2024 | u64 alloc_group_block; | 2081 | u64 alloc_group_block; |
2025 | u32 rdev; | 2082 | u32 rdev; |
2026 | int ret; | 2083 | int ret; |
@@ -2067,6 +2124,16 @@ void btrfs_read_locked_inode(struct inode *inode) | |||
2067 | 2124 | ||
2068 | alloc_group_block = btrfs_inode_block_group(leaf, inode_item); | 2125 | alloc_group_block = btrfs_inode_block_group(leaf, inode_item); |
2069 | 2126 | ||
2127 | /* | ||
2128 | * try to precache a NULL acl entry for files that don't have | ||
2129 | * any xattrs or acls | ||
2130 | */ | ||
2131 | maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino); | ||
2132 | if (!maybe_acls) { | ||
2133 | BTRFS_I(inode)->i_acl = NULL; | ||
2134 | BTRFS_I(inode)->i_default_acl = NULL; | ||
2135 | } | ||
2136 | |||
2070 | BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, | 2137 | BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, |
2071 | alloc_group_block, 0); | 2138 | alloc_group_block, 0); |
2072 | btrfs_free_path(path); | 2139 | btrfs_free_path(path); |
@@ -2877,6 +2944,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
2877 | err = btrfs_drop_extents(trans, root, inode, | 2944 | err = btrfs_drop_extents(trans, root, inode, |
2878 | cur_offset, | 2945 | cur_offset, |
2879 | cur_offset + hole_size, | 2946 | cur_offset + hole_size, |
2947 | block_end, | ||
2880 | cur_offset, &hint_byte); | 2948 | cur_offset, &hint_byte); |
2881 | if (err) | 2949 | if (err) |
2882 | break; | 2950 | break; |
@@ -3041,8 +3109,8 @@ static noinline void init_btrfs_i(struct inode *inode) | |||
3041 | { | 3109 | { |
3042 | struct btrfs_inode *bi = BTRFS_I(inode); | 3110 | struct btrfs_inode *bi = BTRFS_I(inode); |
3043 | 3111 | ||
3044 | bi->i_acl = NULL; | 3112 | bi->i_acl = BTRFS_ACL_NOT_CACHED; |
3045 | bi->i_default_acl = NULL; | 3113 | bi->i_default_acl = BTRFS_ACL_NOT_CACHED; |
3046 | 3114 | ||
3047 | bi->generation = 0; | 3115 | bi->generation = 0; |
3048 | bi->sequence = 0; | 3116 | bi->sequence = 0; |
@@ -4634,47 +4702,36 @@ void btrfs_destroy_cachep(void) | |||
4634 | kmem_cache_destroy(btrfs_trans_handle_cachep); | 4702 | kmem_cache_destroy(btrfs_trans_handle_cachep); |
4635 | if (btrfs_transaction_cachep) | 4703 | if (btrfs_transaction_cachep) |
4636 | kmem_cache_destroy(btrfs_transaction_cachep); | 4704 | kmem_cache_destroy(btrfs_transaction_cachep); |
4637 | if (btrfs_bit_radix_cachep) | ||
4638 | kmem_cache_destroy(btrfs_bit_radix_cachep); | ||
4639 | if (btrfs_path_cachep) | 4705 | if (btrfs_path_cachep) |
4640 | kmem_cache_destroy(btrfs_path_cachep); | 4706 | kmem_cache_destroy(btrfs_path_cachep); |
4641 | } | 4707 | } |
4642 | 4708 | ||
4643 | struct kmem_cache *btrfs_cache_create(const char *name, size_t size, | ||
4644 | unsigned long extra_flags, | ||
4645 | void (*ctor)(void *)) | ||
4646 | { | ||
4647 | return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT | | ||
4648 | SLAB_MEM_SPREAD | extra_flags), ctor); | ||
4649 | } | ||
4650 | |||
4651 | int btrfs_init_cachep(void) | 4709 | int btrfs_init_cachep(void) |
4652 | { | 4710 | { |
4653 | btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache", | 4711 | btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache", |
4654 | sizeof(struct btrfs_inode), | 4712 | sizeof(struct btrfs_inode), 0, |
4655 | 0, init_once); | 4713 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once); |
4656 | if (!btrfs_inode_cachep) | 4714 | if (!btrfs_inode_cachep) |
4657 | goto fail; | 4715 | goto fail; |
4658 | btrfs_trans_handle_cachep = | 4716 | |
4659 | btrfs_cache_create("btrfs_trans_handle_cache", | 4717 | btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache", |
4660 | sizeof(struct btrfs_trans_handle), | 4718 | sizeof(struct btrfs_trans_handle), 0, |
4661 | 0, NULL); | 4719 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
4662 | if (!btrfs_trans_handle_cachep) | 4720 | if (!btrfs_trans_handle_cachep) |
4663 | goto fail; | 4721 | goto fail; |
4664 | btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache", | 4722 | |
4665 | sizeof(struct btrfs_transaction), | 4723 | btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache", |
4666 | 0, NULL); | 4724 | sizeof(struct btrfs_transaction), 0, |
4725 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | ||
4667 | if (!btrfs_transaction_cachep) | 4726 | if (!btrfs_transaction_cachep) |
4668 | goto fail; | 4727 | goto fail; |
4669 | btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache", | 4728 | |
4670 | sizeof(struct btrfs_path), | 4729 | btrfs_path_cachep = kmem_cache_create("btrfs_path_cache", |
4671 | 0, NULL); | 4730 | sizeof(struct btrfs_path), 0, |
4731 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | ||
4672 | if (!btrfs_path_cachep) | 4732 | if (!btrfs_path_cachep) |
4673 | goto fail; | 4733 | goto fail; |
4674 | btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256, | 4734 | |
4675 | SLAB_DESTROY_BY_RCU, NULL); | ||
4676 | if (!btrfs_bit_radix_cachep) | ||
4677 | goto fail; | ||
4678 | return 0; | 4735 | return 0; |
4679 | fail: | 4736 | fail: |
4680 | btrfs_destroy_cachep(); | 4737 | btrfs_destroy_cachep(); |
@@ -4970,10 +5027,10 @@ out_fail: | |||
4970 | return err; | 5027 | return err; |
4971 | } | 5028 | } |
4972 | 5029 | ||
4973 | static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | 5030 | static int prealloc_file_range(struct btrfs_trans_handle *trans, |
4974 | u64 alloc_hint, int mode) | 5031 | struct inode *inode, u64 start, u64 end, |
5032 | u64 locked_end, u64 alloc_hint, int mode) | ||
4975 | { | 5033 | { |
4976 | struct btrfs_trans_handle *trans; | ||
4977 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5034 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4978 | struct btrfs_key ins; | 5035 | struct btrfs_key ins; |
4979 | u64 alloc_size; | 5036 | u64 alloc_size; |
@@ -4981,10 +5038,6 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | |||
4981 | u64 num_bytes = end - start; | 5038 | u64 num_bytes = end - start; |
4982 | int ret = 0; | 5039 | int ret = 0; |
4983 | 5040 | ||
4984 | trans = btrfs_join_transaction(root, 1); | ||
4985 | BUG_ON(!trans); | ||
4986 | btrfs_set_trans_block_group(trans, inode); | ||
4987 | |||
4988 | while (num_bytes > 0) { | 5041 | while (num_bytes > 0) { |
4989 | alloc_size = min(num_bytes, root->fs_info->max_extent); | 5042 | alloc_size = min(num_bytes, root->fs_info->max_extent); |
4990 | ret = btrfs_reserve_extent(trans, root, alloc_size, | 5043 | ret = btrfs_reserve_extent(trans, root, alloc_size, |
@@ -4997,7 +5050,8 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | |||
4997 | ret = insert_reserved_file_extent(trans, inode, | 5050 | ret = insert_reserved_file_extent(trans, inode, |
4998 | cur_offset, ins.objectid, | 5051 | cur_offset, ins.objectid, |
4999 | ins.offset, ins.offset, | 5052 | ins.offset, ins.offset, |
5000 | ins.offset, 0, 0, 0, | 5053 | ins.offset, locked_end, |
5054 | 0, 0, 0, | ||
5001 | BTRFS_FILE_EXTENT_PREALLOC); | 5055 | BTRFS_FILE_EXTENT_PREALLOC); |
5002 | BUG_ON(ret); | 5056 | BUG_ON(ret); |
5003 | num_bytes -= ins.offset; | 5057 | num_bytes -= ins.offset; |
@@ -5015,7 +5069,6 @@ out: | |||
5015 | BUG_ON(ret); | 5069 | BUG_ON(ret); |
5016 | } | 5070 | } |
5017 | 5071 | ||
5018 | btrfs_end_transaction(trans, root); | ||
5019 | return ret; | 5072 | return ret; |
5020 | } | 5073 | } |
5021 | 5074 | ||
@@ -5027,13 +5080,21 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5027 | u64 alloc_start; | 5080 | u64 alloc_start; |
5028 | u64 alloc_end; | 5081 | u64 alloc_end; |
5029 | u64 alloc_hint = 0; | 5082 | u64 alloc_hint = 0; |
5083 | u64 locked_end; | ||
5030 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | 5084 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; |
5031 | struct extent_map *em; | 5085 | struct extent_map *em; |
5086 | struct btrfs_trans_handle *trans; | ||
5032 | int ret; | 5087 | int ret; |
5033 | 5088 | ||
5034 | alloc_start = offset & ~mask; | 5089 | alloc_start = offset & ~mask; |
5035 | alloc_end = (offset + len + mask) & ~mask; | 5090 | alloc_end = (offset + len + mask) & ~mask; |
5036 | 5091 | ||
5092 | /* | ||
5093 | * wait for ordered IO before we have any locks. We'll loop again | ||
5094 | * below with the locks held. | ||
5095 | */ | ||
5096 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); | ||
5097 | |||
5037 | mutex_lock(&inode->i_mutex); | 5098 | mutex_lock(&inode->i_mutex); |
5038 | if (alloc_start > inode->i_size) { | 5099 | if (alloc_start > inode->i_size) { |
5039 | ret = btrfs_cont_expand(inode, alloc_start); | 5100 | ret = btrfs_cont_expand(inode, alloc_start); |
@@ -5041,10 +5102,21 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5041 | goto out; | 5102 | goto out; |
5042 | } | 5103 | } |
5043 | 5104 | ||
5105 | locked_end = alloc_end - 1; | ||
5044 | while (1) { | 5106 | while (1) { |
5045 | struct btrfs_ordered_extent *ordered; | 5107 | struct btrfs_ordered_extent *ordered; |
5046 | lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, | 5108 | |
5047 | alloc_end - 1, GFP_NOFS); | 5109 | trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); |
5110 | if (!trans) { | ||
5111 | ret = -EIO; | ||
5112 | goto out; | ||
5113 | } | ||
5114 | |||
5115 | /* the extent lock is ordered inside the running | ||
5116 | * transaction | ||
5117 | */ | ||
5118 | lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | ||
5119 | GFP_NOFS); | ||
5048 | ordered = btrfs_lookup_first_ordered_extent(inode, | 5120 | ordered = btrfs_lookup_first_ordered_extent(inode, |
5049 | alloc_end - 1); | 5121 | alloc_end - 1); |
5050 | if (ordered && | 5122 | if (ordered && |
@@ -5052,7 +5124,13 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5052 | ordered->file_offset < alloc_end) { | 5124 | ordered->file_offset < alloc_end) { |
5053 | btrfs_put_ordered_extent(ordered); | 5125 | btrfs_put_ordered_extent(ordered); |
5054 | unlock_extent(&BTRFS_I(inode)->io_tree, | 5126 | unlock_extent(&BTRFS_I(inode)->io_tree, |
5055 | alloc_start, alloc_end - 1, GFP_NOFS); | 5127 | alloc_start, locked_end, GFP_NOFS); |
5128 | btrfs_end_transaction(trans, BTRFS_I(inode)->root); | ||
5129 | |||
5130 | /* | ||
5131 | * we can't wait on the range with the transaction | ||
5132 | * running or with the extent lock held | ||
5133 | */ | ||
5056 | btrfs_wait_ordered_range(inode, alloc_start, | 5134 | btrfs_wait_ordered_range(inode, alloc_start, |
5057 | alloc_end - alloc_start); | 5135 | alloc_end - alloc_start); |
5058 | } else { | 5136 | } else { |
@@ -5070,8 +5148,9 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5070 | last_byte = min(extent_map_end(em), alloc_end); | 5148 | last_byte = min(extent_map_end(em), alloc_end); |
5071 | last_byte = (last_byte + mask) & ~mask; | 5149 | last_byte = (last_byte + mask) & ~mask; |
5072 | if (em->block_start == EXTENT_MAP_HOLE) { | 5150 | if (em->block_start == EXTENT_MAP_HOLE) { |
5073 | ret = prealloc_file_range(inode, cur_offset, | 5151 | ret = prealloc_file_range(trans, inode, cur_offset, |
5074 | last_byte, alloc_hint, mode); | 5152 | last_byte, locked_end + 1, |
5153 | alloc_hint, mode); | ||
5075 | if (ret < 0) { | 5154 | if (ret < 0) { |
5076 | free_extent_map(em); | 5155 | free_extent_map(em); |
5077 | break; | 5156 | break; |
@@ -5087,8 +5166,10 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5087 | break; | 5166 | break; |
5088 | } | 5167 | } |
5089 | } | 5168 | } |
5090 | unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, | 5169 | unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, |
5091 | GFP_NOFS); | 5170 | GFP_NOFS); |
5171 | |||
5172 | btrfs_end_transaction(trans, BTRFS_I(inode)->root); | ||
5092 | out: | 5173 | out: |
5093 | mutex_unlock(&inode->i_mutex); | 5174 | mutex_unlock(&inode->i_mutex); |
5094 | return ret; | 5175 | return ret; |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7594bec1be10..5e94ea6e1cbe 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -461,15 +461,9 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) | |||
461 | if (!capable(CAP_SYS_ADMIN)) | 461 | if (!capable(CAP_SYS_ADMIN)) |
462 | return -EPERM; | 462 | return -EPERM; |
463 | 463 | ||
464 | vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); | 464 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
465 | 465 | if (IS_ERR(vol_args)) | |
466 | if (!vol_args) | 466 | return PTR_ERR(vol_args); |
467 | return -ENOMEM; | ||
468 | |||
469 | if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { | ||
470 | ret = -EFAULT; | ||
471 | goto out; | ||
472 | } | ||
473 | 467 | ||
474 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 468 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
475 | namelen = strlen(vol_args->name); | 469 | namelen = strlen(vol_args->name); |
@@ -483,11 +477,13 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) | |||
483 | *devstr = '\0'; | 477 | *devstr = '\0'; |
484 | devstr = vol_args->name; | 478 | devstr = vol_args->name; |
485 | devid = simple_strtoull(devstr, &end, 10); | 479 | devid = simple_strtoull(devstr, &end, 10); |
486 | printk(KERN_INFO "resizing devid %llu\n", devid); | 480 | printk(KERN_INFO "resizing devid %llu\n", |
481 | (unsigned long long)devid); | ||
487 | } | 482 | } |
488 | device = btrfs_find_device(root, devid, NULL, NULL); | 483 | device = btrfs_find_device(root, devid, NULL, NULL); |
489 | if (!device) { | 484 | if (!device) { |
490 | printk(KERN_INFO "resizer unable to find device %llu\n", devid); | 485 | printk(KERN_INFO "resizer unable to find device %llu\n", |
486 | (unsigned long long)devid); | ||
491 | ret = -EINVAL; | 487 | ret = -EINVAL; |
492 | goto out_unlock; | 488 | goto out_unlock; |
493 | } | 489 | } |
@@ -545,7 +541,6 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) | |||
545 | 541 | ||
546 | out_unlock: | 542 | out_unlock: |
547 | mutex_unlock(&root->fs_info->volume_mutex); | 543 | mutex_unlock(&root->fs_info->volume_mutex); |
548 | out: | ||
549 | kfree(vol_args); | 544 | kfree(vol_args); |
550 | return ret; | 545 | return ret; |
551 | } | 546 | } |
@@ -565,15 +560,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
565 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 560 | if (root->fs_info->sb->s_flags & MS_RDONLY) |
566 | return -EROFS; | 561 | return -EROFS; |
567 | 562 | ||
568 | vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); | 563 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
569 | 564 | if (IS_ERR(vol_args)) | |
570 | if (!vol_args) | 565 | return PTR_ERR(vol_args); |
571 | return -ENOMEM; | ||
572 | |||
573 | if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { | ||
574 | ret = -EFAULT; | ||
575 | goto out; | ||
576 | } | ||
577 | 566 | ||
578 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 567 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
579 | namelen = strlen(vol_args->name); | 568 | namelen = strlen(vol_args->name); |
@@ -675,19 +664,13 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) | |||
675 | if (!capable(CAP_SYS_ADMIN)) | 664 | if (!capable(CAP_SYS_ADMIN)) |
676 | return -EPERM; | 665 | return -EPERM; |
677 | 666 | ||
678 | vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); | 667 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
668 | if (IS_ERR(vol_args)) | ||
669 | return PTR_ERR(vol_args); | ||
679 | 670 | ||
680 | if (!vol_args) | ||
681 | return -ENOMEM; | ||
682 | |||
683 | if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { | ||
684 | ret = -EFAULT; | ||
685 | goto out; | ||
686 | } | ||
687 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 671 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
688 | ret = btrfs_init_new_device(root, vol_args->name); | 672 | ret = btrfs_init_new_device(root, vol_args->name); |
689 | 673 | ||
690 | out: | ||
691 | kfree(vol_args); | 674 | kfree(vol_args); |
692 | return ret; | 675 | return ret; |
693 | } | 676 | } |
@@ -703,19 +686,13 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) | |||
703 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 686 | if (root->fs_info->sb->s_flags & MS_RDONLY) |
704 | return -EROFS; | 687 | return -EROFS; |
705 | 688 | ||
706 | vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); | 689 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
690 | if (IS_ERR(vol_args)) | ||
691 | return PTR_ERR(vol_args); | ||
707 | 692 | ||
708 | if (!vol_args) | ||
709 | return -ENOMEM; | ||
710 | |||
711 | if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { | ||
712 | ret = -EFAULT; | ||
713 | goto out; | ||
714 | } | ||
715 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 693 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
716 | ret = btrfs_rm_device(root, vol_args->name); | 694 | ret = btrfs_rm_device(root, vol_args->name); |
717 | 695 | ||
718 | out: | ||
719 | kfree(vol_args); | 696 | kfree(vol_args); |
720 | return ret; | 697 | return ret; |
721 | } | 698 | } |
@@ -830,7 +807,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
830 | BUG_ON(!trans); | 807 | BUG_ON(!trans); |
831 | 808 | ||
832 | /* punch hole in destination first */ | 809 | /* punch hole in destination first */ |
833 | btrfs_drop_extents(trans, root, inode, off, off+len, 0, &hint_byte); | 810 | btrfs_drop_extents(trans, root, inode, off, off + len, |
811 | off + len, 0, &hint_byte); | ||
834 | 812 | ||
835 | /* clone data */ | 813 | /* clone data */ |
836 | key.objectid = src->i_ino; | 814 | key.objectid = src->i_ino; |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 53c87b197d70..d6f0806c682f 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -489,7 +489,7 @@ again: | |||
489 | /* start IO across the range first to instantiate any delalloc | 489 | /* start IO across the range first to instantiate any delalloc |
490 | * extents | 490 | * extents |
491 | */ | 491 | */ |
492 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE); | 492 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); |
493 | 493 | ||
494 | /* The compression code will leave pages locked but return from | 494 | /* The compression code will leave pages locked but return from |
495 | * writepage without setting the page writeback. Starting again | 495 | * writepage without setting the page writeback. Starting again |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9744af9d71e9..3536bdb2d7cb 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -68,7 +68,7 @@ enum { | |||
68 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, | 68 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, |
69 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, | 69 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, |
70 | Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog, | 70 | Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog, |
71 | Opt_flushoncommit, Opt_err, | 71 | Opt_ratio, Opt_flushoncommit, Opt_err, |
72 | }; | 72 | }; |
73 | 73 | ||
74 | static match_table_t tokens = { | 74 | static match_table_t tokens = { |
@@ -87,6 +87,7 @@ static match_table_t tokens = { | |||
87 | {Opt_noacl, "noacl"}, | 87 | {Opt_noacl, "noacl"}, |
88 | {Opt_notreelog, "notreelog"}, | 88 | {Opt_notreelog, "notreelog"}, |
89 | {Opt_flushoncommit, "flushoncommit"}, | 89 | {Opt_flushoncommit, "flushoncommit"}, |
90 | {Opt_ratio, "metadata_ratio=%d"}, | ||
90 | {Opt_err, NULL}, | 91 | {Opt_err, NULL}, |
91 | }; | 92 | }; |
92 | 93 | ||
@@ -195,7 +196,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
195 | info->max_extent = max_t(u64, | 196 | info->max_extent = max_t(u64, |
196 | info->max_extent, root->sectorsize); | 197 | info->max_extent, root->sectorsize); |
197 | printk(KERN_INFO "btrfs: max_extent at %llu\n", | 198 | printk(KERN_INFO "btrfs: max_extent at %llu\n", |
198 | info->max_extent); | 199 | (unsigned long long)info->max_extent); |
199 | } | 200 | } |
200 | break; | 201 | break; |
201 | case Opt_max_inline: | 202 | case Opt_max_inline: |
@@ -210,7 +211,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
210 | root->sectorsize); | 211 | root->sectorsize); |
211 | } | 212 | } |
212 | printk(KERN_INFO "btrfs: max_inline at %llu\n", | 213 | printk(KERN_INFO "btrfs: max_inline at %llu\n", |
213 | info->max_inline); | 214 | (unsigned long long)info->max_inline); |
214 | } | 215 | } |
215 | break; | 216 | break; |
216 | case Opt_alloc_start: | 217 | case Opt_alloc_start: |
@@ -220,7 +221,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
220 | kfree(num); | 221 | kfree(num); |
221 | printk(KERN_INFO | 222 | printk(KERN_INFO |
222 | "btrfs: allocations start at %llu\n", | 223 | "btrfs: allocations start at %llu\n", |
223 | info->alloc_start); | 224 | (unsigned long long)info->alloc_start); |
224 | } | 225 | } |
225 | break; | 226 | break; |
226 | case Opt_noacl: | 227 | case Opt_noacl: |
@@ -234,6 +235,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
234 | printk(KERN_INFO "btrfs: turning on flush-on-commit\n"); | 235 | printk(KERN_INFO "btrfs: turning on flush-on-commit\n"); |
235 | btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT); | 236 | btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT); |
236 | break; | 237 | break; |
238 | case Opt_ratio: | ||
239 | intarg = 0; | ||
240 | match_int(&args[0], &intarg); | ||
241 | if (intarg) { | ||
242 | info->metadata_ratio = intarg; | ||
243 | printk(KERN_INFO "btrfs: metadata ratio %d\n", | ||
244 | info->metadata_ratio); | ||
245 | } | ||
246 | break; | ||
237 | default: | 247 | default: |
238 | break; | 248 | break; |
239 | } | 249 | } |
@@ -410,11 +420,14 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
410 | if (btrfs_test_opt(root, NOBARRIER)) | 420 | if (btrfs_test_opt(root, NOBARRIER)) |
411 | seq_puts(seq, ",nobarrier"); | 421 | seq_puts(seq, ",nobarrier"); |
412 | if (info->max_extent != (u64)-1) | 422 | if (info->max_extent != (u64)-1) |
413 | seq_printf(seq, ",max_extent=%llu", info->max_extent); | 423 | seq_printf(seq, ",max_extent=%llu", |
424 | (unsigned long long)info->max_extent); | ||
414 | if (info->max_inline != 8192 * 1024) | 425 | if (info->max_inline != 8192 * 1024) |
415 | seq_printf(seq, ",max_inline=%llu", info->max_inline); | 426 | seq_printf(seq, ",max_inline=%llu", |
427 | (unsigned long long)info->max_inline); | ||
416 | if (info->alloc_start != 0) | 428 | if (info->alloc_start != 0) |
417 | seq_printf(seq, ",alloc_start=%llu", info->alloc_start); | 429 | seq_printf(seq, ",alloc_start=%llu", |
430 | (unsigned long long)info->alloc_start); | ||
418 | if (info->thread_pool_size != min_t(unsigned long, | 431 | if (info->thread_pool_size != min_t(unsigned long, |
419 | num_online_cpus() + 2, 8)) | 432 | num_online_cpus() + 2, 8)) |
420 | seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); | 433 | seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); |
@@ -635,14 +648,9 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, | |||
635 | if (!capable(CAP_SYS_ADMIN)) | 648 | if (!capable(CAP_SYS_ADMIN)) |
636 | return -EPERM; | 649 | return -EPERM; |
637 | 650 | ||
638 | vol = kmalloc(sizeof(*vol), GFP_KERNEL); | 651 | vol = memdup_user((void __user *)arg, sizeof(*vol)); |
639 | if (!vol) | 652 | if (IS_ERR(vol)) |
640 | return -ENOMEM; | 653 | return PTR_ERR(vol); |
641 | |||
642 | if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) { | ||
643 | ret = -EFAULT; | ||
644 | goto out; | ||
645 | } | ||
646 | 654 | ||
647 | switch (cmd) { | 655 | switch (cmd) { |
648 | case BTRFS_IOC_SCAN_DEV: | 656 | case BTRFS_IOC_SCAN_DEV: |
@@ -650,7 +658,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, | |||
650 | &btrfs_fs_type, &fs_devices); | 658 | &btrfs_fs_type, &fs_devices); |
651 | break; | 659 | break; |
652 | } | 660 | } |
653 | out: | 661 | |
654 | kfree(vol); | 662 | kfree(vol); |
655 | return ret; | 663 | return ret; |
656 | } | 664 | } |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2869b3361eb6..01b143605ec1 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -687,7 +687,13 @@ static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info) | |||
687 | prepare_to_wait(&info->transaction_wait, &wait, | 687 | prepare_to_wait(&info->transaction_wait, &wait, |
688 | TASK_UNINTERRUPTIBLE); | 688 | TASK_UNINTERRUPTIBLE); |
689 | mutex_unlock(&info->trans_mutex); | 689 | mutex_unlock(&info->trans_mutex); |
690 | |||
691 | atomic_dec(&info->throttles); | ||
692 | wake_up(&info->transaction_throttle); | ||
693 | |||
690 | schedule(); | 694 | schedule(); |
695 | |||
696 | atomic_inc(&info->throttles); | ||
691 | mutex_lock(&info->trans_mutex); | 697 | mutex_lock(&info->trans_mutex); |
692 | finish_wait(&info->transaction_wait, &wait); | 698 | finish_wait(&info->transaction_wait, &wait); |
693 | } | 699 | } |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 25f20ea11f27..db5e212e8445 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -536,7 +536,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
536 | saved_nbytes = inode_get_bytes(inode); | 536 | saved_nbytes = inode_get_bytes(inode); |
537 | /* drop any overlapping extents */ | 537 | /* drop any overlapping extents */ |
538 | ret = btrfs_drop_extents(trans, root, inode, | 538 | ret = btrfs_drop_extents(trans, root, inode, |
539 | start, extent_end, start, &alloc_hint); | 539 | start, extent_end, extent_end, start, &alloc_hint); |
540 | BUG_ON(ret); | 540 | BUG_ON(ret); |
541 | 541 | ||
542 | if (found_type == BTRFS_FILE_EXTENT_REG || | 542 | if (found_type == BTRFS_FILE_EXTENT_REG || |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e0913e469728..5f01dad4b696 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -125,6 +125,20 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid) | |||
125 | return NULL; | 125 | return NULL; |
126 | } | 126 | } |
127 | 127 | ||
128 | static void requeue_list(struct btrfs_pending_bios *pending_bios, | ||
129 | struct bio *head, struct bio *tail) | ||
130 | { | ||
131 | |||
132 | struct bio *old_head; | ||
133 | |||
134 | old_head = pending_bios->head; | ||
135 | pending_bios->head = head; | ||
136 | if (pending_bios->tail) | ||
137 | tail->bi_next = old_head; | ||
138 | else | ||
139 | pending_bios->tail = tail; | ||
140 | } | ||
141 | |||
128 | /* | 142 | /* |
129 | * we try to collect pending bios for a device so we don't get a large | 143 | * we try to collect pending bios for a device so we don't get a large |
130 | * number of procs sending bios down to the same device. This greatly | 144 | * number of procs sending bios down to the same device. This greatly |
@@ -141,10 +155,12 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
141 | struct bio *pending; | 155 | struct bio *pending; |
142 | struct backing_dev_info *bdi; | 156 | struct backing_dev_info *bdi; |
143 | struct btrfs_fs_info *fs_info; | 157 | struct btrfs_fs_info *fs_info; |
158 | struct btrfs_pending_bios *pending_bios; | ||
144 | struct bio *tail; | 159 | struct bio *tail; |
145 | struct bio *cur; | 160 | struct bio *cur; |
146 | int again = 0; | 161 | int again = 0; |
147 | unsigned long num_run = 0; | 162 | unsigned long num_run; |
163 | unsigned long num_sync_run; | ||
148 | unsigned long limit; | 164 | unsigned long limit; |
149 | unsigned long last_waited = 0; | 165 | unsigned long last_waited = 0; |
150 | 166 | ||
@@ -153,20 +169,30 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
153 | limit = btrfs_async_submit_limit(fs_info); | 169 | limit = btrfs_async_submit_limit(fs_info); |
154 | limit = limit * 2 / 3; | 170 | limit = limit * 2 / 3; |
155 | 171 | ||
172 | /* we want to make sure that every time we switch from the sync | ||
173 | * list to the normal list, we unplug | ||
174 | */ | ||
175 | num_sync_run = 0; | ||
176 | |||
156 | loop: | 177 | loop: |
157 | spin_lock(&device->io_lock); | 178 | spin_lock(&device->io_lock); |
179 | num_run = 0; | ||
158 | 180 | ||
159 | loop_lock: | 181 | loop_lock: |
182 | |||
160 | /* take all the bios off the list at once and process them | 183 | /* take all the bios off the list at once and process them |
161 | * later on (without the lock held). But, remember the | 184 | * later on (without the lock held). But, remember the |
162 | * tail and other pointers so the bios can be properly reinserted | 185 | * tail and other pointers so the bios can be properly reinserted |
163 | * into the list if we hit congestion | 186 | * into the list if we hit congestion |
164 | */ | 187 | */ |
165 | pending = device->pending_bios; | 188 | if (device->pending_sync_bios.head) |
166 | tail = device->pending_bio_tail; | 189 | pending_bios = &device->pending_sync_bios; |
190 | else | ||
191 | pending_bios = &device->pending_bios; | ||
192 | |||
193 | pending = pending_bios->head; | ||
194 | tail = pending_bios->tail; | ||
167 | WARN_ON(pending && !tail); | 195 | WARN_ON(pending && !tail); |
168 | device->pending_bios = NULL; | ||
169 | device->pending_bio_tail = NULL; | ||
170 | 196 | ||
171 | /* | 197 | /* |
172 | * if pending was null this time around, no bios need processing | 198 | * if pending was null this time around, no bios need processing |
@@ -176,16 +202,41 @@ loop_lock: | |||
176 | * device->running_pending is used to synchronize with the | 202 | * device->running_pending is used to synchronize with the |
177 | * schedule_bio code. | 203 | * schedule_bio code. |
178 | */ | 204 | */ |
179 | if (pending) { | 205 | if (device->pending_sync_bios.head == NULL && |
180 | again = 1; | 206 | device->pending_bios.head == NULL) { |
181 | device->running_pending = 1; | ||
182 | } else { | ||
183 | again = 0; | 207 | again = 0; |
184 | device->running_pending = 0; | 208 | device->running_pending = 0; |
209 | } else { | ||
210 | again = 1; | ||
211 | device->running_pending = 1; | ||
185 | } | 212 | } |
213 | |||
214 | pending_bios->head = NULL; | ||
215 | pending_bios->tail = NULL; | ||
216 | |||
186 | spin_unlock(&device->io_lock); | 217 | spin_unlock(&device->io_lock); |
187 | 218 | ||
219 | /* | ||
220 | * if we're doing the regular priority list, make sure we unplug | ||
221 | * for any high prio bios we've sent down | ||
222 | */ | ||
223 | if (pending_bios == &device->pending_bios && num_sync_run > 0) { | ||
224 | num_sync_run = 0; | ||
225 | blk_run_backing_dev(bdi, NULL); | ||
226 | } | ||
227 | |||
188 | while (pending) { | 228 | while (pending) { |
229 | |||
230 | rmb(); | ||
231 | if (pending_bios != &device->pending_sync_bios && | ||
232 | device->pending_sync_bios.head && | ||
233 | num_run > 16) { | ||
234 | cond_resched(); | ||
235 | spin_lock(&device->io_lock); | ||
236 | requeue_list(pending_bios, pending, tail); | ||
237 | goto loop_lock; | ||
238 | } | ||
239 | |||
189 | cur = pending; | 240 | cur = pending; |
190 | pending = pending->bi_next; | 241 | pending = pending->bi_next; |
191 | cur->bi_next = NULL; | 242 | cur->bi_next = NULL; |
@@ -196,10 +247,18 @@ loop_lock: | |||
196 | wake_up(&fs_info->async_submit_wait); | 247 | wake_up(&fs_info->async_submit_wait); |
197 | 248 | ||
198 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); | 249 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); |
199 | bio_get(cur); | ||
200 | submit_bio(cur->bi_rw, cur); | 250 | submit_bio(cur->bi_rw, cur); |
201 | bio_put(cur); | ||
202 | num_run++; | 251 | num_run++; |
252 | if (bio_sync(cur)) | ||
253 | num_sync_run++; | ||
254 | |||
255 | if (need_resched()) { | ||
256 | if (num_sync_run) { | ||
257 | blk_run_backing_dev(bdi, NULL); | ||
258 | num_sync_run = 0; | ||
259 | } | ||
260 | cond_resched(); | ||
261 | } | ||
203 | 262 | ||
204 | /* | 263 | /* |
205 | * we made progress, there is more work to do and the bdi | 264 | * we made progress, there is more work to do and the bdi |
@@ -208,7 +267,6 @@ loop_lock: | |||
208 | */ | 267 | */ |
209 | if (pending && bdi_write_congested(bdi) && num_run > 16 && | 268 | if (pending && bdi_write_congested(bdi) && num_run > 16 && |
210 | fs_info->fs_devices->open_devices > 1) { | 269 | fs_info->fs_devices->open_devices > 1) { |
211 | struct bio *old_head; | ||
212 | struct io_context *ioc; | 270 | struct io_context *ioc; |
213 | 271 | ||
214 | ioc = current->io_context; | 272 | ioc = current->io_context; |
@@ -233,17 +291,17 @@ loop_lock: | |||
233 | * against it before looping | 291 | * against it before looping |
234 | */ | 292 | */ |
235 | last_waited = ioc->last_waited; | 293 | last_waited = ioc->last_waited; |
294 | if (need_resched()) { | ||
295 | if (num_sync_run) { | ||
296 | blk_run_backing_dev(bdi, NULL); | ||
297 | num_sync_run = 0; | ||
298 | } | ||
299 | cond_resched(); | ||
300 | } | ||
236 | continue; | 301 | continue; |
237 | } | 302 | } |
238 | spin_lock(&device->io_lock); | 303 | spin_lock(&device->io_lock); |
239 | 304 | requeue_list(pending_bios, pending, tail); | |
240 | old_head = device->pending_bios; | ||
241 | device->pending_bios = pending; | ||
242 | if (device->pending_bio_tail) | ||
243 | tail->bi_next = old_head; | ||
244 | else | ||
245 | device->pending_bio_tail = tail; | ||
246 | |||
247 | device->running_pending = 1; | 305 | device->running_pending = 1; |
248 | 306 | ||
249 | spin_unlock(&device->io_lock); | 307 | spin_unlock(&device->io_lock); |
@@ -251,11 +309,18 @@ loop_lock: | |||
251 | goto done; | 309 | goto done; |
252 | } | 310 | } |
253 | } | 311 | } |
312 | |||
313 | if (num_sync_run) { | ||
314 | num_sync_run = 0; | ||
315 | blk_run_backing_dev(bdi, NULL); | ||
316 | } | ||
317 | |||
318 | cond_resched(); | ||
254 | if (again) | 319 | if (again) |
255 | goto loop; | 320 | goto loop; |
256 | 321 | ||
257 | spin_lock(&device->io_lock); | 322 | spin_lock(&device->io_lock); |
258 | if (device->pending_bios) | 323 | if (device->pending_bios.head || device->pending_sync_bios.head) |
259 | goto loop_lock; | 324 | goto loop_lock; |
260 | spin_unlock(&device->io_lock); | 325 | spin_unlock(&device->io_lock); |
261 | 326 | ||
@@ -1478,7 +1543,7 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, | |||
1478 | btrfs_set_device_io_align(leaf, dev_item, device->io_align); | 1543 | btrfs_set_device_io_align(leaf, dev_item, device->io_align); |
1479 | btrfs_set_device_io_width(leaf, dev_item, device->io_width); | 1544 | btrfs_set_device_io_width(leaf, dev_item, device->io_width); |
1480 | btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); | 1545 | btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); |
1481 | btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); | 1546 | btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes); |
1482 | btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); | 1547 | btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); |
1483 | btrfs_mark_buffer_dirty(leaf); | 1548 | btrfs_mark_buffer_dirty(leaf); |
1484 | 1549 | ||
@@ -1875,14 +1940,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
1875 | device->total_bytes = new_size; | 1940 | device->total_bytes = new_size; |
1876 | if (device->writeable) | 1941 | if (device->writeable) |
1877 | device->fs_devices->total_rw_bytes -= diff; | 1942 | device->fs_devices->total_rw_bytes -= diff; |
1878 | ret = btrfs_update_device(trans, device); | ||
1879 | if (ret) { | ||
1880 | unlock_chunks(root); | ||
1881 | btrfs_end_transaction(trans, root); | ||
1882 | goto done; | ||
1883 | } | ||
1884 | WARN_ON(diff > old_total); | ||
1885 | btrfs_set_super_total_bytes(super_copy, old_total - diff); | ||
1886 | unlock_chunks(root); | 1943 | unlock_chunks(root); |
1887 | btrfs_end_transaction(trans, root); | 1944 | btrfs_end_transaction(trans, root); |
1888 | 1945 | ||
@@ -1914,7 +1971,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
1914 | length = btrfs_dev_extent_length(l, dev_extent); | 1971 | length = btrfs_dev_extent_length(l, dev_extent); |
1915 | 1972 | ||
1916 | if (key.offset + length <= new_size) | 1973 | if (key.offset + length <= new_size) |
1917 | goto done; | 1974 | break; |
1918 | 1975 | ||
1919 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); | 1976 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); |
1920 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); | 1977 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); |
@@ -1927,6 +1984,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
1927 | goto done; | 1984 | goto done; |
1928 | } | 1985 | } |
1929 | 1986 | ||
1987 | /* Shrinking succeeded, else we would be at "done". */ | ||
1988 | trans = btrfs_start_transaction(root, 1); | ||
1989 | if (!trans) { | ||
1990 | ret = -ENOMEM; | ||
1991 | goto done; | ||
1992 | } | ||
1993 | lock_chunks(root); | ||
1994 | |||
1995 | device->disk_total_bytes = new_size; | ||
1996 | /* Now btrfs_update_device() will change the on-disk size. */ | ||
1997 | ret = btrfs_update_device(trans, device); | ||
1998 | if (ret) { | ||
1999 | unlock_chunks(root); | ||
2000 | btrfs_end_transaction(trans, root); | ||
2001 | goto done; | ||
2002 | } | ||
2003 | WARN_ON(diff > old_total); | ||
2004 | btrfs_set_super_total_bytes(super_copy, old_total - diff); | ||
2005 | unlock_chunks(root); | ||
2006 | btrfs_end_transaction(trans, root); | ||
1930 | done: | 2007 | done: |
1931 | btrfs_free_path(path); | 2008 | btrfs_free_path(path); |
1932 | return ret; | 2009 | return ret; |
@@ -2497,7 +2574,7 @@ again: | |||
2497 | max_errors = 1; | 2574 | max_errors = 1; |
2498 | } | 2575 | } |
2499 | } | 2576 | } |
2500 | if (multi_ret && rw == WRITE && | 2577 | if (multi_ret && (rw & (1 << BIO_RW)) && |
2501 | stripes_allocated < stripes_required) { | 2578 | stripes_allocated < stripes_required) { |
2502 | stripes_allocated = map->num_stripes; | 2579 | stripes_allocated = map->num_stripes; |
2503 | free_extent_map(em); | 2580 | free_extent_map(em); |
@@ -2762,6 +2839,7 @@ static noinline int schedule_bio(struct btrfs_root *root, | |||
2762 | int rw, struct bio *bio) | 2839 | int rw, struct bio *bio) |
2763 | { | 2840 | { |
2764 | int should_queue = 1; | 2841 | int should_queue = 1; |
2842 | struct btrfs_pending_bios *pending_bios; | ||
2765 | 2843 | ||
2766 | /* don't bother with additional async steps for reads, right now */ | 2844 | /* don't bother with additional async steps for reads, right now */ |
2767 | if (!(rw & (1 << BIO_RW))) { | 2845 | if (!(rw & (1 << BIO_RW))) { |
@@ -2783,13 +2861,17 @@ static noinline int schedule_bio(struct btrfs_root *root, | |||
2783 | bio->bi_rw |= rw; | 2861 | bio->bi_rw |= rw; |
2784 | 2862 | ||
2785 | spin_lock(&device->io_lock); | 2863 | spin_lock(&device->io_lock); |
2864 | if (bio_sync(bio)) | ||
2865 | pending_bios = &device->pending_sync_bios; | ||
2866 | else | ||
2867 | pending_bios = &device->pending_bios; | ||
2786 | 2868 | ||
2787 | if (device->pending_bio_tail) | 2869 | if (pending_bios->tail) |
2788 | device->pending_bio_tail->bi_next = bio; | 2870 | pending_bios->tail->bi_next = bio; |
2789 | 2871 | ||
2790 | device->pending_bio_tail = bio; | 2872 | pending_bios->tail = bio; |
2791 | if (!device->pending_bios) | 2873 | if (!pending_bios->head) |
2792 | device->pending_bios = bio; | 2874 | pending_bios->head = bio; |
2793 | if (device->running_pending) | 2875 | if (device->running_pending) |
2794 | should_queue = 0; | 2876 | should_queue = 0; |
2795 | 2877 | ||
@@ -3006,7 +3088,8 @@ static int fill_device_from_item(struct extent_buffer *leaf, | |||
3006 | unsigned long ptr; | 3088 | unsigned long ptr; |
3007 | 3089 | ||
3008 | device->devid = btrfs_device_id(leaf, dev_item); | 3090 | device->devid = btrfs_device_id(leaf, dev_item); |
3009 | device->total_bytes = btrfs_device_total_bytes(leaf, dev_item); | 3091 | device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item); |
3092 | device->total_bytes = device->disk_total_bytes; | ||
3010 | device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); | 3093 | device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); |
3011 | device->type = btrfs_device_type(leaf, dev_item); | 3094 | device->type = btrfs_device_type(leaf, dev_item); |
3012 | device->io_align = btrfs_device_io_align(leaf, dev_item); | 3095 | device->io_align = btrfs_device_io_align(leaf, dev_item); |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 2185de72ff7d..5c3ff6d02fd7 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -23,13 +23,22 @@ | |||
23 | #include "async-thread.h" | 23 | #include "async-thread.h" |
24 | 24 | ||
25 | struct buffer_head; | 25 | struct buffer_head; |
26 | struct btrfs_pending_bios { | ||
27 | struct bio *head; | ||
28 | struct bio *tail; | ||
29 | }; | ||
30 | |||
26 | struct btrfs_device { | 31 | struct btrfs_device { |
27 | struct list_head dev_list; | 32 | struct list_head dev_list; |
28 | struct list_head dev_alloc_list; | 33 | struct list_head dev_alloc_list; |
29 | struct btrfs_fs_devices *fs_devices; | 34 | struct btrfs_fs_devices *fs_devices; |
30 | struct btrfs_root *dev_root; | 35 | struct btrfs_root *dev_root; |
31 | struct bio *pending_bios; | 36 | |
32 | struct bio *pending_bio_tail; | 37 | /* regular prio bios */ |
38 | struct btrfs_pending_bios pending_bios; | ||
39 | /* WRITE_SYNC bios */ | ||
40 | struct btrfs_pending_bios pending_sync_bios; | ||
41 | |||
33 | int running_pending; | 42 | int running_pending; |
34 | u64 generation; | 43 | u64 generation; |
35 | 44 | ||
@@ -52,6 +61,9 @@ struct btrfs_device { | |||
52 | /* size of the device */ | 61 | /* size of the device */ |
53 | u64 total_bytes; | 62 | u64 total_bytes; |
54 | 63 | ||
64 | /* size of the disk */ | ||
65 | u64 disk_total_bytes; | ||
66 | |||
55 | /* bytes used */ | 67 | /* bytes used */ |
56 | u64 bytes_used; | 68 | u64 bytes_used; |
57 | 69 | ||
diff --git a/fs/compat.c b/fs/compat.c index 3f84d5f15889..681ed81e6be0 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -181,22 +181,24 @@ asmlinkage long compat_sys_newstat(char __user * filename, | |||
181 | struct compat_stat __user *statbuf) | 181 | struct compat_stat __user *statbuf) |
182 | { | 182 | { |
183 | struct kstat stat; | 183 | struct kstat stat; |
184 | int error = vfs_stat_fd(AT_FDCWD, filename, &stat); | 184 | int error; |
185 | 185 | ||
186 | if (!error) | 186 | error = vfs_stat(filename, &stat); |
187 | error = cp_compat_stat(&stat, statbuf); | 187 | if (error) |
188 | return error; | 188 | return error; |
189 | return cp_compat_stat(&stat, statbuf); | ||
189 | } | 190 | } |
190 | 191 | ||
191 | asmlinkage long compat_sys_newlstat(char __user * filename, | 192 | asmlinkage long compat_sys_newlstat(char __user * filename, |
192 | struct compat_stat __user *statbuf) | 193 | struct compat_stat __user *statbuf) |
193 | { | 194 | { |
194 | struct kstat stat; | 195 | struct kstat stat; |
195 | int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); | 196 | int error; |
196 | 197 | ||
197 | if (!error) | 198 | error = vfs_lstat(filename, &stat); |
198 | error = cp_compat_stat(&stat, statbuf); | 199 | if (error) |
199 | return error; | 200 | return error; |
201 | return cp_compat_stat(&stat, statbuf); | ||
200 | } | 202 | } |
201 | 203 | ||
202 | #ifndef __ARCH_WANT_STAT64 | 204 | #ifndef __ARCH_WANT_STAT64 |
@@ -204,21 +206,12 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user *filename, | |||
204 | struct compat_stat __user *statbuf, int flag) | 206 | struct compat_stat __user *statbuf, int flag) |
205 | { | 207 | { |
206 | struct kstat stat; | 208 | struct kstat stat; |
207 | int error = -EINVAL; | 209 | int error; |
208 | |||
209 | if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) | ||
210 | goto out; | ||
211 | |||
212 | if (flag & AT_SYMLINK_NOFOLLOW) | ||
213 | error = vfs_lstat_fd(dfd, filename, &stat); | ||
214 | else | ||
215 | error = vfs_stat_fd(dfd, filename, &stat); | ||
216 | |||
217 | if (!error) | ||
218 | error = cp_compat_stat(&stat, statbuf); | ||
219 | 210 | ||
220 | out: | 211 | error = vfs_fstatat(dfd, filename, &stat, flag); |
221 | return error; | 212 | if (error) |
213 | return error; | ||
214 | return cp_compat_stat(&stat, statbuf); | ||
222 | } | 215 | } |
223 | #endif | 216 | #endif |
224 | 217 | ||
@@ -1483,6 +1476,7 @@ int compat_do_execve(char * filename, | |||
1483 | struct linux_binprm *bprm; | 1476 | struct linux_binprm *bprm; |
1484 | struct file *file; | 1477 | struct file *file; |
1485 | struct files_struct *displaced; | 1478 | struct files_struct *displaced; |
1479 | bool clear_in_exec; | ||
1486 | int retval; | 1480 | int retval; |
1487 | 1481 | ||
1488 | retval = unshare_files(&displaced); | 1482 | retval = unshare_files(&displaced); |
@@ -1505,8 +1499,9 @@ int compat_do_execve(char * filename, | |||
1505 | goto out_unlock; | 1499 | goto out_unlock; |
1506 | 1500 | ||
1507 | retval = check_unsafe_exec(bprm); | 1501 | retval = check_unsafe_exec(bprm); |
1508 | if (retval) | 1502 | if (retval < 0) |
1509 | goto out_unlock; | 1503 | goto out_unlock; |
1504 | clear_in_exec = retval; | ||
1510 | 1505 | ||
1511 | file = open_exec(filename); | 1506 | file = open_exec(filename); |
1512 | retval = PTR_ERR(file); | 1507 | retval = PTR_ERR(file); |
@@ -1553,9 +1548,7 @@ int compat_do_execve(char * filename, | |||
1553 | goto out; | 1548 | goto out; |
1554 | 1549 | ||
1555 | /* execve succeeded */ | 1550 | /* execve succeeded */ |
1556 | write_lock(&current->fs->lock); | ||
1557 | current->fs->in_exec = 0; | 1551 | current->fs->in_exec = 0; |
1558 | write_unlock(&current->fs->lock); | ||
1559 | current->in_execve = 0; | 1552 | current->in_execve = 0; |
1560 | mutex_unlock(&current->cred_exec_mutex); | 1553 | mutex_unlock(&current->cred_exec_mutex); |
1561 | acct_update_integrals(current); | 1554 | acct_update_integrals(current); |
@@ -1575,9 +1568,8 @@ out_file: | |||
1575 | } | 1568 | } |
1576 | 1569 | ||
1577 | out_unmark: | 1570 | out_unmark: |
1578 | write_lock(&current->fs->lock); | 1571 | if (clear_in_exec) |
1579 | current->fs->in_exec = 0; | 1572 | current->fs->in_exec = 0; |
1580 | write_unlock(&current->fs->lock); | ||
1581 | 1573 | ||
1582 | out_unlock: | 1574 | out_unlock: |
1583 | current->in_execve = 0; | 1575 | current->in_execve = 0; |
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 3e87ce443ea2..b83f6bcfa51a 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -58,7 +58,6 @@ | |||
58 | #include <linux/i2c.h> | 58 | #include <linux/i2c.h> |
59 | #include <linux/i2c-dev.h> | 59 | #include <linux/i2c-dev.h> |
60 | #include <linux/atalk.h> | 60 | #include <linux/atalk.h> |
61 | #include <linux/loop.h> | ||
62 | 61 | ||
63 | #include <net/bluetooth/bluetooth.h> | 62 | #include <net/bluetooth/bluetooth.h> |
64 | #include <net/bluetooth/hci.h> | 63 | #include <net/bluetooth/hci.h> |
@@ -68,6 +67,7 @@ | |||
68 | #include <linux/gigaset_dev.h> | 67 | #include <linux/gigaset_dev.h> |
69 | 68 | ||
70 | #ifdef CONFIG_BLOCK | 69 | #ifdef CONFIG_BLOCK |
70 | #include <linux/loop.h> | ||
71 | #include <scsi/scsi.h> | 71 | #include <scsi/scsi.h> |
72 | #include <scsi/scsi_ioctl.h> | 72 | #include <scsi/scsi_ioctl.h> |
73 | #include <scsi/sg.h> | 73 | #include <scsi/sg.h> |
@@ -2660,6 +2660,8 @@ HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl) | |||
2660 | HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl) | 2660 | HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl) |
2661 | /* block stuff */ | 2661 | /* block stuff */ |
2662 | #ifdef CONFIG_BLOCK | 2662 | #ifdef CONFIG_BLOCK |
2663 | /* loop */ | ||
2664 | IGNORE_IOCTL(LOOP_CLR_FD) | ||
2663 | /* Raw devices */ | 2665 | /* Raw devices */ |
2664 | HANDLE_IOCTL(RAW_SETBIND, raw_ioctl) | 2666 | HANDLE_IOCTL(RAW_SETBIND, raw_ioctl) |
2665 | HANDLE_IOCTL(RAW_GETBIND, raw_ioctl) | 2667 | HANDLE_IOCTL(RAW_GETBIND, raw_ioctl) |
@@ -2728,9 +2730,6 @@ HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans) | |||
2728 | IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32) | 2730 | IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32) |
2729 | IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32) | 2731 | IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32) |
2730 | 2732 | ||
2731 | /* loop */ | ||
2732 | IGNORE_IOCTL(LOOP_CLR_FD) | ||
2733 | |||
2734 | #ifdef CONFIG_SPARC | 2733 | #ifdef CONFIG_SPARC |
2735 | /* Sparc framebuffers, handled in sbusfb_compat_ioctl() */ | 2734 | /* Sparc framebuffers, handled in sbusfb_compat_ioctl() */ |
2736 | IGNORE_IOCTL(FBIOGTYPE) | 2735 | IGNORE_IOCTL(FBIOGTYPE) |
diff --git a/fs/dcache.c b/fs/dcache.c index 761d30be2683..1fcffebfb44f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -2149,7 +2149,6 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) | |||
2149 | int result; | 2149 | int result; |
2150 | unsigned long seq; | 2150 | unsigned long seq; |
2151 | 2151 | ||
2152 | /* FIXME: This is old behavior, needed? Please check callers. */ | ||
2153 | if (new_dentry == old_dentry) | 2152 | if (new_dentry == old_dentry) |
2154 | return 1; | 2153 | return 1; |
2155 | 2154 | ||
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 8b65f289ee00..b91851f1cda3 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c | |||
@@ -483,15 +483,7 @@ int ecryptfs_encrypt_page(struct page *page) | |||
483 | ecryptfs_inode = page->mapping->host; | 483 | ecryptfs_inode = page->mapping->host; |
484 | crypt_stat = | 484 | crypt_stat = |
485 | &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); | 485 | &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); |
486 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { | 486 | BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)); |
487 | rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page, | ||
488 | 0, PAGE_CACHE_SIZE); | ||
489 | if (rc) | ||
490 | printk(KERN_ERR "%s: Error attempting to copy " | ||
491 | "page at index [%ld]\n", __func__, | ||
492 | page->index); | ||
493 | goto out; | ||
494 | } | ||
495 | enc_extent_page = alloc_page(GFP_USER); | 487 | enc_extent_page = alloc_page(GFP_USER); |
496 | if (!enc_extent_page) { | 488 | if (!enc_extent_page) { |
497 | rc = -ENOMEM; | 489 | rc = -ENOMEM; |
@@ -620,16 +612,7 @@ int ecryptfs_decrypt_page(struct page *page) | |||
620 | ecryptfs_inode = page->mapping->host; | 612 | ecryptfs_inode = page->mapping->host; |
621 | crypt_stat = | 613 | crypt_stat = |
622 | &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); | 614 | &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); |
623 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { | 615 | BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)); |
624 | rc = ecryptfs_read_lower_page_segment(page, page->index, 0, | ||
625 | PAGE_CACHE_SIZE, | ||
626 | ecryptfs_inode); | ||
627 | if (rc) | ||
628 | printk(KERN_ERR "%s: Error attempting to copy " | ||
629 | "page at index [%ld]\n", __func__, | ||
630 | page->index); | ||
631 | goto out; | ||
632 | } | ||
633 | enc_extent_page = alloc_page(GFP_USER); | 616 | enc_extent_page = alloc_page(GFP_USER); |
634 | if (!enc_extent_page) { | 617 | if (!enc_extent_page) { |
635 | rc = -ENOMEM; | 618 | rc = -ENOMEM; |
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 064c5820e4e5..00b30a2d5466 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h | |||
@@ -269,6 +269,7 @@ struct ecryptfs_crypt_stat { | |||
269 | #define ECRYPTFS_ENCRYPT_FILENAMES 0x00000800 | 269 | #define ECRYPTFS_ENCRYPT_FILENAMES 0x00000800 |
270 | #define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00001000 | 270 | #define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00001000 |
271 | #define ECRYPTFS_ENCFN_USE_FEK 0x00002000 | 271 | #define ECRYPTFS_ENCFN_USE_FEK 0x00002000 |
272 | #define ECRYPTFS_UNLINK_SIGS 0x00004000 | ||
272 | u32 flags; | 273 | u32 flags; |
273 | unsigned int file_version; | 274 | unsigned int file_version; |
274 | size_t iv_bytes; | 275 | size_t iv_bytes; |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 55b3145b8072..2f0945d63297 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -379,9 +379,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
379 | goto out_d_drop; | 379 | goto out_d_drop; |
380 | } | 380 | } |
381 | lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); | 381 | lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); |
382 | mutex_lock(&lower_dir_dentry->d_inode->i_mutex); | ||
382 | lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name, | 383 | lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name, |
383 | lower_dir_dentry, | 384 | lower_dir_dentry, |
384 | ecryptfs_dentry->d_name.len); | 385 | ecryptfs_dentry->d_name.len); |
386 | mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); | ||
385 | if (IS_ERR(lower_dentry)) { | 387 | if (IS_ERR(lower_dentry)) { |
386 | rc = PTR_ERR(lower_dentry); | 388 | rc = PTR_ERR(lower_dentry); |
387 | printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " | 389 | printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " |
@@ -406,9 +408,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
406 | "filename; rc = [%d]\n", __func__, rc); | 408 | "filename; rc = [%d]\n", __func__, rc); |
407 | goto out_d_drop; | 409 | goto out_d_drop; |
408 | } | 410 | } |
411 | mutex_lock(&lower_dir_dentry->d_inode->i_mutex); | ||
409 | lower_dentry = lookup_one_len(encrypted_and_encoded_name, | 412 | lower_dentry = lookup_one_len(encrypted_and_encoded_name, |
410 | lower_dir_dentry, | 413 | lower_dir_dentry, |
411 | encrypted_and_encoded_name_size - 1); | 414 | encrypted_and_encoded_name_size - 1); |
415 | mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); | ||
412 | if (IS_ERR(lower_dentry)) { | 416 | if (IS_ERR(lower_dentry)) { |
413 | rc = PTR_ERR(lower_dentry); | 417 | rc = PTR_ERR(lower_dentry); |
414 | printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " | 418 | printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " |
@@ -636,8 +640,9 @@ static int | |||
636 | ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) | 640 | ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) |
637 | { | 641 | { |
638 | char *lower_buf; | 642 | char *lower_buf; |
643 | size_t lower_bufsiz; | ||
639 | struct dentry *lower_dentry; | 644 | struct dentry *lower_dentry; |
640 | struct ecryptfs_crypt_stat *crypt_stat; | 645 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat; |
641 | char *plaintext_name; | 646 | char *plaintext_name; |
642 | size_t plaintext_name_size; | 647 | size_t plaintext_name_size; |
643 | mm_segment_t old_fs; | 648 | mm_segment_t old_fs; |
@@ -648,12 +653,21 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) | |||
648 | rc = -EINVAL; | 653 | rc = -EINVAL; |
649 | goto out; | 654 | goto out; |
650 | } | 655 | } |
651 | crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; | 656 | mount_crypt_stat = &ecryptfs_superblock_to_private( |
657 | dentry->d_sb)->mount_crypt_stat; | ||
658 | /* | ||
659 | * If the lower filename is encrypted, it will result in a significantly | ||
660 | * longer name. If needed, truncate the name after decode and decrypt. | ||
661 | */ | ||
662 | if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) | ||
663 | lower_bufsiz = PATH_MAX; | ||
664 | else | ||
665 | lower_bufsiz = bufsiz; | ||
652 | /* Released in this function */ | 666 | /* Released in this function */ |
653 | lower_buf = kmalloc(bufsiz, GFP_KERNEL); | 667 | lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL); |
654 | if (lower_buf == NULL) { | 668 | if (lower_buf == NULL) { |
655 | printk(KERN_ERR "%s: Out of memory whilst attempting to " | 669 | printk(KERN_ERR "%s: Out of memory whilst attempting to " |
656 | "kmalloc [%d] bytes\n", __func__, bufsiz); | 670 | "kmalloc [%zd] bytes\n", __func__, lower_bufsiz); |
657 | rc = -ENOMEM; | 671 | rc = -ENOMEM; |
658 | goto out; | 672 | goto out; |
659 | } | 673 | } |
@@ -661,7 +675,7 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) | |||
661 | set_fs(get_ds()); | 675 | set_fs(get_ds()); |
662 | rc = lower_dentry->d_inode->i_op->readlink(lower_dentry, | 676 | rc = lower_dentry->d_inode->i_op->readlink(lower_dentry, |
663 | (char __user *)lower_buf, | 677 | (char __user *)lower_buf, |
664 | bufsiz); | 678 | lower_bufsiz); |
665 | set_fs(old_fs); | 679 | set_fs(old_fs); |
666 | if (rc >= 0) { | 680 | if (rc >= 0) { |
667 | rc = ecryptfs_decode_and_decrypt_filename(&plaintext_name, | 681 | rc = ecryptfs_decode_and_decrypt_filename(&plaintext_name, |
@@ -674,7 +688,9 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) | |||
674 | rc); | 688 | rc); |
675 | goto out_free_lower_buf; | 689 | goto out_free_lower_buf; |
676 | } | 690 | } |
677 | rc = copy_to_user(buf, plaintext_name, plaintext_name_size); | 691 | /* Check for bufsiz <= 0 done in sys_readlinkat() */ |
692 | rc = copy_to_user(buf, plaintext_name, | ||
693 | min((size_t) bufsiz, plaintext_name_size)); | ||
678 | if (rc) | 694 | if (rc) |
679 | rc = -EFAULT; | 695 | rc = -EFAULT; |
680 | else | 696 | else |
@@ -814,6 +830,13 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) | |||
814 | size_t num_zeros = (PAGE_CACHE_SIZE | 830 | size_t num_zeros = (PAGE_CACHE_SIZE |
815 | - (new_length & ~PAGE_CACHE_MASK)); | 831 | - (new_length & ~PAGE_CACHE_MASK)); |
816 | 832 | ||
833 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { | ||
834 | rc = vmtruncate(inode, new_length); | ||
835 | if (rc) | ||
836 | goto out_free; | ||
837 | rc = vmtruncate(lower_dentry->d_inode, new_length); | ||
838 | goto out_free; | ||
839 | } | ||
817 | if (num_zeros) { | 840 | if (num_zeros) { |
818 | char *zeros_virt; | 841 | char *zeros_virt; |
819 | 842 | ||
@@ -915,8 +938,6 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) | |||
915 | } | 938 | } |
916 | rc = 0; | 939 | rc = 0; |
917 | crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); | 940 | crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); |
918 | mutex_unlock(&crypt_stat->cs_mutex); | ||
919 | goto out; | ||
920 | } | 941 | } |
921 | } | 942 | } |
922 | mutex_unlock(&crypt_stat->cs_mutex); | 943 | mutex_unlock(&crypt_stat->cs_mutex); |
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index aed56c25539b..ccabd5faa04d 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
@@ -190,14 +190,14 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, | |||
190 | init_special_inode(inode, lower_inode->i_mode, | 190 | init_special_inode(inode, lower_inode->i_mode, |
191 | lower_inode->i_rdev); | 191 | lower_inode->i_rdev); |
192 | dentry->d_op = &ecryptfs_dops; | 192 | dentry->d_op = &ecryptfs_dops; |
193 | if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD) | ||
194 | d_add(dentry, inode); | ||
195 | else | ||
196 | d_instantiate(dentry, inode); | ||
197 | fsstack_copy_attr_all(inode, lower_inode, NULL); | 193 | fsstack_copy_attr_all(inode, lower_inode, NULL); |
198 | /* This size will be overwritten for real files w/ headers and | 194 | /* This size will be overwritten for real files w/ headers and |
199 | * other metadata */ | 195 | * other metadata */ |
200 | fsstack_copy_inode_size(inode, lower_inode); | 196 | fsstack_copy_inode_size(inode, lower_inode); |
197 | if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD) | ||
198 | d_add(dentry, inode); | ||
199 | else | ||
200 | d_instantiate(dentry, inode); | ||
201 | out: | 201 | out: |
202 | return rc; | 202 | return rc; |
203 | } | 203 | } |
@@ -208,7 +208,7 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, | |||
208 | ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, | 208 | ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, |
209 | ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig, | 209 | ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig, |
210 | ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes, | 210 | ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes, |
211 | ecryptfs_opt_err }; | 211 | ecryptfs_opt_unlink_sigs, ecryptfs_opt_err }; |
212 | 212 | ||
213 | static const match_table_t tokens = { | 213 | static const match_table_t tokens = { |
214 | {ecryptfs_opt_sig, "sig=%s"}, | 214 | {ecryptfs_opt_sig, "sig=%s"}, |
@@ -222,6 +222,7 @@ static const match_table_t tokens = { | |||
222 | {ecryptfs_opt_fnek_sig, "ecryptfs_fnek_sig=%s"}, | 222 | {ecryptfs_opt_fnek_sig, "ecryptfs_fnek_sig=%s"}, |
223 | {ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"}, | 223 | {ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"}, |
224 | {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"}, | 224 | {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"}, |
225 | {ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"}, | ||
225 | {ecryptfs_opt_err, NULL} | 226 | {ecryptfs_opt_err, NULL} |
226 | }; | 227 | }; |
227 | 228 | ||
@@ -402,6 +403,9 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) | |||
402 | fn_cipher_key_bytes; | 403 | fn_cipher_key_bytes; |
403 | fn_cipher_key_bytes_set = 1; | 404 | fn_cipher_key_bytes_set = 1; |
404 | break; | 405 | break; |
406 | case ecryptfs_opt_unlink_sigs: | ||
407 | mount_crypt_stat->flags |= ECRYPTFS_UNLINK_SIGS; | ||
408 | break; | ||
405 | case ecryptfs_opt_err: | 409 | case ecryptfs_opt_err: |
406 | default: | 410 | default: |
407 | printk(KERN_WARNING | 411 | printk(KERN_WARNING |
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 295e7fa56755..f1c17e87c5fb 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c | |||
@@ -133,45 +133,6 @@ out: | |||
133 | return rc; | 133 | return rc; |
134 | } | 134 | } |
135 | 135 | ||
136 | static int | ||
137 | ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type, | ||
138 | struct ecryptfs_msg_ctx **msg_ctx); | ||
139 | |||
140 | /** | ||
141 | * ecryptfs_send_raw_message | ||
142 | * @msg_type: Message type | ||
143 | * @daemon: Daemon struct for recipient of message | ||
144 | * | ||
145 | * A raw message is one that does not include an ecryptfs_message | ||
146 | * struct. It simply has a type. | ||
147 | * | ||
148 | * Must be called with ecryptfs_daemon_hash_mux held. | ||
149 | * | ||
150 | * Returns zero on success; non-zero otherwise | ||
151 | */ | ||
152 | static int ecryptfs_send_raw_message(u8 msg_type, | ||
153 | struct ecryptfs_daemon *daemon) | ||
154 | { | ||
155 | struct ecryptfs_msg_ctx *msg_ctx; | ||
156 | int rc; | ||
157 | |||
158 | rc = ecryptfs_send_message_locked(NULL, 0, msg_type, &msg_ctx); | ||
159 | if (rc) { | ||
160 | printk(KERN_ERR "%s: Error whilst attempting to send " | ||
161 | "message to ecryptfsd; rc = [%d]\n", __func__, rc); | ||
162 | goto out; | ||
163 | } | ||
164 | /* Raw messages are logically context-free (e.g., no | ||
165 | * reply is expected), so we set the state of the | ||
166 | * ecryptfs_msg_ctx object to indicate that it should | ||
167 | * be freed as soon as the message is sent. */ | ||
168 | mutex_lock(&msg_ctx->mux); | ||
169 | msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY; | ||
170 | mutex_unlock(&msg_ctx->mux); | ||
171 | out: | ||
172 | return rc; | ||
173 | } | ||
174 | |||
175 | /** | 136 | /** |
176 | * ecryptfs_spawn_daemon - Create and initialize a new daemon struct | 137 | * ecryptfs_spawn_daemon - Create and initialize a new daemon struct |
177 | * @daemon: Pointer to set to newly allocated daemon struct | 138 | * @daemon: Pointer to set to newly allocated daemon struct |
@@ -212,49 +173,6 @@ out: | |||
212 | } | 173 | } |
213 | 174 | ||
214 | /** | 175 | /** |
215 | * ecryptfs_process_helo | ||
216 | * @euid: The user ID owner of the message | ||
217 | * @user_ns: The namespace in which @euid applies | ||
218 | * @pid: The process ID for the userspace program that sent the | ||
219 | * message | ||
220 | * | ||
221 | * Adds the euid and pid values to the daemon euid hash. If an euid | ||
222 | * already has a daemon pid registered, the daemon will be | ||
223 | * unregistered before the new daemon is put into the hash list. | ||
224 | * Returns zero after adding a new daemon to the hash list; | ||
225 | * non-zero otherwise. | ||
226 | */ | ||
227 | int ecryptfs_process_helo(uid_t euid, struct user_namespace *user_ns, | ||
228 | struct pid *pid) | ||
229 | { | ||
230 | struct ecryptfs_daemon *new_daemon; | ||
231 | struct ecryptfs_daemon *old_daemon; | ||
232 | int rc; | ||
233 | |||
234 | mutex_lock(&ecryptfs_daemon_hash_mux); | ||
235 | rc = ecryptfs_find_daemon_by_euid(&old_daemon, euid, user_ns); | ||
236 | if (rc != 0) { | ||
237 | printk(KERN_WARNING "Received request from user [%d] " | ||
238 | "to register daemon [0x%p]; unregistering daemon " | ||
239 | "[0x%p]\n", euid, pid, old_daemon->pid); | ||
240 | rc = ecryptfs_send_raw_message(ECRYPTFS_MSG_QUIT, old_daemon); | ||
241 | if (rc) | ||
242 | printk(KERN_WARNING "Failed to send QUIT " | ||
243 | "message to daemon [0x%p]; rc = [%d]\n", | ||
244 | old_daemon->pid, rc); | ||
245 | hlist_del(&old_daemon->euid_chain); | ||
246 | kfree(old_daemon); | ||
247 | } | ||
248 | rc = ecryptfs_spawn_daemon(&new_daemon, euid, user_ns, pid); | ||
249 | if (rc) | ||
250 | printk(KERN_ERR "%s: The gods are displeased with this attempt " | ||
251 | "to create a new daemon object for euid [%d]; pid " | ||
252 | "[0x%p]; rc = [%d]\n", __func__, euid, pid, rc); | ||
253 | mutex_unlock(&ecryptfs_daemon_hash_mux); | ||
254 | return rc; | ||
255 | } | ||
256 | |||
257 | /** | ||
258 | * ecryptfs_exorcise_daemon - Destroy the daemon struct | 176 | * ecryptfs_exorcise_daemon - Destroy the daemon struct |
259 | * | 177 | * |
260 | * Must be called ceremoniously while in possession of | 178 | * Must be called ceremoniously while in possession of |
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index a67fea655f49..4ec8f61ccf5a 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c | |||
@@ -193,26 +193,20 @@ int ecryptfs_send_miscdev(char *data, size_t data_size, | |||
193 | int rc = 0; | 193 | int rc = 0; |
194 | 194 | ||
195 | mutex_lock(&msg_ctx->mux); | 195 | mutex_lock(&msg_ctx->mux); |
196 | if (data) { | 196 | msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size), |
197 | msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size), | 197 | GFP_KERNEL); |
198 | GFP_KERNEL); | 198 | if (!msg_ctx->msg) { |
199 | if (!msg_ctx->msg) { | 199 | rc = -ENOMEM; |
200 | rc = -ENOMEM; | 200 | printk(KERN_ERR "%s: Out of memory whilst attempting " |
201 | printk(KERN_ERR "%s: Out of memory whilst attempting " | 201 | "to kmalloc(%zd, GFP_KERNEL)\n", __func__, |
202 | "to kmalloc(%zd, GFP_KERNEL)\n", __func__, | 202 | (sizeof(*msg_ctx->msg) + data_size)); |
203 | (sizeof(*msg_ctx->msg) + data_size)); | 203 | goto out_unlock; |
204 | goto out_unlock; | 204 | } |
205 | } | ||
206 | } else | ||
207 | msg_ctx->msg = NULL; | ||
208 | msg_ctx->msg->index = msg_ctx->index; | 205 | msg_ctx->msg->index = msg_ctx->index; |
209 | msg_ctx->msg->data_len = data_size; | 206 | msg_ctx->msg->data_len = data_size; |
210 | msg_ctx->type = msg_type; | 207 | msg_ctx->type = msg_type; |
211 | if (data) { | 208 | memcpy(msg_ctx->msg->data, data, data_size); |
212 | memcpy(msg_ctx->msg->data, data, data_size); | 209 | msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size); |
213 | msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size); | ||
214 | } else | ||
215 | msg_ctx->msg_size = 0; | ||
216 | mutex_lock(&daemon->mux); | 210 | mutex_lock(&daemon->mux); |
217 | list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue); | 211 | list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue); |
218 | daemon->num_queued_msg_ctx++; | 212 | daemon->num_queued_msg_ctx++; |
@@ -418,18 +412,13 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, | |||
418 | 412 | ||
419 | if (count == 0) | 413 | if (count == 0) |
420 | goto out; | 414 | goto out; |
421 | data = kmalloc(count, GFP_KERNEL); | 415 | |
422 | if (!data) { | 416 | data = memdup_user(buf, count); |
423 | printk(KERN_ERR "%s: Out of memory whilst attempting to " | 417 | if (IS_ERR(data)) { |
424 | "kmalloc([%zd], GFP_KERNEL)\n", __func__, count); | 418 | printk(KERN_ERR "%s: memdup_user returned error [%ld]\n", |
419 | __func__, PTR_ERR(data)); | ||
425 | goto out; | 420 | goto out; |
426 | } | 421 | } |
427 | rc = copy_from_user(data, buf, count); | ||
428 | if (rc) { | ||
429 | printk(KERN_ERR "%s: copy_from_user returned error [%d]\n", | ||
430 | __func__, rc); | ||
431 | goto out_free; | ||
432 | } | ||
433 | sz = count; | 422 | sz = count; |
434 | i = 0; | 423 | i = 0; |
435 | switch (data[i++]) { | 424 | switch (data[i++]) { |
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 46cec2b69796..5c6bab9786e3 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c | |||
@@ -449,6 +449,7 @@ int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode) | |||
449 | struct ecryptfs_crypt_stat *crypt_stat; | 449 | struct ecryptfs_crypt_stat *crypt_stat; |
450 | 450 | ||
451 | crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; | 451 | crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; |
452 | BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)); | ||
452 | if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) | 453 | if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) |
453 | return ecryptfs_write_inode_size_to_xattr(ecryptfs_inode); | 454 | return ecryptfs_write_inode_size_to_xattr(ecryptfs_inode); |
454 | else | 455 | else |
@@ -490,6 +491,16 @@ static int ecryptfs_write_end(struct file *file, | |||
490 | ecryptfs_printk(KERN_DEBUG, "Not a new file\n"); | 491 | ecryptfs_printk(KERN_DEBUG, "Not a new file\n"); |
491 | ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" | 492 | ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" |
492 | "(page w/ index = [0x%.16x], to = [%d])\n", index, to); | 493 | "(page w/ index = [0x%.16x], to = [%d])\n", index, to); |
494 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { | ||
495 | rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page, 0, | ||
496 | to); | ||
497 | if (!rc) { | ||
498 | rc = copied; | ||
499 | fsstack_copy_inode_size(ecryptfs_inode, | ||
500 | ecryptfs_inode_to_lower(ecryptfs_inode)); | ||
501 | } | ||
502 | goto out; | ||
503 | } | ||
493 | /* Fills in zeros if 'to' goes beyond inode size */ | 504 | /* Fills in zeros if 'to' goes beyond inode size */ |
494 | rc = fill_zeros_to_end_of_page(page, to); | 505 | rc = fill_zeros_to_end_of_page(page, to); |
495 | if (rc) { | 506 | if (rc) { |
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 75c2ea9fee35..a137c6ea2fee 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c | |||
@@ -117,13 +117,15 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, | |||
117 | size_t size) | 117 | size_t size) |
118 | { | 118 | { |
119 | struct page *ecryptfs_page; | 119 | struct page *ecryptfs_page; |
120 | struct ecryptfs_crypt_stat *crypt_stat; | ||
121 | struct inode *ecryptfs_inode = ecryptfs_file->f_dentry->d_inode; | ||
120 | char *ecryptfs_page_virt; | 122 | char *ecryptfs_page_virt; |
121 | loff_t ecryptfs_file_size = | 123 | loff_t ecryptfs_file_size = i_size_read(ecryptfs_inode); |
122 | i_size_read(ecryptfs_file->f_dentry->d_inode); | ||
123 | loff_t data_offset = 0; | 124 | loff_t data_offset = 0; |
124 | loff_t pos; | 125 | loff_t pos; |
125 | int rc = 0; | 126 | int rc = 0; |
126 | 127 | ||
128 | crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; | ||
127 | /* | 129 | /* |
128 | * if we are writing beyond current size, then start pos | 130 | * if we are writing beyond current size, then start pos |
129 | * at the current size - we'll fill in zeros from there. | 131 | * at the current size - we'll fill in zeros from there. |
@@ -184,7 +186,13 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, | |||
184 | flush_dcache_page(ecryptfs_page); | 186 | flush_dcache_page(ecryptfs_page); |
185 | SetPageUptodate(ecryptfs_page); | 187 | SetPageUptodate(ecryptfs_page); |
186 | unlock_page(ecryptfs_page); | 188 | unlock_page(ecryptfs_page); |
187 | rc = ecryptfs_encrypt_page(ecryptfs_page); | 189 | if (crypt_stat->flags & ECRYPTFS_ENCRYPTED) |
190 | rc = ecryptfs_encrypt_page(ecryptfs_page); | ||
191 | else | ||
192 | rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, | ||
193 | ecryptfs_page, | ||
194 | start_offset_in_page, | ||
195 | data_offset); | ||
188 | page_cache_release(ecryptfs_page); | 196 | page_cache_release(ecryptfs_page); |
189 | if (rc) { | 197 | if (rc) { |
190 | printk(KERN_ERR "%s: Error encrypting " | 198 | printk(KERN_ERR "%s: Error encrypting " |
@@ -194,14 +202,16 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, | |||
194 | pos += num_bytes; | 202 | pos += num_bytes; |
195 | } | 203 | } |
196 | if ((offset + size) > ecryptfs_file_size) { | 204 | if ((offset + size) > ecryptfs_file_size) { |
197 | i_size_write(ecryptfs_file->f_dentry->d_inode, (offset + size)); | 205 | i_size_write(ecryptfs_inode, (offset + size)); |
198 | rc = ecryptfs_write_inode_size_to_metadata( | 206 | if (crypt_stat->flags & ECRYPTFS_ENCRYPTED) { |
199 | ecryptfs_file->f_dentry->d_inode); | 207 | rc = ecryptfs_write_inode_size_to_metadata( |
200 | if (rc) { | 208 | ecryptfs_inode); |
201 | printk(KERN_ERR "Problem with " | 209 | if (rc) { |
202 | "ecryptfs_write_inode_size_to_metadata; " | 210 | printk(KERN_ERR "Problem with " |
203 | "rc = [%d]\n", rc); | 211 | "ecryptfs_write_inode_size_to_metadata; " |
204 | goto out; | 212 | "rc = [%d]\n", rc); |
213 | goto out; | ||
214 | } | ||
205 | } | 215 | } |
206 | } | 216 | } |
207 | out: | 217 | out: |
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index c27ac2b358a1..fa4c7e7d15d9 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c | |||
@@ -170,7 +170,10 @@ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
170 | list_for_each_entry(walker, | 170 | list_for_each_entry(walker, |
171 | &mount_crypt_stat->global_auth_tok_list, | 171 | &mount_crypt_stat->global_auth_tok_list, |
172 | mount_crypt_stat_list) { | 172 | mount_crypt_stat_list) { |
173 | seq_printf(m, ",ecryptfs_sig=%s", walker->sig); | 173 | if (walker->flags & ECRYPTFS_AUTH_TOK_FNEK) |
174 | seq_printf(m, ",ecryptfs_fnek_sig=%s", walker->sig); | ||
175 | else | ||
176 | seq_printf(m, ",ecryptfs_sig=%s", walker->sig); | ||
174 | } | 177 | } |
175 | mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); | 178 | mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); |
176 | 179 | ||
@@ -186,6 +189,8 @@ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
186 | seq_printf(m, ",ecryptfs_xattr_metadata"); | 189 | seq_printf(m, ",ecryptfs_xattr_metadata"); |
187 | if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) | 190 | if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) |
188 | seq_printf(m, ",ecryptfs_encrypted_view"); | 191 | seq_printf(m, ",ecryptfs_encrypted_view"); |
192 | if (mount_crypt_stat->flags & ECRYPTFS_UNLINK_SIGS) | ||
193 | seq_printf(m, ",ecryptfs_unlink_sigs"); | ||
189 | 194 | ||
190 | return 0; | 195 | return 0; |
191 | } | 196 | } |
@@ -1060,7 +1060,6 @@ EXPORT_SYMBOL(install_exec_creds); | |||
1060 | int check_unsafe_exec(struct linux_binprm *bprm) | 1060 | int check_unsafe_exec(struct linux_binprm *bprm) |
1061 | { | 1061 | { |
1062 | struct task_struct *p = current, *t; | 1062 | struct task_struct *p = current, *t; |
1063 | unsigned long flags; | ||
1064 | unsigned n_fs; | 1063 | unsigned n_fs; |
1065 | int res = 0; | 1064 | int res = 0; |
1066 | 1065 | ||
@@ -1068,21 +1067,22 @@ int check_unsafe_exec(struct linux_binprm *bprm) | |||
1068 | 1067 | ||
1069 | n_fs = 1; | 1068 | n_fs = 1; |
1070 | write_lock(&p->fs->lock); | 1069 | write_lock(&p->fs->lock); |
1071 | lock_task_sighand(p, &flags); | 1070 | rcu_read_lock(); |
1072 | for (t = next_thread(p); t != p; t = next_thread(t)) { | 1071 | for (t = next_thread(p); t != p; t = next_thread(t)) { |
1073 | if (t->fs == p->fs) | 1072 | if (t->fs == p->fs) |
1074 | n_fs++; | 1073 | n_fs++; |
1075 | } | 1074 | } |
1075 | rcu_read_unlock(); | ||
1076 | 1076 | ||
1077 | if (p->fs->users > n_fs) { | 1077 | if (p->fs->users > n_fs) { |
1078 | bprm->unsafe |= LSM_UNSAFE_SHARE; | 1078 | bprm->unsafe |= LSM_UNSAFE_SHARE; |
1079 | } else { | 1079 | } else { |
1080 | if (p->fs->in_exec) | 1080 | res = -EAGAIN; |
1081 | res = -EAGAIN; | 1081 | if (!p->fs->in_exec) { |
1082 | p->fs->in_exec = 1; | 1082 | p->fs->in_exec = 1; |
1083 | res = 1; | ||
1084 | } | ||
1083 | } | 1085 | } |
1084 | |||
1085 | unlock_task_sighand(p, &flags); | ||
1086 | write_unlock(&p->fs->lock); | 1086 | write_unlock(&p->fs->lock); |
1087 | 1087 | ||
1088 | return res; | 1088 | return res; |
@@ -1284,6 +1284,7 @@ int do_execve(char * filename, | |||
1284 | struct linux_binprm *bprm; | 1284 | struct linux_binprm *bprm; |
1285 | struct file *file; | 1285 | struct file *file; |
1286 | struct files_struct *displaced; | 1286 | struct files_struct *displaced; |
1287 | bool clear_in_exec; | ||
1287 | int retval; | 1288 | int retval; |
1288 | 1289 | ||
1289 | retval = unshare_files(&displaced); | 1290 | retval = unshare_files(&displaced); |
@@ -1306,8 +1307,9 @@ int do_execve(char * filename, | |||
1306 | goto out_unlock; | 1307 | goto out_unlock; |
1307 | 1308 | ||
1308 | retval = check_unsafe_exec(bprm); | 1309 | retval = check_unsafe_exec(bprm); |
1309 | if (retval) | 1310 | if (retval < 0) |
1310 | goto out_unlock; | 1311 | goto out_unlock; |
1312 | clear_in_exec = retval; | ||
1311 | 1313 | ||
1312 | file = open_exec(filename); | 1314 | file = open_exec(filename); |
1313 | retval = PTR_ERR(file); | 1315 | retval = PTR_ERR(file); |
@@ -1355,9 +1357,7 @@ int do_execve(char * filename, | |||
1355 | goto out; | 1357 | goto out; |
1356 | 1358 | ||
1357 | /* execve succeeded */ | 1359 | /* execve succeeded */ |
1358 | write_lock(&current->fs->lock); | ||
1359 | current->fs->in_exec = 0; | 1360 | current->fs->in_exec = 0; |
1360 | write_unlock(&current->fs->lock); | ||
1361 | current->in_execve = 0; | 1361 | current->in_execve = 0; |
1362 | mutex_unlock(&current->cred_exec_mutex); | 1362 | mutex_unlock(&current->cred_exec_mutex); |
1363 | acct_update_integrals(current); | 1363 | acct_update_integrals(current); |
@@ -1377,9 +1377,8 @@ out_file: | |||
1377 | } | 1377 | } |
1378 | 1378 | ||
1379 | out_unmark: | 1379 | out_unmark: |
1380 | write_lock(&current->fs->lock); | 1380 | if (clear_in_exec) |
1381 | current->fs->in_exec = 0; | 1381 | current->fs->in_exec = 0; |
1382 | write_unlock(&current->fs->lock); | ||
1383 | 1382 | ||
1384 | out_unlock: | 1383 | out_unlock: |
1385 | current->in_execve = 0; | 1384 | current->in_execve = 0; |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index f983225266dc..5c4afe652245 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -1395,8 +1395,10 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type, | |||
1395 | blk++; | 1395 | blk++; |
1396 | } | 1396 | } |
1397 | out: | 1397 | out: |
1398 | if (len == towrite) | 1398 | if (len == towrite) { |
1399 | mutex_unlock(&inode->i_mutex); | ||
1399 | return err; | 1400 | return err; |
1401 | } | ||
1400 | if (inode->i_size < off+len-towrite) | 1402 | if (inode->i_size < off+len-towrite) |
1401 | i_size_write(inode, off+len-towrite); | 1403 | i_size_write(inode, off+len-towrite); |
1402 | inode->i_version++; | 1404 | inode->i_version++; |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 2a1cb0979768..e40332158340 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -326,11 +326,14 @@ ext4_ext_max_entries(struct inode *inode, int depth) | |||
326 | 326 | ||
327 | static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) | 327 | static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) |
328 | { | 328 | { |
329 | ext4_fsblk_t block = ext_pblock(ext); | 329 | ext4_fsblk_t block = ext_pblock(ext), valid_block; |
330 | int len = ext4_ext_get_actual_len(ext); | 330 | int len = ext4_ext_get_actual_len(ext); |
331 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | 331 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; |
332 | if (unlikely(block < le32_to_cpu(es->s_first_data_block) || | 332 | |
333 | ((block + len) > ext4_blocks_count(es)))) | 333 | valid_block = le32_to_cpu(es->s_first_data_block) + |
334 | EXT4_SB(inode->i_sb)->s_gdb_count; | ||
335 | if (unlikely(block <= valid_block || | ||
336 | ((block + len) > ext4_blocks_count(es)))) | ||
334 | return 0; | 337 | return 0; |
335 | else | 338 | else |
336 | return 1; | 339 | return 1; |
@@ -339,10 +342,13 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) | |||
339 | static int ext4_valid_extent_idx(struct inode *inode, | 342 | static int ext4_valid_extent_idx(struct inode *inode, |
340 | struct ext4_extent_idx *ext_idx) | 343 | struct ext4_extent_idx *ext_idx) |
341 | { | 344 | { |
342 | ext4_fsblk_t block = idx_pblock(ext_idx); | 345 | ext4_fsblk_t block = idx_pblock(ext_idx), valid_block; |
343 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | 346 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; |
344 | if (unlikely(block < le32_to_cpu(es->s_first_data_block) || | 347 | |
345 | (block >= ext4_blocks_count(es)))) | 348 | valid_block = le32_to_cpu(es->s_first_data_block) + |
349 | EXT4_SB(inode->i_sb)->s_gdb_count; | ||
350 | if (unlikely(block <= valid_block || | ||
351 | (block >= ext4_blocks_count(es)))) | ||
346 | return 0; | 352 | return 0; |
347 | else | 353 | else |
348 | return 1; | 354 | return 1; |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 47b84e8df568..f18e0a08a6b5 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -585,6 +585,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
585 | fallback: | 585 | fallback: |
586 | ngroups = sbi->s_groups_count; | 586 | ngroups = sbi->s_groups_count; |
587 | avefreei = freei / ngroups; | 587 | avefreei = freei / ngroups; |
588 | fallback_retry: | ||
588 | parent_group = EXT4_I(parent)->i_block_group; | 589 | parent_group = EXT4_I(parent)->i_block_group; |
589 | for (i = 0; i < ngroups; i++) { | 590 | for (i = 0; i < ngroups; i++) { |
590 | grp = (parent_group + i) % ngroups; | 591 | grp = (parent_group + i) % ngroups; |
@@ -602,7 +603,7 @@ fallback: | |||
602 | * filesystems the above test can fail to find any blockgroups | 603 | * filesystems the above test can fail to find any blockgroups |
603 | */ | 604 | */ |
604 | avefreei = 0; | 605 | avefreei = 0; |
605 | goto fallback; | 606 | goto fallback_retry; |
606 | } | 607 | } |
607 | 608 | ||
608 | return -1; | 609 | return -1; |
@@ -831,11 +832,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) | |||
831 | ret2 = find_group_flex(sb, dir, &group); | 832 | ret2 = find_group_flex(sb, dir, &group); |
832 | if (ret2 == -1) { | 833 | if (ret2 == -1) { |
833 | ret2 = find_group_other(sb, dir, &group, mode); | 834 | ret2 = find_group_other(sb, dir, &group, mode); |
834 | if (ret2 == 0 && once) | 835 | if (ret2 == 0 && once) { |
835 | once = 0; | 836 | once = 0; |
836 | printk(KERN_NOTICE "ext4: find_group_flex " | 837 | printk(KERN_NOTICE "ext4: find_group_flex " |
837 | "failed, fallback succeeded dir %lu\n", | 838 | "failed, fallback succeeded dir %lu\n", |
838 | dir->i_ino); | 839 | dir->i_ino); |
840 | } | ||
839 | } | 841 | } |
840 | goto got_group; | 842 | goto got_group; |
841 | } | 843 | } |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c6bd6ced3bb7..e91f978c7f12 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -4357,11 +4357,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4357 | ei->i_flags = le32_to_cpu(raw_inode->i_flags); | 4357 | ei->i_flags = le32_to_cpu(raw_inode->i_flags); |
4358 | inode->i_blocks = ext4_inode_blocks(raw_inode, ei); | 4358 | inode->i_blocks = ext4_inode_blocks(raw_inode, ei); |
4359 | ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); | 4359 | ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); |
4360 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | 4360 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) |
4361 | cpu_to_le32(EXT4_OS_HURD)) { | ||
4362 | ei->i_file_acl |= | 4361 | ei->i_file_acl |= |
4363 | ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; | 4362 | ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; |
4364 | } | ||
4365 | inode->i_size = ext4_isize(raw_inode); | 4363 | inode->i_size = ext4_isize(raw_inode); |
4366 | ei->i_disksize = inode->i_size; | 4364 | ei->i_disksize = inode->i_size; |
4367 | inode->i_generation = le32_to_cpu(raw_inode->i_generation); | 4365 | inode->i_generation = le32_to_cpu(raw_inode->i_generation); |
@@ -4409,9 +4407,23 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4409 | (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; | 4407 | (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; |
4410 | } | 4408 | } |
4411 | 4409 | ||
4412 | if (ei->i_flags & EXT4_EXTENTS_FL) { | 4410 | ret = 0; |
4413 | /* Validate extent which is part of inode */ | 4411 | if (ei->i_file_acl && |
4414 | ret = ext4_ext_check_inode(inode); | 4412 | ((ei->i_file_acl < |
4413 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + | ||
4414 | EXT4_SB(sb)->s_gdb_count)) || | ||
4415 | (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) { | ||
4416 | ext4_error(sb, __func__, | ||
4417 | "bad extended attribute block %llu in inode #%lu", | ||
4418 | ei->i_file_acl, inode->i_ino); | ||
4419 | ret = -EIO; | ||
4420 | goto bad_inode; | ||
4421 | } else if (ei->i_flags & EXT4_EXTENTS_FL) { | ||
4422 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | ||
4423 | (S_ISLNK(inode->i_mode) && | ||
4424 | !ext4_inode_is_fast_symlink(inode))) | ||
4425 | /* Validate extent which is part of inode */ | ||
4426 | ret = ext4_ext_check_inode(inode); | ||
4415 | } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 4427 | } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
4416 | (S_ISLNK(inode->i_mode) && | 4428 | (S_ISLNK(inode->i_mode) && |
4417 | !ext4_inode_is_fast_symlink(inode))) { | 4429 | !ext4_inode_is_fast_symlink(inode))) { |
diff --git a/fs/filesystems.c b/fs/filesystems.c index 1aa70260e6d1..a24c58e181db 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c | |||
@@ -199,7 +199,7 @@ SYSCALL_DEFINE3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2) | |||
199 | return retval; | 199 | return retval; |
200 | } | 200 | } |
201 | 201 | ||
202 | int get_filesystem_list(char * buf) | 202 | int __init get_filesystem_list(char *buf) |
203 | { | 203 | { |
204 | int len = 0; | 204 | int len = 0; |
205 | struct file_system_type * tmp; | 205 | struct file_system_type * tmp; |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index bf23a62aa925..70f87f43afa2 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -156,6 +156,12 @@ static void inode_go_sync(struct gfs2_glock *gl) | |||
156 | error = filemap_fdatawait(metamapping); | 156 | error = filemap_fdatawait(metamapping); |
157 | mapping_set_error(metamapping, error); | 157 | mapping_set_error(metamapping, error); |
158 | gfs2_ail_empty_gl(gl); | 158 | gfs2_ail_empty_gl(gl); |
159 | /* | ||
160 | * Writeback of the data mapping may cause the dirty flag to be set | ||
161 | * so we have to clear it again here. | ||
162 | */ | ||
163 | smp_mb__before_clear_bit(); | ||
164 | clear_bit(GLF_DIRTY, &gl->gl_flags); | ||
159 | } | 165 | } |
160 | 166 | ||
161 | /** | 167 | /** |
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 101caf3ee861..5d82e91887e3 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c | |||
@@ -413,7 +413,9 @@ out_unlock: | |||
413 | gfs2_glock_dq(&gh); | 413 | gfs2_glock_dq(&gh); |
414 | out: | 414 | out: |
415 | gfs2_holder_uninit(&gh); | 415 | gfs2_holder_uninit(&gh); |
416 | if (ret) | 416 | if (ret == -ENOMEM) |
417 | ret = VM_FAULT_OOM; | ||
418 | else if (ret) | ||
417 | ret = VM_FAULT_SIGBUS; | 419 | ret = VM_FAULT_SIGBUS; |
418 | return ret; | 420 | return ret; |
419 | } | 421 | } |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f03d024038ea..565038243fa2 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -212,8 +212,7 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, | |||
212 | if (tmp == 0) | 212 | if (tmp == 0) |
213 | return BFITNOENT; | 213 | return BFITNOENT; |
214 | ptr--; | 214 | ptr--; |
215 | bit = fls64(tmp); | 215 | bit = __ffs64(tmp); |
216 | bit--; /* fls64 always adds one to the bit count */ | ||
217 | bit /= 2; /* two bits per entry in the bitmap */ | 216 | bit /= 2; /* two bits per entry in the bitmap */ |
218 | return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit; | 217 | return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit; |
219 | } | 218 | } |
@@ -1445,10 +1444,12 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, | |||
1445 | u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) | 1444 | u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) |
1446 | { | 1445 | { |
1447 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1446 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1447 | struct buffer_head *dibh; | ||
1448 | struct gfs2_alloc *al = ip->i_alloc; | 1448 | struct gfs2_alloc *al = ip->i_alloc; |
1449 | struct gfs2_rgrpd *rgd = al->al_rgd; | 1449 | struct gfs2_rgrpd *rgd = al->al_rgd; |
1450 | u32 goal, blk; | 1450 | u32 goal, blk; |
1451 | u64 block; | 1451 | u64 block; |
1452 | int error; | ||
1452 | 1453 | ||
1453 | if (rgrp_contains_block(rgd, ip->i_goal)) | 1454 | if (rgrp_contains_block(rgd, ip->i_goal)) |
1454 | goal = ip->i_goal - rgd->rd_data0; | 1455 | goal = ip->i_goal - rgd->rd_data0; |
@@ -1461,7 +1462,13 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) | |||
1461 | rgd->rd_last_alloc = blk; | 1462 | rgd->rd_last_alloc = blk; |
1462 | block = rgd->rd_data0 + blk; | 1463 | block = rgd->rd_data0 + blk; |
1463 | ip->i_goal = block; | 1464 | ip->i_goal = block; |
1464 | 1465 | error = gfs2_meta_inode_buffer(ip, &dibh); | |
1466 | if (error == 0) { | ||
1467 | struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; | ||
1468 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
1469 | di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal); | ||
1470 | brelse(dibh); | ||
1471 | } | ||
1465 | gfs2_assert_withdraw(sdp, rgd->rd_free >= *n); | 1472 | gfs2_assert_withdraw(sdp, rgd->rd_free >= *n); |
1466 | rgd->rd_free -= *n; | 1473 | rgd->rd_free -= *n; |
1467 | 1474 | ||
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 23a3c76711e0..153d9681192b 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -26,7 +26,6 @@ | |||
26 | #include <linux/pagevec.h> | 26 | #include <linux/pagevec.h> |
27 | #include <linux/parser.h> | 27 | #include <linux/parser.h> |
28 | #include <linux/mman.h> | 28 | #include <linux/mman.h> |
29 | #include <linux/quotaops.h> | ||
30 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
31 | #include <linux/dnotify.h> | 30 | #include <linux/dnotify.h> |
32 | #include <linux/statfs.h> | 31 | #include <linux/statfs.h> |
@@ -842,7 +841,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) | |||
842 | bad_val: | 841 | bad_val: |
843 | printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n", | 842 | printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n", |
844 | args[0].from, p); | 843 | args[0].from, p); |
845 | return 1; | 844 | return -EINVAL; |
846 | } | 845 | } |
847 | 846 | ||
848 | static int | 847 | static int |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index a8e8513a78a9..06560c520f49 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -502,7 +502,7 @@ void journal_commit_transaction(journal_t *journal) | |||
502 | err = 0; | 502 | err = 0; |
503 | } | 503 | } |
504 | 504 | ||
505 | journal_write_revoke_records(journal, commit_transaction); | 505 | journal_write_revoke_records(journal, commit_transaction, write_op); |
506 | 506 | ||
507 | /* | 507 | /* |
508 | * If we found any dirty or locked buffers, then we should have | 508 | * If we found any dirty or locked buffers, then we should have |
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index 3e9afc2a91d2..da6cd9bdaabc 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c | |||
@@ -86,6 +86,7 @@ | |||
86 | #include <linux/slab.h> | 86 | #include <linux/slab.h> |
87 | #include <linux/list.h> | 87 | #include <linux/list.h> |
88 | #include <linux/init.h> | 88 | #include <linux/init.h> |
89 | #include <linux/bio.h> | ||
89 | #endif | 90 | #endif |
90 | #include <linux/log2.h> | 91 | #include <linux/log2.h> |
91 | 92 | ||
@@ -118,8 +119,8 @@ struct jbd_revoke_table_s | |||
118 | #ifdef __KERNEL__ | 119 | #ifdef __KERNEL__ |
119 | static void write_one_revoke_record(journal_t *, transaction_t *, | 120 | static void write_one_revoke_record(journal_t *, transaction_t *, |
120 | struct journal_head **, int *, | 121 | struct journal_head **, int *, |
121 | struct jbd_revoke_record_s *); | 122 | struct jbd_revoke_record_s *, int); |
122 | static void flush_descriptor(journal_t *, struct journal_head *, int); | 123 | static void flush_descriptor(journal_t *, struct journal_head *, int, int); |
123 | #endif | 124 | #endif |
124 | 125 | ||
125 | /* Utility functions to maintain the revoke table */ | 126 | /* Utility functions to maintain the revoke table */ |
@@ -500,7 +501,7 @@ void journal_switch_revoke_table(journal_t *journal) | |||
500 | * revoke hash, deleting the entries as we go. | 501 | * revoke hash, deleting the entries as we go. |
501 | */ | 502 | */ |
502 | void journal_write_revoke_records(journal_t *journal, | 503 | void journal_write_revoke_records(journal_t *journal, |
503 | transaction_t *transaction) | 504 | transaction_t *transaction, int write_op) |
504 | { | 505 | { |
505 | struct journal_head *descriptor; | 506 | struct journal_head *descriptor; |
506 | struct jbd_revoke_record_s *record; | 507 | struct jbd_revoke_record_s *record; |
@@ -524,14 +525,14 @@ void journal_write_revoke_records(journal_t *journal, | |||
524 | hash_list->next; | 525 | hash_list->next; |
525 | write_one_revoke_record(journal, transaction, | 526 | write_one_revoke_record(journal, transaction, |
526 | &descriptor, &offset, | 527 | &descriptor, &offset, |
527 | record); | 528 | record, write_op); |
528 | count++; | 529 | count++; |
529 | list_del(&record->hash); | 530 | list_del(&record->hash); |
530 | kmem_cache_free(revoke_record_cache, record); | 531 | kmem_cache_free(revoke_record_cache, record); |
531 | } | 532 | } |
532 | } | 533 | } |
533 | if (descriptor) | 534 | if (descriptor) |
534 | flush_descriptor(journal, descriptor, offset); | 535 | flush_descriptor(journal, descriptor, offset, write_op); |
535 | jbd_debug(1, "Wrote %d revoke records\n", count); | 536 | jbd_debug(1, "Wrote %d revoke records\n", count); |
536 | } | 537 | } |
537 | 538 | ||
@@ -544,7 +545,8 @@ static void write_one_revoke_record(journal_t *journal, | |||
544 | transaction_t *transaction, | 545 | transaction_t *transaction, |
545 | struct journal_head **descriptorp, | 546 | struct journal_head **descriptorp, |
546 | int *offsetp, | 547 | int *offsetp, |
547 | struct jbd_revoke_record_s *record) | 548 | struct jbd_revoke_record_s *record, |
549 | int write_op) | ||
548 | { | 550 | { |
549 | struct journal_head *descriptor; | 551 | struct journal_head *descriptor; |
550 | int offset; | 552 | int offset; |
@@ -563,7 +565,7 @@ static void write_one_revoke_record(journal_t *journal, | |||
563 | /* Make sure we have a descriptor with space left for the record */ | 565 | /* Make sure we have a descriptor with space left for the record */ |
564 | if (descriptor) { | 566 | if (descriptor) { |
565 | if (offset == journal->j_blocksize) { | 567 | if (offset == journal->j_blocksize) { |
566 | flush_descriptor(journal, descriptor, offset); | 568 | flush_descriptor(journal, descriptor, offset, write_op); |
567 | descriptor = NULL; | 569 | descriptor = NULL; |
568 | } | 570 | } |
569 | } | 571 | } |
@@ -600,7 +602,7 @@ static void write_one_revoke_record(journal_t *journal, | |||
600 | 602 | ||
601 | static void flush_descriptor(journal_t *journal, | 603 | static void flush_descriptor(journal_t *journal, |
602 | struct journal_head *descriptor, | 604 | struct journal_head *descriptor, |
603 | int offset) | 605 | int offset, int write_op) |
604 | { | 606 | { |
605 | journal_revoke_header_t *header; | 607 | journal_revoke_header_t *header; |
606 | struct buffer_head *bh = jh2bh(descriptor); | 608 | struct buffer_head *bh = jh2bh(descriptor); |
@@ -615,7 +617,7 @@ static void flush_descriptor(journal_t *journal, | |||
615 | set_buffer_jwrite(bh); | 617 | set_buffer_jwrite(bh); |
616 | BUFFER_TRACE(bh, "write"); | 618 | BUFFER_TRACE(bh, "write"); |
617 | set_buffer_dirty(bh); | 619 | set_buffer_dirty(bh); |
618 | ll_rw_block(SWRITE, 1, &bh); | 620 | ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); |
619 | } | 621 | } |
620 | #endif | 622 | #endif |
621 | 623 | ||
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 073c8c3df7cd..0b7d3b8226fd 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -506,7 +506,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
506 | if (err) | 506 | if (err) |
507 | jbd2_journal_abort(journal, err); | 507 | jbd2_journal_abort(journal, err); |
508 | 508 | ||
509 | jbd2_journal_write_revoke_records(journal, commit_transaction); | 509 | jbd2_journal_write_revoke_records(journal, commit_transaction, |
510 | write_op); | ||
510 | 511 | ||
511 | jbd_debug(3, "JBD: commit phase 2\n"); | 512 | jbd_debug(3, "JBD: commit phase 2\n"); |
512 | 513 | ||
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index bbe6d592d8b3..a360b06af2e3 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
@@ -86,6 +86,7 @@ | |||
86 | #include <linux/slab.h> | 86 | #include <linux/slab.h> |
87 | #include <linux/list.h> | 87 | #include <linux/list.h> |
88 | #include <linux/init.h> | 88 | #include <linux/init.h> |
89 | #include <linux/bio.h> | ||
89 | #endif | 90 | #endif |
90 | #include <linux/log2.h> | 91 | #include <linux/log2.h> |
91 | 92 | ||
@@ -118,8 +119,8 @@ struct jbd2_revoke_table_s | |||
118 | #ifdef __KERNEL__ | 119 | #ifdef __KERNEL__ |
119 | static void write_one_revoke_record(journal_t *, transaction_t *, | 120 | static void write_one_revoke_record(journal_t *, transaction_t *, |
120 | struct journal_head **, int *, | 121 | struct journal_head **, int *, |
121 | struct jbd2_revoke_record_s *); | 122 | struct jbd2_revoke_record_s *, int); |
122 | static void flush_descriptor(journal_t *, struct journal_head *, int); | 123 | static void flush_descriptor(journal_t *, struct journal_head *, int, int); |
123 | #endif | 124 | #endif |
124 | 125 | ||
125 | /* Utility functions to maintain the revoke table */ | 126 | /* Utility functions to maintain the revoke table */ |
@@ -499,7 +500,8 @@ void jbd2_journal_switch_revoke_table(journal_t *journal) | |||
499 | * revoke hash, deleting the entries as we go. | 500 | * revoke hash, deleting the entries as we go. |
500 | */ | 501 | */ |
501 | void jbd2_journal_write_revoke_records(journal_t *journal, | 502 | void jbd2_journal_write_revoke_records(journal_t *journal, |
502 | transaction_t *transaction) | 503 | transaction_t *transaction, |
504 | int write_op) | ||
503 | { | 505 | { |
504 | struct journal_head *descriptor; | 506 | struct journal_head *descriptor; |
505 | struct jbd2_revoke_record_s *record; | 507 | struct jbd2_revoke_record_s *record; |
@@ -523,14 +525,14 @@ void jbd2_journal_write_revoke_records(journal_t *journal, | |||
523 | hash_list->next; | 525 | hash_list->next; |
524 | write_one_revoke_record(journal, transaction, | 526 | write_one_revoke_record(journal, transaction, |
525 | &descriptor, &offset, | 527 | &descriptor, &offset, |
526 | record); | 528 | record, write_op); |
527 | count++; | 529 | count++; |
528 | list_del(&record->hash); | 530 | list_del(&record->hash); |
529 | kmem_cache_free(jbd2_revoke_record_cache, record); | 531 | kmem_cache_free(jbd2_revoke_record_cache, record); |
530 | } | 532 | } |
531 | } | 533 | } |
532 | if (descriptor) | 534 | if (descriptor) |
533 | flush_descriptor(journal, descriptor, offset); | 535 | flush_descriptor(journal, descriptor, offset, write_op); |
534 | jbd_debug(1, "Wrote %d revoke records\n", count); | 536 | jbd_debug(1, "Wrote %d revoke records\n", count); |
535 | } | 537 | } |
536 | 538 | ||
@@ -543,7 +545,8 @@ static void write_one_revoke_record(journal_t *journal, | |||
543 | transaction_t *transaction, | 545 | transaction_t *transaction, |
544 | struct journal_head **descriptorp, | 546 | struct journal_head **descriptorp, |
545 | int *offsetp, | 547 | int *offsetp, |
546 | struct jbd2_revoke_record_s *record) | 548 | struct jbd2_revoke_record_s *record, |
549 | int write_op) | ||
547 | { | 550 | { |
548 | struct journal_head *descriptor; | 551 | struct journal_head *descriptor; |
549 | int offset; | 552 | int offset; |
@@ -562,7 +565,7 @@ static void write_one_revoke_record(journal_t *journal, | |||
562 | /* Make sure we have a descriptor with space left for the record */ | 565 | /* Make sure we have a descriptor with space left for the record */ |
563 | if (descriptor) { | 566 | if (descriptor) { |
564 | if (offset == journal->j_blocksize) { | 567 | if (offset == journal->j_blocksize) { |
565 | flush_descriptor(journal, descriptor, offset); | 568 | flush_descriptor(journal, descriptor, offset, write_op); |
566 | descriptor = NULL; | 569 | descriptor = NULL; |
567 | } | 570 | } |
568 | } | 571 | } |
@@ -607,7 +610,7 @@ static void write_one_revoke_record(journal_t *journal, | |||
607 | 610 | ||
608 | static void flush_descriptor(journal_t *journal, | 611 | static void flush_descriptor(journal_t *journal, |
609 | struct journal_head *descriptor, | 612 | struct journal_head *descriptor, |
610 | int offset) | 613 | int offset, int write_op) |
611 | { | 614 | { |
612 | jbd2_journal_revoke_header_t *header; | 615 | jbd2_journal_revoke_header_t *header; |
613 | struct buffer_head *bh = jh2bh(descriptor); | 616 | struct buffer_head *bh = jh2bh(descriptor); |
@@ -622,7 +625,7 @@ static void flush_descriptor(journal_t *journal, | |||
622 | set_buffer_jwrite(bh); | 625 | set_buffer_jwrite(bh); |
623 | BUFFER_TRACE(bh, "write"); | 626 | BUFFER_TRACE(bh, "write"); |
624 | set_buffer_dirty(bh); | 627 | set_buffer_dirty(bh); |
625 | ll_rw_block(SWRITE, 1, &bh); | 628 | ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); |
626 | } | 629 | } |
627 | #endif | 630 | #endif |
628 | 631 | ||
diff --git a/fs/namei.c b/fs/namei.c index b8433ebfae05..78f253cd2d4f 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1248,6 +1248,8 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) | |||
1248 | int err; | 1248 | int err; |
1249 | struct qstr this; | 1249 | struct qstr this; |
1250 | 1250 | ||
1251 | WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); | ||
1252 | |||
1251 | err = __lookup_one_len(name, &this, base, len); | 1253 | err = __lookup_one_len(name, &this, base, len); |
1252 | if (err) | 1254 | if (err) |
1253 | return ERR_PTR(err); | 1255 | return ERR_PTR(err); |
diff --git a/fs/namespace.c b/fs/namespace.c index d9138f81ec10..41196209a906 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -1377,7 +1377,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, | |||
1377 | if (parent_path) { | 1377 | if (parent_path) { |
1378 | detach_mnt(source_mnt, parent_path); | 1378 | detach_mnt(source_mnt, parent_path); |
1379 | attach_mnt(source_mnt, path); | 1379 | attach_mnt(source_mnt, path); |
1380 | touch_mnt_namespace(current->nsproxy->mnt_ns); | 1380 | touch_mnt_namespace(parent_path->mnt->mnt_ns); |
1381 | } else { | 1381 | } else { |
1382 | mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); | 1382 | mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); |
1383 | commit_tree(source_mnt); | 1383 | commit_tree(source_mnt); |
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index f54360f50a9c..fa038df63ac8 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c | |||
@@ -660,13 +660,10 @@ outrel: | |||
660 | if (user.object_name_len > NCP_OBJECT_NAME_MAX_LEN) | 660 | if (user.object_name_len > NCP_OBJECT_NAME_MAX_LEN) |
661 | return -ENOMEM; | 661 | return -ENOMEM; |
662 | if (user.object_name_len) { | 662 | if (user.object_name_len) { |
663 | newname = kmalloc(user.object_name_len, GFP_USER); | 663 | newname = memdup_user(user.object_name, |
664 | if (!newname) | 664 | user.object_name_len); |
665 | return -ENOMEM; | 665 | if (IS_ERR(newname)) |
666 | if (copy_from_user(newname, user.object_name, user.object_name_len)) { | 666 | return PTR_ERR(newname); |
667 | kfree(newname); | ||
668 | return -EFAULT; | ||
669 | } | ||
670 | } else { | 667 | } else { |
671 | newname = NULL; | 668 | newname = NULL; |
672 | } | 669 | } |
@@ -760,13 +757,9 @@ outrel: | |||
760 | if (user.len > NCP_PRIVATE_DATA_MAX_LEN) | 757 | if (user.len > NCP_PRIVATE_DATA_MAX_LEN) |
761 | return -ENOMEM; | 758 | return -ENOMEM; |
762 | if (user.len) { | 759 | if (user.len) { |
763 | new = kmalloc(user.len, GFP_USER); | 760 | new = memdup_user(user.data, user.len); |
764 | if (!new) | 761 | if (IS_ERR(new)) |
765 | return -ENOMEM; | 762 | return PTR_ERR(new); |
766 | if (copy_from_user(new, user.data, user.len)) { | ||
767 | kfree(new); | ||
768 | return -EFAULT; | ||
769 | } | ||
770 | } else { | 763 | } else { |
771 | new = NULL; | 764 | new = NULL; |
772 | } | 765 | } |
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index e6a1932c7110..35869a4921f1 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c | |||
@@ -713,7 +713,8 @@ nfs3_xdr_setaclargs(struct rpc_rqst *req, __be32 *p, | |||
713 | if (args->npages != 0) | 713 | if (args->npages != 0) |
714 | xdr_encode_pages(buf, args->pages, 0, args->len); | 714 | xdr_encode_pages(buf, args->pages, 0, args->len); |
715 | else | 715 | else |
716 | req->rq_slen += args->len; | 716 | req->rq_slen = xdr_adjust_iovec(req->rq_svec, |
717 | p + XDR_QUADLEN(args->len)); | ||
717 | 718 | ||
718 | err = nfsacl_encode(buf, base, args->inode, | 719 | err = nfsacl_encode(buf, base, args->inode, |
719 | (args->mask & NFS_ACL) ? | 720 | (args->mask & NFS_ACL) ? |
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 3444c0052a87..5275097a7565 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c | |||
@@ -229,21 +229,23 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) | |||
229 | goto out; | 229 | goto out; |
230 | status = vfs_readdir(filp, nfsd4_build_namelist, &names); | 230 | status = vfs_readdir(filp, nfsd4_build_namelist, &names); |
231 | fput(filp); | 231 | fput(filp); |
232 | mutex_lock(&dir->d_inode->i_mutex); | ||
232 | while (!list_empty(&names)) { | 233 | while (!list_empty(&names)) { |
233 | entry = list_entry(names.next, struct name_list, list); | 234 | entry = list_entry(names.next, struct name_list, list); |
234 | 235 | ||
235 | dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); | 236 | dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); |
236 | if (IS_ERR(dentry)) { | 237 | if (IS_ERR(dentry)) { |
237 | status = PTR_ERR(dentry); | 238 | status = PTR_ERR(dentry); |
238 | goto out; | 239 | break; |
239 | } | 240 | } |
240 | status = f(dir, dentry); | 241 | status = f(dir, dentry); |
241 | dput(dentry); | 242 | dput(dentry); |
242 | if (status) | 243 | if (status) |
243 | goto out; | 244 | break; |
244 | list_del(&entry->list); | 245 | list_del(&entry->list); |
245 | kfree(entry); | 246 | kfree(entry); |
246 | } | 247 | } |
248 | mutex_unlock(&dir->d_inode->i_mutex); | ||
247 | out: | 249 | out: |
248 | while (!list_empty(&names)) { | 250 | while (!list_empty(&names)) { |
249 | entry = list_entry(names.next, struct name_list, list); | 251 | entry = list_entry(names.next, struct name_list, list); |
@@ -255,36 +257,6 @@ out: | |||
255 | } | 257 | } |
256 | 258 | ||
257 | static int | 259 | static int |
258 | nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry) | ||
259 | { | ||
260 | int status; | ||
261 | |||
262 | if (!S_ISREG(dir->d_inode->i_mode)) { | ||
263 | printk("nfsd4: non-file found in client recovery directory\n"); | ||
264 | return -EINVAL; | ||
265 | } | ||
266 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); | ||
267 | status = vfs_unlink(dir->d_inode, dentry); | ||
268 | mutex_unlock(&dir->d_inode->i_mutex); | ||
269 | return status; | ||
270 | } | ||
271 | |||
272 | static int | ||
273 | nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry) | ||
274 | { | ||
275 | int status; | ||
276 | |||
277 | /* For now this directory should already be empty, but we empty it of | ||
278 | * any regular files anyway, just in case the directory was created by | ||
279 | * a kernel from the future.... */ | ||
280 | nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file); | ||
281 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); | ||
282 | status = vfs_rmdir(dir->d_inode, dentry); | ||
283 | mutex_unlock(&dir->d_inode->i_mutex); | ||
284 | return status; | ||
285 | } | ||
286 | |||
287 | static int | ||
288 | nfsd4_unlink_clid_dir(char *name, int namlen) | 260 | nfsd4_unlink_clid_dir(char *name, int namlen) |
289 | { | 261 | { |
290 | struct dentry *dentry; | 262 | struct dentry *dentry; |
@@ -294,18 +266,18 @@ nfsd4_unlink_clid_dir(char *name, int namlen) | |||
294 | 266 | ||
295 | mutex_lock(&rec_dir.dentry->d_inode->i_mutex); | 267 | mutex_lock(&rec_dir.dentry->d_inode->i_mutex); |
296 | dentry = lookup_one_len(name, rec_dir.dentry, namlen); | 268 | dentry = lookup_one_len(name, rec_dir.dentry, namlen); |
297 | mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); | ||
298 | if (IS_ERR(dentry)) { | 269 | if (IS_ERR(dentry)) { |
299 | status = PTR_ERR(dentry); | 270 | status = PTR_ERR(dentry); |
300 | return status; | 271 | goto out_unlock; |
301 | } | 272 | } |
302 | status = -ENOENT; | 273 | status = -ENOENT; |
303 | if (!dentry->d_inode) | 274 | if (!dentry->d_inode) |
304 | goto out; | 275 | goto out; |
305 | 276 | status = vfs_rmdir(rec_dir.dentry->d_inode, dentry); | |
306 | status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry); | ||
307 | out: | 277 | out: |
308 | dput(dentry); | 278 | dput(dentry); |
279 | out_unlock: | ||
280 | mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); | ||
309 | return status; | 281 | return status; |
310 | } | 282 | } |
311 | 283 | ||
@@ -348,7 +320,7 @@ purge_old(struct dentry *parent, struct dentry *child) | |||
348 | if (nfs4_has_reclaimed_state(child->d_name.name, false)) | 320 | if (nfs4_has_reclaimed_state(child->d_name.name, false)) |
349 | return 0; | 321 | return 0; |
350 | 322 | ||
351 | status = nfsd4_clear_clid_dir(parent, child); | 323 | status = vfs_rmdir(parent->d_inode, child); |
352 | if (status) | 324 | if (status) |
353 | printk("failed to remove client recovery directory %s\n", | 325 | printk("failed to remove client recovery directory %s\n", |
354 | child->d_name.name); | 326 | child->d_name.name); |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ab93fcfef254..6c68ffd6b4bb 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -116,10 +116,15 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, | |||
116 | } | 116 | } |
117 | if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { | 117 | if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { |
118 | /* successfully crossed mount point */ | 118 | /* successfully crossed mount point */ |
119 | exp_put(exp); | 119 | /* |
120 | *expp = exp2; | 120 | * This is subtle: dentry is *not* under mnt at this point. |
121 | * The only reason we are safe is that original mnt is pinned | ||
122 | * down by exp, so we should dput before putting exp. | ||
123 | */ | ||
121 | dput(dentry); | 124 | dput(dentry); |
122 | *dpp = mounts; | 125 | *dpp = mounts; |
126 | exp_put(exp); | ||
127 | *expp = exp2; | ||
123 | } else { | 128 | } else { |
124 | exp_put(exp2); | 129 | exp_put(exp2); |
125 | dput(mounts); | 130 | dput(mounts); |
@@ -1885,8 +1890,8 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, | |||
1885 | return 0; | 1890 | return 0; |
1886 | } | 1891 | } |
1887 | 1892 | ||
1888 | static int nfsd_buffered_readdir(struct file *file, filldir_t func, | 1893 | static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, |
1889 | struct readdir_cd *cdp, loff_t *offsetp) | 1894 | struct readdir_cd *cdp, loff_t *offsetp) |
1890 | { | 1895 | { |
1891 | struct readdir_data buf; | 1896 | struct readdir_data buf; |
1892 | struct buffered_dirent *de; | 1897 | struct buffered_dirent *de; |
@@ -1896,11 +1901,12 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func, | |||
1896 | 1901 | ||
1897 | buf.dirent = (void *)__get_free_page(GFP_KERNEL); | 1902 | buf.dirent = (void *)__get_free_page(GFP_KERNEL); |
1898 | if (!buf.dirent) | 1903 | if (!buf.dirent) |
1899 | return -ENOMEM; | 1904 | return nfserrno(-ENOMEM); |
1900 | 1905 | ||
1901 | offset = *offsetp; | 1906 | offset = *offsetp; |
1902 | 1907 | ||
1903 | while (1) { | 1908 | while (1) { |
1909 | struct inode *dir_inode = file->f_path.dentry->d_inode; | ||
1904 | unsigned int reclen; | 1910 | unsigned int reclen; |
1905 | 1911 | ||
1906 | cdp->err = nfserr_eof; /* will be cleared on successful read */ | 1912 | cdp->err = nfserr_eof; /* will be cleared on successful read */ |
@@ -1919,26 +1925,38 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func, | |||
1919 | if (!size) | 1925 | if (!size) |
1920 | break; | 1926 | break; |
1921 | 1927 | ||
1928 | /* | ||
1929 | * Various filldir functions may end up calling back into | ||
1930 | * lookup_one_len() and the file system's ->lookup() method. | ||
1931 | * These expect i_mutex to be held, as it would within readdir. | ||
1932 | */ | ||
1933 | host_err = mutex_lock_killable(&dir_inode->i_mutex); | ||
1934 | if (host_err) | ||
1935 | break; | ||
1936 | |||
1922 | de = (struct buffered_dirent *)buf.dirent; | 1937 | de = (struct buffered_dirent *)buf.dirent; |
1923 | while (size > 0) { | 1938 | while (size > 0) { |
1924 | offset = de->offset; | 1939 | offset = de->offset; |
1925 | 1940 | ||
1926 | if (func(cdp, de->name, de->namlen, de->offset, | 1941 | if (func(cdp, de->name, de->namlen, de->offset, |
1927 | de->ino, de->d_type)) | 1942 | de->ino, de->d_type)) |
1928 | goto done; | 1943 | break; |
1929 | 1944 | ||
1930 | if (cdp->err != nfs_ok) | 1945 | if (cdp->err != nfs_ok) |
1931 | goto done; | 1946 | break; |
1932 | 1947 | ||
1933 | reclen = ALIGN(sizeof(*de) + de->namlen, | 1948 | reclen = ALIGN(sizeof(*de) + de->namlen, |
1934 | sizeof(u64)); | 1949 | sizeof(u64)); |
1935 | size -= reclen; | 1950 | size -= reclen; |
1936 | de = (struct buffered_dirent *)((char *)de + reclen); | 1951 | de = (struct buffered_dirent *)((char *)de + reclen); |
1937 | } | 1952 | } |
1953 | mutex_unlock(&dir_inode->i_mutex); | ||
1954 | if (size > 0) /* We bailed out early */ | ||
1955 | break; | ||
1956 | |||
1938 | offset = vfs_llseek(file, 0, SEEK_CUR); | 1957 | offset = vfs_llseek(file, 0, SEEK_CUR); |
1939 | } | 1958 | } |
1940 | 1959 | ||
1941 | done: | ||
1942 | free_page((unsigned long)(buf.dirent)); | 1960 | free_page((unsigned long)(buf.dirent)); |
1943 | 1961 | ||
1944 | if (host_err) | 1962 | if (host_err) |
diff --git a/fs/proc/stat.c b/fs/proc/stat.c index f75efa22df5e..81e4eb60972e 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c | |||
@@ -18,6 +18,9 @@ | |||
18 | #ifndef arch_irq_stat | 18 | #ifndef arch_irq_stat |
19 | #define arch_irq_stat() 0 | 19 | #define arch_irq_stat() 0 |
20 | #endif | 20 | #endif |
21 | #ifndef arch_idle_time | ||
22 | #define arch_idle_time(cpu) 0 | ||
23 | #endif | ||
21 | 24 | ||
22 | static int show_stat(struct seq_file *p, void *v) | 25 | static int show_stat(struct seq_file *p, void *v) |
23 | { | 26 | { |
@@ -40,6 +43,7 @@ static int show_stat(struct seq_file *p, void *v) | |||
40 | nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); | 43 | nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); |
41 | system = cputime64_add(system, kstat_cpu(i).cpustat.system); | 44 | system = cputime64_add(system, kstat_cpu(i).cpustat.system); |
42 | idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle); | 45 | idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle); |
46 | idle = cputime64_add(idle, arch_idle_time(i)); | ||
43 | iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait); | 47 | iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait); |
44 | irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); | 48 | irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); |
45 | softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); | 49 | softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); |
@@ -69,6 +73,7 @@ static int show_stat(struct seq_file *p, void *v) | |||
69 | nice = kstat_cpu(i).cpustat.nice; | 73 | nice = kstat_cpu(i).cpustat.nice; |
70 | system = kstat_cpu(i).cpustat.system; | 74 | system = kstat_cpu(i).cpustat.system; |
71 | idle = kstat_cpu(i).cpustat.idle; | 75 | idle = kstat_cpu(i).cpustat.idle; |
76 | idle = cputime64_add(idle, arch_idle_time(i)); | ||
72 | iowait = kstat_cpu(i).cpustat.iowait; | 77 | iowait = kstat_cpu(i).cpustat.iowait; |
73 | irq = kstat_cpu(i).cpustat.irq; | 78 | irq = kstat_cpu(i).cpustat.irq; |
74 | softirq = kstat_cpu(i).cpustat.softirq; | 79 | softirq = kstat_cpu(i).cpustat.softirq; |
diff --git a/fs/quota/Makefile b/fs/quota/Makefile index 385a0831cc99..68d4f6dc0578 100644 --- a/fs/quota/Makefile +++ b/fs/quota/Makefile | |||
@@ -1,12 +1,3 @@ | |||
1 | # | ||
2 | # Makefile for the Linux filesystems. | ||
3 | # | ||
4 | # 14 Sep 2000, Christoph Hellwig <hch@infradead.org> | ||
5 | # Rewritten to use lists instead of if-statements. | ||
6 | # | ||
7 | |||
8 | obj-y := | ||
9 | |||
10 | obj-$(CONFIG_QUOTA) += dquot.o | 1 | obj-$(CONFIG_QUOTA) += dquot.o |
11 | obj-$(CONFIG_QFMT_V1) += quota_v1.o | 2 | obj-$(CONFIG_QFMT_V1) += quota_v1.o |
12 | obj-$(CONFIG_QFMT_V2) += quota_v2.o | 3 | obj-$(CONFIG_QFMT_V2) += quota_v2.o |
diff --git a/fs/romfs/internal.h b/fs/romfs/internal.h index 06044a9dc62d..95217b830118 100644 --- a/fs/romfs/internal.h +++ b/fs/romfs/internal.h | |||
@@ -43,5 +43,5 @@ extern int romfs_dev_read(struct super_block *sb, unsigned long pos, | |||
43 | void *buf, size_t buflen); | 43 | void *buf, size_t buflen); |
44 | extern ssize_t romfs_dev_strnlen(struct super_block *sb, | 44 | extern ssize_t romfs_dev_strnlen(struct super_block *sb, |
45 | unsigned long pos, size_t maxlen); | 45 | unsigned long pos, size_t maxlen); |
46 | extern int romfs_dev_strncmp(struct super_block *sb, unsigned long pos, | 46 | extern int romfs_dev_strcmp(struct super_block *sb, unsigned long pos, |
47 | const char *str, size_t size); | 47 | const char *str, size_t size); |
diff --git a/fs/romfs/storage.c b/fs/romfs/storage.c index 7e3e1e12a081..b3208adf8e71 100644 --- a/fs/romfs/storage.c +++ b/fs/romfs/storage.c | |||
@@ -67,26 +67,35 @@ static ssize_t romfs_mtd_strnlen(struct super_block *sb, | |||
67 | * compare a string to one in a romfs image on MTD | 67 | * compare a string to one in a romfs image on MTD |
68 | * - return 1 if matched, 0 if differ, -ve if error | 68 | * - return 1 if matched, 0 if differ, -ve if error |
69 | */ | 69 | */ |
70 | static int romfs_mtd_strncmp(struct super_block *sb, unsigned long pos, | 70 | static int romfs_mtd_strcmp(struct super_block *sb, unsigned long pos, |
71 | const char *str, size_t size) | 71 | const char *str, size_t size) |
72 | { | 72 | { |
73 | u_char buf[16]; | 73 | u_char buf[17]; |
74 | size_t len, segment; | 74 | size_t len, segment; |
75 | int ret; | 75 | int ret; |
76 | 76 | ||
77 | /* scan the string up to 16 bytes at a time */ | 77 | /* scan the string up to 16 bytes at a time, and attempt to grab the |
78 | * trailing NUL whilst we're at it */ | ||
79 | buf[0] = 0xff; | ||
80 | |||
78 | while (size > 0) { | 81 | while (size > 0) { |
79 | segment = min_t(size_t, size, 16); | 82 | segment = min_t(size_t, size + 1, 17); |
80 | ret = ROMFS_MTD_READ(sb, pos, segment, &len, buf); | 83 | ret = ROMFS_MTD_READ(sb, pos, segment, &len, buf); |
81 | if (ret < 0) | 84 | if (ret < 0) |
82 | return ret; | 85 | return ret; |
86 | len--; | ||
83 | if (memcmp(buf, str, len) != 0) | 87 | if (memcmp(buf, str, len) != 0) |
84 | return 0; | 88 | return 0; |
89 | buf[0] = buf[len]; | ||
85 | size -= len; | 90 | size -= len; |
86 | pos += len; | 91 | pos += len; |
87 | str += len; | 92 | str += len; |
88 | } | 93 | } |
89 | 94 | ||
95 | /* check the trailing NUL was */ | ||
96 | if (buf[0]) | ||
97 | return 0; | ||
98 | |||
90 | return 1; | 99 | return 1; |
91 | } | 100 | } |
92 | #endif /* CONFIG_ROMFS_ON_MTD */ | 101 | #endif /* CONFIG_ROMFS_ON_MTD */ |
@@ -111,6 +120,7 @@ static int romfs_blk_read(struct super_block *sb, unsigned long pos, | |||
111 | return -EIO; | 120 | return -EIO; |
112 | memcpy(buf, bh->b_data + offset, segment); | 121 | memcpy(buf, bh->b_data + offset, segment); |
113 | brelse(bh); | 122 | brelse(bh); |
123 | buf += segment; | ||
114 | buflen -= segment; | 124 | buflen -= segment; |
115 | pos += segment; | 125 | pos += segment; |
116 | } | 126 | } |
@@ -154,28 +164,48 @@ static ssize_t romfs_blk_strnlen(struct super_block *sb, | |||
154 | * compare a string to one in a romfs image on a block device | 164 | * compare a string to one in a romfs image on a block device |
155 | * - return 1 if matched, 0 if differ, -ve if error | 165 | * - return 1 if matched, 0 if differ, -ve if error |
156 | */ | 166 | */ |
157 | static int romfs_blk_strncmp(struct super_block *sb, unsigned long pos, | 167 | static int romfs_blk_strcmp(struct super_block *sb, unsigned long pos, |
158 | const char *str, size_t size) | 168 | const char *str, size_t size) |
159 | { | 169 | { |
160 | struct buffer_head *bh; | 170 | struct buffer_head *bh; |
161 | unsigned long offset; | 171 | unsigned long offset; |
162 | size_t segment; | 172 | size_t segment; |
163 | bool x; | 173 | bool matched, terminated = false; |
164 | 174 | ||
165 | /* scan the string up to 16 bytes at a time */ | 175 | /* compare string up to a block at a time */ |
166 | while (size > 0) { | 176 | while (size > 0) { |
167 | offset = pos & (ROMBSIZE - 1); | 177 | offset = pos & (ROMBSIZE - 1); |
168 | segment = min_t(size_t, size, ROMBSIZE - offset); | 178 | segment = min_t(size_t, size, ROMBSIZE - offset); |
169 | bh = sb_bread(sb, pos >> ROMBSBITS); | 179 | bh = sb_bread(sb, pos >> ROMBSBITS); |
170 | if (!bh) | 180 | if (!bh) |
171 | return -EIO; | 181 | return -EIO; |
172 | x = (memcmp(bh->b_data + offset, str, segment) != 0); | 182 | matched = (memcmp(bh->b_data + offset, str, segment) == 0); |
173 | brelse(bh); | 183 | |
174 | if (x) | ||
175 | return 0; | ||
176 | size -= segment; | 184 | size -= segment; |
177 | pos += segment; | 185 | pos += segment; |
178 | str += segment; | 186 | str += segment; |
187 | if (matched && size == 0 && offset + segment < ROMBSIZE) { | ||
188 | if (!bh->b_data[offset + segment]) | ||
189 | terminated = true; | ||
190 | else | ||
191 | matched = false; | ||
192 | } | ||
193 | brelse(bh); | ||
194 | if (!matched) | ||
195 | return 0; | ||
196 | } | ||
197 | |||
198 | if (!terminated) { | ||
199 | /* the terminating NUL must be on the first byte of the next | ||
200 | * block */ | ||
201 | BUG_ON((pos & (ROMBSIZE - 1)) != 0); | ||
202 | bh = sb_bread(sb, pos >> ROMBSBITS); | ||
203 | if (!bh) | ||
204 | return -EIO; | ||
205 | matched = !bh->b_data[0]; | ||
206 | brelse(bh); | ||
207 | if (!matched) | ||
208 | return 0; | ||
179 | } | 209 | } |
180 | 210 | ||
181 | return 1; | 211 | return 1; |
@@ -234,10 +264,12 @@ ssize_t romfs_dev_strnlen(struct super_block *sb, | |||
234 | 264 | ||
235 | /* | 265 | /* |
236 | * compare a string to one in romfs | 266 | * compare a string to one in romfs |
267 | * - the string to be compared to, str, may not be NUL-terminated; instead the | ||
268 | * string is of the specified size | ||
237 | * - return 1 if matched, 0 if differ, -ve if error | 269 | * - return 1 if matched, 0 if differ, -ve if error |
238 | */ | 270 | */ |
239 | int romfs_dev_strncmp(struct super_block *sb, unsigned long pos, | 271 | int romfs_dev_strcmp(struct super_block *sb, unsigned long pos, |
240 | const char *str, size_t size) | 272 | const char *str, size_t size) |
241 | { | 273 | { |
242 | size_t limit; | 274 | size_t limit; |
243 | 275 | ||
@@ -246,16 +278,16 @@ int romfs_dev_strncmp(struct super_block *sb, unsigned long pos, | |||
246 | return -EIO; | 278 | return -EIO; |
247 | if (size > ROMFS_MAXFN) | 279 | if (size > ROMFS_MAXFN) |
248 | return -ENAMETOOLONG; | 280 | return -ENAMETOOLONG; |
249 | if (size > limit - pos) | 281 | if (size + 1 > limit - pos) |
250 | return -EIO; | 282 | return -EIO; |
251 | 283 | ||
252 | #ifdef CONFIG_ROMFS_ON_MTD | 284 | #ifdef CONFIG_ROMFS_ON_MTD |
253 | if (sb->s_mtd) | 285 | if (sb->s_mtd) |
254 | return romfs_mtd_strncmp(sb, pos, str, size); | 286 | return romfs_mtd_strcmp(sb, pos, str, size); |
255 | #endif | 287 | #endif |
256 | #ifdef CONFIG_ROMFS_ON_BLOCK | 288 | #ifdef CONFIG_ROMFS_ON_BLOCK |
257 | if (sb->s_bdev) | 289 | if (sb->s_bdev) |
258 | return romfs_blk_strncmp(sb, pos, str, size); | 290 | return romfs_blk_strcmp(sb, pos, str, size); |
259 | #endif | 291 | #endif |
260 | return -EIO; | 292 | return -EIO; |
261 | } | 293 | } |
diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 10ca7d984a8b..c53b5ef8a02f 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c | |||
@@ -240,8 +240,8 @@ static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry, | |||
240 | goto error; | 240 | goto error; |
241 | 241 | ||
242 | /* try to match the first 16 bytes of name */ | 242 | /* try to match the first 16 bytes of name */ |
243 | ret = romfs_dev_strncmp(dir->i_sb, offset + ROMFH_SIZE, name, | 243 | ret = romfs_dev_strcmp(dir->i_sb, offset + ROMFH_SIZE, name, |
244 | len); | 244 | len); |
245 | if (ret < 0) | 245 | if (ret < 0) |
246 | goto error; | 246 | goto error; |
247 | if (ret == 1) | 247 | if (ret == 1) |
@@ -55,59 +55,54 @@ int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | |||
55 | 55 | ||
56 | EXPORT_SYMBOL(vfs_getattr); | 56 | EXPORT_SYMBOL(vfs_getattr); |
57 | 57 | ||
58 | int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat) | 58 | int vfs_fstat(unsigned int fd, struct kstat *stat) |
59 | { | 59 | { |
60 | struct path path; | 60 | struct file *f = fget(fd); |
61 | int error; | 61 | int error = -EBADF; |
62 | 62 | ||
63 | error = user_path_at(dfd, name, LOOKUP_FOLLOW, &path); | 63 | if (f) { |
64 | if (!error) { | 64 | error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat); |
65 | error = vfs_getattr(path.mnt, path.dentry, stat); | 65 | fput(f); |
66 | path_put(&path); | ||
67 | } | 66 | } |
68 | return error; | 67 | return error; |
69 | } | 68 | } |
69 | EXPORT_SYMBOL(vfs_fstat); | ||
70 | 70 | ||
71 | int vfs_stat(char __user *name, struct kstat *stat) | 71 | int vfs_fstatat(int dfd, char __user *filename, struct kstat *stat, int flag) |
72 | { | 72 | { |
73 | return vfs_stat_fd(AT_FDCWD, name, stat); | 73 | struct path path; |
74 | } | 74 | int error = -EINVAL; |
75 | int lookup_flags = 0; | ||
75 | 76 | ||
76 | EXPORT_SYMBOL(vfs_stat); | 77 | if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) |
78 | goto out; | ||
77 | 79 | ||
78 | int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat) | 80 | if (!(flag & AT_SYMLINK_NOFOLLOW)) |
79 | { | 81 | lookup_flags |= LOOKUP_FOLLOW; |
80 | struct path path; | ||
81 | int error; | ||
82 | 82 | ||
83 | error = user_path_at(dfd, name, 0, &path); | 83 | error = user_path_at(dfd, filename, lookup_flags, &path); |
84 | if (!error) { | 84 | if (error) |
85 | error = vfs_getattr(path.mnt, path.dentry, stat); | 85 | goto out; |
86 | path_put(&path); | 86 | |
87 | } | 87 | error = vfs_getattr(path.mnt, path.dentry, stat); |
88 | path_put(&path); | ||
89 | out: | ||
88 | return error; | 90 | return error; |
89 | } | 91 | } |
92 | EXPORT_SYMBOL(vfs_fstatat); | ||
90 | 93 | ||
91 | int vfs_lstat(char __user *name, struct kstat *stat) | 94 | int vfs_stat(char __user *name, struct kstat *stat) |
92 | { | 95 | { |
93 | return vfs_lstat_fd(AT_FDCWD, name, stat); | 96 | return vfs_fstatat(AT_FDCWD, name, stat, 0); |
94 | } | 97 | } |
98 | EXPORT_SYMBOL(vfs_stat); | ||
95 | 99 | ||
96 | EXPORT_SYMBOL(vfs_lstat); | 100 | int vfs_lstat(char __user *name, struct kstat *stat) |
97 | |||
98 | int vfs_fstat(unsigned int fd, struct kstat *stat) | ||
99 | { | 101 | { |
100 | struct file *f = fget(fd); | 102 | return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW); |
101 | int error = -EBADF; | ||
102 | |||
103 | if (f) { | ||
104 | error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat); | ||
105 | fput(f); | ||
106 | } | ||
107 | return error; | ||
108 | } | 103 | } |
104 | EXPORT_SYMBOL(vfs_lstat); | ||
109 | 105 | ||
110 | EXPORT_SYMBOL(vfs_fstat); | ||
111 | 106 | ||
112 | #ifdef __ARCH_WANT_OLD_STAT | 107 | #ifdef __ARCH_WANT_OLD_STAT |
113 | 108 | ||
@@ -155,23 +150,25 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta | |||
155 | SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) | 150 | SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) |
156 | { | 151 | { |
157 | struct kstat stat; | 152 | struct kstat stat; |
158 | int error = vfs_stat_fd(AT_FDCWD, filename, &stat); | 153 | int error; |
159 | 154 | ||
160 | if (!error) | 155 | error = vfs_stat(filename, &stat); |
161 | error = cp_old_stat(&stat, statbuf); | 156 | if (error) |
157 | return error; | ||
162 | 158 | ||
163 | return error; | 159 | return cp_old_stat(&stat, statbuf); |
164 | } | 160 | } |
165 | 161 | ||
166 | SYSCALL_DEFINE2(lstat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) | 162 | SYSCALL_DEFINE2(lstat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) |
167 | { | 163 | { |
168 | struct kstat stat; | 164 | struct kstat stat; |
169 | int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); | 165 | int error; |
170 | 166 | ||
171 | if (!error) | 167 | error = vfs_lstat(filename, &stat); |
172 | error = cp_old_stat(&stat, statbuf); | 168 | if (error) |
169 | return error; | ||
173 | 170 | ||
174 | return error; | 171 | return cp_old_stat(&stat, statbuf); |
175 | } | 172 | } |
176 | 173 | ||
177 | SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf) | 174 | SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf) |
@@ -240,23 +237,23 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) | |||
240 | SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf) | 237 | SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf) |
241 | { | 238 | { |
242 | struct kstat stat; | 239 | struct kstat stat; |
243 | int error = vfs_stat_fd(AT_FDCWD, filename, &stat); | 240 | int error = vfs_stat(filename, &stat); |
244 | |||
245 | if (!error) | ||
246 | error = cp_new_stat(&stat, statbuf); | ||
247 | 241 | ||
248 | return error; | 242 | if (error) |
243 | return error; | ||
244 | return cp_new_stat(&stat, statbuf); | ||
249 | } | 245 | } |
250 | 246 | ||
251 | SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf) | 247 | SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf) |
252 | { | 248 | { |
253 | struct kstat stat; | 249 | struct kstat stat; |
254 | int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); | 250 | int error; |
255 | 251 | ||
256 | if (!error) | 252 | error = vfs_lstat(filename, &stat); |
257 | error = cp_new_stat(&stat, statbuf); | 253 | if (error) |
254 | return error; | ||
258 | 255 | ||
259 | return error; | 256 | return cp_new_stat(&stat, statbuf); |
260 | } | 257 | } |
261 | 258 | ||
262 | #if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) | 259 | #if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) |
@@ -264,21 +261,12 @@ SYSCALL_DEFINE4(newfstatat, int, dfd, char __user *, filename, | |||
264 | struct stat __user *, statbuf, int, flag) | 261 | struct stat __user *, statbuf, int, flag) |
265 | { | 262 | { |
266 | struct kstat stat; | 263 | struct kstat stat; |
267 | int error = -EINVAL; | 264 | int error; |
268 | |||
269 | if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) | ||
270 | goto out; | ||
271 | |||
272 | if (flag & AT_SYMLINK_NOFOLLOW) | ||
273 | error = vfs_lstat_fd(dfd, filename, &stat); | ||
274 | else | ||
275 | error = vfs_stat_fd(dfd, filename, &stat); | ||
276 | |||
277 | if (!error) | ||
278 | error = cp_new_stat(&stat, statbuf); | ||
279 | 265 | ||
280 | out: | 266 | error = vfs_fstatat(dfd, filename, &stat, flag); |
281 | return error; | 267 | if (error) |
268 | return error; | ||
269 | return cp_new_stat(&stat, statbuf); | ||
282 | } | 270 | } |
283 | #endif | 271 | #endif |
284 | 272 | ||
@@ -404,21 +392,12 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename, | |||
404 | struct stat64 __user *, statbuf, int, flag) | 392 | struct stat64 __user *, statbuf, int, flag) |
405 | { | 393 | { |
406 | struct kstat stat; | 394 | struct kstat stat; |
407 | int error = -EINVAL; | 395 | int error; |
408 | |||
409 | if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) | ||
410 | goto out; | ||
411 | |||
412 | if (flag & AT_SYMLINK_NOFOLLOW) | ||
413 | error = vfs_lstat_fd(dfd, filename, &stat); | ||
414 | else | ||
415 | error = vfs_stat_fd(dfd, filename, &stat); | ||
416 | |||
417 | if (!error) | ||
418 | error = cp_new_stat64(&stat, statbuf); | ||
419 | 396 | ||
420 | out: | 397 | error = vfs_fstatat(dfd, filename, &stat, flag); |
421 | return error; | 398 | if (error) |
399 | return error; | ||
400 | return cp_new_stat64(&stat, statbuf); | ||
422 | } | 401 | } |
423 | #endif /* __ARCH_WANT_STAT64 */ | 402 | #endif /* __ARCH_WANT_STAT64 */ |
424 | 403 | ||
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 93e0c0281d45..9345806c8853 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c | |||
@@ -157,14 +157,9 @@ static ssize_t write(struct file *file, const char __user *userbuf, | |||
157 | count = size - offs; | 157 | count = size - offs; |
158 | } | 158 | } |
159 | 159 | ||
160 | temp = kmalloc(count, GFP_KERNEL); | 160 | temp = memdup_user(userbuf, count); |
161 | if (!temp) | 161 | if (IS_ERR(temp)) |
162 | return -ENOMEM; | 162 | return PTR_ERR(temp); |
163 | |||
164 | if (copy_from_user(temp, userbuf, count)) { | ||
165 | count = -EFAULT; | ||
166 | goto out_free; | ||
167 | } | ||
168 | 163 | ||
169 | mutex_lock(&bb->mutex); | 164 | mutex_lock(&bb->mutex); |
170 | 165 | ||
@@ -176,8 +171,6 @@ static ssize_t write(struct file *file, const char __user *userbuf, | |||
176 | if (count > 0) | 171 | if (count > 0) |
177 | *off = offs + count; | 172 | *off = offs + count; |
178 | 173 | ||
179 | out_free: | ||
180 | kfree(temp); | ||
181 | return count; | 174 | return count; |
182 | } | 175 | } |
183 | 176 | ||
diff --git a/fs/xattr.c b/fs/xattr.c index 197c4fcac032..d51b8f9db921 100644 --- a/fs/xattr.c +++ b/fs/xattr.c | |||
@@ -237,13 +237,9 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value, | |||
237 | if (size) { | 237 | if (size) { |
238 | if (size > XATTR_SIZE_MAX) | 238 | if (size > XATTR_SIZE_MAX) |
239 | return -E2BIG; | 239 | return -E2BIG; |
240 | kvalue = kmalloc(size, GFP_KERNEL); | 240 | kvalue = memdup_user(value, size); |
241 | if (!kvalue) | 241 | if (IS_ERR(kvalue)) |
242 | return -ENOMEM; | 242 | return PTR_ERR(kvalue); |
243 | if (copy_from_user(kvalue, value, size)) { | ||
244 | kfree(kvalue); | ||
245 | return -EFAULT; | ||
246 | } | ||
247 | } | 243 | } |
248 | 244 | ||
249 | error = vfs_setxattr(d, kname, kvalue, size, flags); | 245 | error = vfs_setxattr(d, kname, kvalue, size, flags); |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index d0b499418a7d..34eaab608e6e 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c | |||
@@ -489,17 +489,12 @@ xfs_attrmulti_attr_set( | |||
489 | if (len > XATTR_SIZE_MAX) | 489 | if (len > XATTR_SIZE_MAX) |
490 | return EINVAL; | 490 | return EINVAL; |
491 | 491 | ||
492 | kbuf = kmalloc(len, GFP_KERNEL); | 492 | kbuf = memdup_user(ubuf, len); |
493 | if (!kbuf) | 493 | if (IS_ERR(kbuf)) |
494 | return ENOMEM; | 494 | return PTR_ERR(kbuf); |
495 | |||
496 | if (copy_from_user(kbuf, ubuf, len)) | ||
497 | goto out_kfree; | ||
498 | 495 | ||
499 | error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); | 496 | error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); |
500 | 497 | ||
501 | out_kfree: | ||
502 | kfree(kbuf); | ||
503 | return error; | 498 | return error; |
504 | } | 499 | } |
505 | 500 | ||
@@ -540,20 +535,16 @@ xfs_attrmulti_by_handle( | |||
540 | if (!size || size > 16 * PAGE_SIZE) | 535 | if (!size || size > 16 * PAGE_SIZE) |
541 | goto out_dput; | 536 | goto out_dput; |
542 | 537 | ||
543 | error = ENOMEM; | 538 | ops = memdup_user(am_hreq.ops, size); |
544 | ops = kmalloc(size, GFP_KERNEL); | 539 | if (IS_ERR(ops)) { |
545 | if (!ops) | 540 | error = PTR_ERR(ops); |
546 | goto out_dput; | 541 | goto out_dput; |
547 | 542 | } | |
548 | error = EFAULT; | ||
549 | if (copy_from_user(ops, am_hreq.ops, size)) | ||
550 | goto out_kfree_ops; | ||
551 | 543 | ||
552 | attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); | 544 | attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); |
553 | if (!attr_name) | 545 | if (!attr_name) |
554 | goto out_kfree_ops; | 546 | goto out_kfree_ops; |
555 | 547 | ||
556 | |||
557 | error = 0; | 548 | error = 0; |
558 | for (i = 0; i < am_hreq.opcount; i++) { | 549 | for (i = 0; i < am_hreq.opcount; i++) { |
559 | ops[i].am_error = strncpy_from_user(attr_name, | 550 | ops[i].am_error = strncpy_from_user(attr_name, |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index c70c4e3db790..0882d166239a 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c | |||
@@ -427,20 +427,16 @@ xfs_compat_attrmulti_by_handle( | |||
427 | if (!size || size > 16 * PAGE_SIZE) | 427 | if (!size || size > 16 * PAGE_SIZE) |
428 | goto out_dput; | 428 | goto out_dput; |
429 | 429 | ||
430 | error = ENOMEM; | 430 | ops = memdup_user(compat_ptr(am_hreq.ops), size); |
431 | ops = kmalloc(size, GFP_KERNEL); | 431 | if (IS_ERR(ops)) { |
432 | if (!ops) | 432 | error = PTR_ERR(ops); |
433 | goto out_dput; | 433 | goto out_dput; |
434 | 434 | } | |
435 | error = EFAULT; | ||
436 | if (copy_from_user(ops, compat_ptr(am_hreq.ops), size)) | ||
437 | goto out_kfree_ops; | ||
438 | 435 | ||
439 | attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); | 436 | attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); |
440 | if (!attr_name) | 437 | if (!attr_name) |
441 | goto out_kfree_ops; | 438 | goto out_kfree_ops; |
442 | 439 | ||
443 | |||
444 | error = 0; | 440 | error = 0; |
445 | for (i = 0; i < am_hreq.opcount; i++) { | 441 | for (i = 0; i < am_hreq.opcount; i++) { |
446 | ops[i].am_error = strncpy_from_user(attr_name, | 442 | ops[i].am_error = strncpy_from_user(attr_name, |