diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-05-07 05:17:13 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-05-07 05:17:34 -0400 |
commit | 44347d947f628060b92449702071bfe1d31dfb75 (patch) | |
tree | c6ed74610d5b3295df4296659f80f5feb94b28cc /fs | |
parent | d94fc523f3c35bd8013f04827e94756cbc0212f4 (diff) | |
parent | 413f81eba35d6ede9289b0c8a920c013a84fac71 (diff) |
Merge branch 'linus' into tracing/core
Merge reason: tracing/core was on a .30-rc1 base and was missing out on
a handful of tracing fixes present in .30-rc5-almost.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'fs')
135 files changed, 2471 insertions, 2088 deletions
diff --git a/fs/afs/file.c b/fs/afs/file.c index 7a1d942ef68d..0149dab365e7 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c | |||
@@ -102,6 +102,7 @@ int afs_release(struct inode *inode, struct file *file) | |||
102 | return 0; | 102 | return 0; |
103 | } | 103 | } |
104 | 104 | ||
105 | #ifdef CONFIG_AFS_FSCACHE | ||
105 | /* | 106 | /* |
106 | * deal with notification that a page was read from the cache | 107 | * deal with notification that a page was read from the cache |
107 | */ | 108 | */ |
@@ -117,6 +118,7 @@ static void afs_file_readpage_read_complete(struct page *page, | |||
117 | SetPageUptodate(page); | 118 | SetPageUptodate(page); |
118 | unlock_page(page); | 119 | unlock_page(page); |
119 | } | 120 | } |
121 | #endif | ||
120 | 122 | ||
121 | /* | 123 | /* |
122 | * AFS read page from file, directory or symlink | 124 | * AFS read page from file, directory or symlink |
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c index bf8c8af98004..4eb4d8dfb2f1 100644 --- a/fs/autofs/dirhash.c +++ b/fs/autofs/dirhash.c | |||
@@ -39,10 +39,12 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, | |||
39 | { | 39 | { |
40 | struct autofs_dirhash *dh = &sbi->dirhash; | 40 | struct autofs_dirhash *dh = &sbi->dirhash; |
41 | struct autofs_dir_ent *ent; | 41 | struct autofs_dir_ent *ent; |
42 | struct dentry *dentry; | ||
43 | unsigned long timeout = sbi->exp_timeout; | 42 | unsigned long timeout = sbi->exp_timeout; |
44 | 43 | ||
45 | while (1) { | 44 | while (1) { |
45 | struct path path; | ||
46 | int umount_ok; | ||
47 | |||
46 | if ( list_empty(&dh->expiry_head) || sbi->catatonic ) | 48 | if ( list_empty(&dh->expiry_head) || sbi->catatonic ) |
47 | return NULL; /* No entries */ | 49 | return NULL; /* No entries */ |
48 | /* We keep the list sorted by last_usage and want old stuff */ | 50 | /* We keep the list sorted by last_usage and want old stuff */ |
@@ -57,17 +59,17 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, | |||
57 | return ent; /* Symlinks are always expirable */ | 59 | return ent; /* Symlinks are always expirable */ |
58 | 60 | ||
59 | /* Get the dentry for the autofs subdirectory */ | 61 | /* Get the dentry for the autofs subdirectory */ |
60 | dentry = ent->dentry; | 62 | path.dentry = ent->dentry; |
61 | 63 | ||
62 | if ( !dentry ) { | 64 | if (!path.dentry) { |
63 | /* Should only happen in catatonic mode */ | 65 | /* Should only happen in catatonic mode */ |
64 | printk("autofs: dentry == NULL but inode range is directory, entry %s\n", ent->name); | 66 | printk("autofs: dentry == NULL but inode range is directory, entry %s\n", ent->name); |
65 | autofs_delete_usage(ent); | 67 | autofs_delete_usage(ent); |
66 | continue; | 68 | continue; |
67 | } | 69 | } |
68 | 70 | ||
69 | if ( !dentry->d_inode ) { | 71 | if (!path.dentry->d_inode) { |
70 | dput(dentry); | 72 | dput(path.dentry); |
71 | printk("autofs: negative dentry on expiry queue: %s\n", | 73 | printk("autofs: negative dentry on expiry queue: %s\n", |
72 | ent->name); | 74 | ent->name); |
73 | autofs_delete_usage(ent); | 75 | autofs_delete_usage(ent); |
@@ -76,29 +78,29 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb, | |||
76 | 78 | ||
77 | /* Make sure entry is mounted and unused; note that dentry will | 79 | /* Make sure entry is mounted and unused; note that dentry will |
78 | point to the mounted-on-top root. */ | 80 | point to the mounted-on-top root. */ |
79 | if (!S_ISDIR(dentry->d_inode->i_mode)||!d_mountpoint(dentry)) { | 81 | if (!S_ISDIR(path.dentry->d_inode->i_mode) || |
82 | !d_mountpoint(path.dentry)) { | ||
80 | DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); | 83 | DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); |
81 | continue; | 84 | continue; |
82 | } | 85 | } |
83 | mntget(mnt); | 86 | path.mnt = mnt; |
84 | dget(dentry); | 87 | path_get(&path); |
85 | if (!follow_down(&mnt, &dentry)) { | 88 | if (!follow_down(&path.mnt, &path.dentry)) { |
86 | dput(dentry); | 89 | path_put(&path); |
87 | mntput(mnt); | ||
88 | DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); | 90 | DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name)); |
89 | continue; | 91 | continue; |
90 | } | 92 | } |
91 | while (d_mountpoint(dentry) && follow_down(&mnt, &dentry)) | 93 | while (d_mountpoint(path.dentry) && |
94 | follow_down(&path.mnt, &path.dentry)) | ||
92 | ; | 95 | ; |
93 | dput(dentry); | 96 | umount_ok = may_umount(path.mnt); |
97 | path_put(&path); | ||
94 | 98 | ||
95 | if ( may_umount(mnt) ) { | 99 | if (umount_ok) { |
96 | mntput(mnt); | ||
97 | DPRINTK(("autofs: signaling expire on %s\n", ent->name)); | 100 | DPRINTK(("autofs: signaling expire on %s\n", ent->name)); |
98 | return ent; /* Expirable! */ | 101 | return ent; /* Expirable! */ |
99 | } | 102 | } |
100 | DPRINTK(("autofs: didn't expire due to may_umount: %s\n", ent->name)); | 103 | DPRINTK(("autofs: didn't expire due to may_umount: %s\n", ent->name)); |
101 | mntput(mnt); | ||
102 | } | 104 | } |
103 | return NULL; /* No expirable entries */ | 105 | return NULL; /* No expirable entries */ |
104 | } | 106 | } |
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 9e5ae8a4f5c8..84168c0dcc2d 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c | |||
@@ -54,11 +54,10 @@ static int check_name(const char *name) | |||
54 | * Check a string doesn't overrun the chunk of | 54 | * Check a string doesn't overrun the chunk of |
55 | * memory we copied from user land. | 55 | * memory we copied from user land. |
56 | */ | 56 | */ |
57 | static int invalid_str(char *str, void *end) | 57 | static int invalid_str(char *str, size_t size) |
58 | { | 58 | { |
59 | while ((void *) str <= end) | 59 | if (memchr(str, 0, size)) |
60 | if (!*str++) | 60 | return 0; |
61 | return 0; | ||
62 | return -EINVAL; | 61 | return -EINVAL; |
63 | } | 62 | } |
64 | 63 | ||
@@ -138,8 +137,7 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) | |||
138 | } | 137 | } |
139 | 138 | ||
140 | if (param->size > sizeof(*param)) { | 139 | if (param->size > sizeof(*param)) { |
141 | err = invalid_str(param->path, | 140 | err = invalid_str(param->path, param->size - sizeof(*param)); |
142 | (void *) ((size_t) param + param->size)); | ||
143 | if (err) { | 141 | if (err) { |
144 | AUTOFS_WARN( | 142 | AUTOFS_WARN( |
145 | "path string terminator missing for cmd(0x%08x)", | 143 | "path string terminator missing for cmd(0x%08x)", |
@@ -488,7 +486,7 @@ static int autofs_dev_ioctl_requester(struct file *fp, | |||
488 | } | 486 | } |
489 | 487 | ||
490 | path = param->path; | 488 | path = param->path; |
491 | devid = sbi->sb->s_dev; | 489 | devid = new_encode_dev(sbi->sb->s_dev); |
492 | 490 | ||
493 | param->requester.uid = param->requester.gid = -1; | 491 | param->requester.uid = param->requester.gid = -1; |
494 | 492 | ||
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 75f7ddacf7d6..3077d8f16523 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c | |||
@@ -70,8 +70,10 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) | |||
70 | * Otherwise it's an offset mount and we need to check | 70 | * Otherwise it's an offset mount and we need to check |
71 | * if we can umount its mount, if there is one. | 71 | * if we can umount its mount, if there is one. |
72 | */ | 72 | */ |
73 | if (!d_mountpoint(dentry)) | 73 | if (!d_mountpoint(dentry)) { |
74 | status = 0; | ||
74 | goto done; | 75 | goto done; |
76 | } | ||
75 | } | 77 | } |
76 | 78 | ||
77 | /* Update the expiry counter if fs is busy */ | 79 | /* Update the expiry counter if fs is busy */ |
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 70cfc4b84ae0..fdb66faa24f1 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c | |||
@@ -1388,7 +1388,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus, | |||
1388 | prstatus->pr_sigpend = p->pending.signal.sig[0]; | 1388 | prstatus->pr_sigpend = p->pending.signal.sig[0]; |
1389 | prstatus->pr_sighold = p->blocked.sig[0]; | 1389 | prstatus->pr_sighold = p->blocked.sig[0]; |
1390 | prstatus->pr_pid = task_pid_vnr(p); | 1390 | prstatus->pr_pid = task_pid_vnr(p); |
1391 | prstatus->pr_ppid = task_pid_vnr(p->parent); | 1391 | prstatus->pr_ppid = task_pid_vnr(p->real_parent); |
1392 | prstatus->pr_pgrp = task_pgrp_vnr(p); | 1392 | prstatus->pr_pgrp = task_pgrp_vnr(p); |
1393 | prstatus->pr_sid = task_session_vnr(p); | 1393 | prstatus->pr_sid = task_session_vnr(p); |
1394 | if (thread_group_leader(p)) { | 1394 | if (thread_group_leader(p)) { |
@@ -1433,7 +1433,7 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, | |||
1433 | psinfo->pr_psargs[len] = 0; | 1433 | psinfo->pr_psargs[len] = 0; |
1434 | 1434 | ||
1435 | psinfo->pr_pid = task_pid_vnr(p); | 1435 | psinfo->pr_pid = task_pid_vnr(p); |
1436 | psinfo->pr_ppid = task_pid_vnr(p->parent); | 1436 | psinfo->pr_ppid = task_pid_vnr(p->real_parent); |
1437 | psinfo->pr_pgrp = task_pgrp_vnr(p); | 1437 | psinfo->pr_pgrp = task_pgrp_vnr(p); |
1438 | psinfo->pr_sid = task_session_vnr(p); | 1438 | psinfo->pr_sid = task_session_vnr(p); |
1439 | 1439 | ||
@@ -175,14 +175,6 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, | |||
175 | struct bio_vec *bvl; | 175 | struct bio_vec *bvl; |
176 | 176 | ||
177 | /* | 177 | /* |
178 | * If 'bs' is given, lookup the pool and do the mempool alloc. | ||
179 | * If not, this is a bio_kmalloc() allocation and just do a | ||
180 | * kzalloc() for the exact number of vecs right away. | ||
181 | */ | ||
182 | if (!bs) | ||
183 | bvl = kmalloc(nr * sizeof(struct bio_vec), gfp_mask); | ||
184 | |||
185 | /* | ||
186 | * see comment near bvec_array define! | 178 | * see comment near bvec_array define! |
187 | */ | 179 | */ |
188 | switch (nr) { | 180 | switch (nr) { |
@@ -260,21 +252,6 @@ void bio_free(struct bio *bio, struct bio_set *bs) | |||
260 | mempool_free(p, bs->bio_pool); | 252 | mempool_free(p, bs->bio_pool); |
261 | } | 253 | } |
262 | 254 | ||
263 | /* | ||
264 | * default destructor for a bio allocated with bio_alloc_bioset() | ||
265 | */ | ||
266 | static void bio_fs_destructor(struct bio *bio) | ||
267 | { | ||
268 | bio_free(bio, fs_bio_set); | ||
269 | } | ||
270 | |||
271 | static void bio_kmalloc_destructor(struct bio *bio) | ||
272 | { | ||
273 | if (bio_has_allocated_vec(bio)) | ||
274 | kfree(bio->bi_io_vec); | ||
275 | kfree(bio); | ||
276 | } | ||
277 | |||
278 | void bio_init(struct bio *bio) | 255 | void bio_init(struct bio *bio) |
279 | { | 256 | { |
280 | memset(bio, 0, sizeof(*bio)); | 257 | memset(bio, 0, sizeof(*bio)); |
@@ -301,21 +278,15 @@ void bio_init(struct bio *bio) | |||
301 | **/ | 278 | **/ |
302 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) | 279 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) |
303 | { | 280 | { |
281 | unsigned long idx = BIO_POOL_NONE; | ||
304 | struct bio_vec *bvl = NULL; | 282 | struct bio_vec *bvl = NULL; |
305 | struct bio *bio = NULL; | 283 | struct bio *bio; |
306 | unsigned long idx = 0; | 284 | void *p; |
307 | void *p = NULL; | 285 | |
308 | 286 | p = mempool_alloc(bs->bio_pool, gfp_mask); | |
309 | if (bs) { | 287 | if (unlikely(!p)) |
310 | p = mempool_alloc(bs->bio_pool, gfp_mask); | 288 | return NULL; |
311 | if (!p) | 289 | bio = p + bs->front_pad; |
312 | goto err; | ||
313 | bio = p + bs->front_pad; | ||
314 | } else { | ||
315 | bio = kmalloc(sizeof(*bio), gfp_mask); | ||
316 | if (!bio) | ||
317 | goto err; | ||
318 | } | ||
319 | 290 | ||
320 | bio_init(bio); | 291 | bio_init(bio); |
321 | 292 | ||
@@ -332,22 +303,33 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) | |||
332 | 303 | ||
333 | nr_iovecs = bvec_nr_vecs(idx); | 304 | nr_iovecs = bvec_nr_vecs(idx); |
334 | } | 305 | } |
306 | out_set: | ||
335 | bio->bi_flags |= idx << BIO_POOL_OFFSET; | 307 | bio->bi_flags |= idx << BIO_POOL_OFFSET; |
336 | bio->bi_max_vecs = nr_iovecs; | 308 | bio->bi_max_vecs = nr_iovecs; |
337 | out_set: | ||
338 | bio->bi_io_vec = bvl; | 309 | bio->bi_io_vec = bvl; |
339 | |||
340 | return bio; | 310 | return bio; |
341 | 311 | ||
342 | err_free: | 312 | err_free: |
343 | if (bs) | 313 | mempool_free(p, bs->bio_pool); |
344 | mempool_free(p, bs->bio_pool); | ||
345 | else | ||
346 | kfree(bio); | ||
347 | err: | ||
348 | return NULL; | 314 | return NULL; |
349 | } | 315 | } |
350 | 316 | ||
317 | static void bio_fs_destructor(struct bio *bio) | ||
318 | { | ||
319 | bio_free(bio, fs_bio_set); | ||
320 | } | ||
321 | |||
322 | /** | ||
323 | * bio_alloc - allocate a new bio, memory pool backed | ||
324 | * @gfp_mask: allocation mask to use | ||
325 | * @nr_iovecs: number of iovecs | ||
326 | * | ||
327 | * Allocate a new bio with @nr_iovecs bvecs. If @gfp_mask | ||
328 | * contains __GFP_WAIT, the allocation is guaranteed to succeed. | ||
329 | * | ||
330 | * RETURNS: | ||
331 | * Pointer to new bio on success, NULL on failure. | ||
332 | */ | ||
351 | struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) | 333 | struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) |
352 | { | 334 | { |
353 | struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); | 335 | struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); |
@@ -358,19 +340,45 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) | |||
358 | return bio; | 340 | return bio; |
359 | } | 341 | } |
360 | 342 | ||
361 | /* | 343 | static void bio_kmalloc_destructor(struct bio *bio) |
362 | * Like bio_alloc(), but doesn't use a mempool backing. This means that | 344 | { |
363 | * it CAN fail, but while bio_alloc() can only be used for allocations | 345 | if (bio_integrity(bio)) |
364 | * that have a short (finite) life span, bio_kmalloc() should be used | 346 | bio_integrity_free(bio); |
365 | * for more permanent bio allocations (like allocating some bio's for | 347 | kfree(bio); |
366 | * initalization or setup purposes). | 348 | } |
367 | */ | 349 | |
350 | /** | ||
351 | * bio_alloc - allocate a bio for I/O | ||
352 | * @gfp_mask: the GFP_ mask given to the slab allocator | ||
353 | * @nr_iovecs: number of iovecs to pre-allocate | ||
354 | * | ||
355 | * Description: | ||
356 | * bio_alloc will allocate a bio and associated bio_vec array that can hold | ||
357 | * at least @nr_iovecs entries. Allocations will be done from the | ||
358 | * fs_bio_set. Also see @bio_alloc_bioset. | ||
359 | * | ||
360 | * If %__GFP_WAIT is set, then bio_alloc will always be able to allocate | ||
361 | * a bio. This is due to the mempool guarantees. To make this work, callers | ||
362 | * must never allocate more than 1 bio at the time from this pool. Callers | ||
363 | * that need to allocate more than 1 bio must always submit the previously | ||
364 | * allocate bio for IO before attempting to allocate a new one. Failure to | ||
365 | * do so can cause livelocks under memory pressure. | ||
366 | * | ||
367 | **/ | ||
368 | struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) | 368 | struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) |
369 | { | 369 | { |
370 | struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); | 370 | struct bio *bio; |
371 | 371 | ||
372 | if (bio) | 372 | bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec), |
373 | bio->bi_destructor = bio_kmalloc_destructor; | 373 | gfp_mask); |
374 | if (unlikely(!bio)) | ||
375 | return NULL; | ||
376 | |||
377 | bio_init(bio); | ||
378 | bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET; | ||
379 | bio->bi_max_vecs = nr_iovecs; | ||
380 | bio->bi_io_vec = bio->bi_inline_vecs; | ||
381 | bio->bi_destructor = bio_kmalloc_destructor; | ||
374 | 382 | ||
375 | return bio; | 383 | return bio; |
376 | } | 384 | } |
@@ -809,12 +817,15 @@ struct bio *bio_copy_user_iov(struct request_queue *q, | |||
809 | len += iov[i].iov_len; | 817 | len += iov[i].iov_len; |
810 | } | 818 | } |
811 | 819 | ||
820 | if (offset) | ||
821 | nr_pages++; | ||
822 | |||
812 | bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask); | 823 | bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask); |
813 | if (!bmd) | 824 | if (!bmd) |
814 | return ERR_PTR(-ENOMEM); | 825 | return ERR_PTR(-ENOMEM); |
815 | 826 | ||
816 | ret = -ENOMEM; | 827 | ret = -ENOMEM; |
817 | bio = bio_alloc(gfp_mask, nr_pages); | 828 | bio = bio_kmalloc(gfp_mask, nr_pages); |
818 | if (!bio) | 829 | if (!bio) |
819 | goto out_bmd; | 830 | goto out_bmd; |
820 | 831 | ||
@@ -938,7 +949,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, | |||
938 | if (!nr_pages) | 949 | if (!nr_pages) |
939 | return ERR_PTR(-EINVAL); | 950 | return ERR_PTR(-EINVAL); |
940 | 951 | ||
941 | bio = bio_alloc(gfp_mask, nr_pages); | 952 | bio = bio_kmalloc(gfp_mask, nr_pages); |
942 | if (!bio) | 953 | if (!bio) |
943 | return ERR_PTR(-ENOMEM); | 954 | return ERR_PTR(-ENOMEM); |
944 | 955 | ||
@@ -1122,7 +1133,7 @@ static struct bio *__bio_map_kern(struct request_queue *q, void *data, | |||
1122 | int offset, i; | 1133 | int offset, i; |
1123 | struct bio *bio; | 1134 | struct bio *bio; |
1124 | 1135 | ||
1125 | bio = bio_alloc(gfp_mask, nr_pages); | 1136 | bio = bio_kmalloc(gfp_mask, nr_pages); |
1126 | if (!bio) | 1137 | if (!bio) |
1127 | return ERR_PTR(-ENOMEM); | 1138 | return ERR_PTR(-ENOMEM); |
1128 | 1139 | ||
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 9adf5e4f7e96..94212844a9bc 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile | |||
@@ -1,25 +1,10 @@ | |||
1 | ifneq ($(KERNELRELEASE),) | ||
2 | # kbuild part of makefile | ||
3 | 1 | ||
4 | obj-$(CONFIG_BTRFS_FS) := btrfs.o | 2 | obj-$(CONFIG_BTRFS_FS) := btrfs.o |
5 | btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ | 3 | |
4 | btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ | ||
6 | file-item.o inode-item.o inode-map.o disk-io.o \ | 5 | file-item.o inode-item.o inode-map.o disk-io.o \ |
7 | transaction.o inode.o file.o tree-defrag.o \ | 6 | transaction.o inode.o file.o tree-defrag.o \ |
8 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ | 7 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ |
9 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ | 8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ |
10 | ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \ | 9 | ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \ |
11 | compression.o delayed-ref.o | 10 | compression.o delayed-ref.o |
12 | else | ||
13 | |||
14 | # Normal Makefile | ||
15 | |||
16 | KERNELDIR := /lib/modules/`uname -r`/build | ||
17 | all: | ||
18 | $(MAKE) -C $(KERNELDIR) M=`pwd` CONFIG_BTRFS_FS=m modules | ||
19 | |||
20 | modules_install: | ||
21 | $(MAKE) -C $(KERNELDIR) M=`pwd` modules_install | ||
22 | clean: | ||
23 | $(MAKE) -C $(KERNELDIR) M=`pwd` clean | ||
24 | |||
25 | endif | ||
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 7fdd184a528d..cbba000dccbe 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c | |||
@@ -60,15 +60,20 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) | |||
60 | return ERR_PTR(-EINVAL); | 60 | return ERR_PTR(-EINVAL); |
61 | } | 61 | } |
62 | 62 | ||
63 | /* Handle the cached NULL acl case without locking */ | ||
64 | acl = ACCESS_ONCE(*p_acl); | ||
65 | if (!acl) | ||
66 | return acl; | ||
67 | |||
63 | spin_lock(&inode->i_lock); | 68 | spin_lock(&inode->i_lock); |
64 | if (*p_acl != BTRFS_ACL_NOT_CACHED) | 69 | acl = *p_acl; |
65 | acl = posix_acl_dup(*p_acl); | 70 | if (acl != BTRFS_ACL_NOT_CACHED) |
71 | acl = posix_acl_dup(acl); | ||
66 | spin_unlock(&inode->i_lock); | 72 | spin_unlock(&inode->i_lock); |
67 | 73 | ||
68 | if (acl) | 74 | if (acl != BTRFS_ACL_NOT_CACHED) |
69 | return acl; | 75 | return acl; |
70 | 76 | ||
71 | |||
72 | size = __btrfs_getxattr(inode, name, "", 0); | 77 | size = __btrfs_getxattr(inode, name, "", 0); |
73 | if (size > 0) { | 78 | if (size > 0) { |
74 | value = kzalloc(size, GFP_NOFS); | 79 | value = kzalloc(size, GFP_NOFS); |
@@ -80,9 +85,12 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) | |||
80 | btrfs_update_cached_acl(inode, p_acl, acl); | 85 | btrfs_update_cached_acl(inode, p_acl, acl); |
81 | } | 86 | } |
82 | kfree(value); | 87 | kfree(value); |
83 | } else if (size == -ENOENT) { | 88 | } else if (size == -ENOENT || size == -ENODATA || size == 0) { |
89 | /* FIXME, who returns -ENOENT? I think nobody */ | ||
84 | acl = NULL; | 90 | acl = NULL; |
85 | btrfs_update_cached_acl(inode, p_acl, acl); | 91 | btrfs_update_cached_acl(inode, p_acl, acl); |
92 | } else { | ||
93 | acl = ERR_PTR(-EIO); | ||
86 | } | 94 | } |
87 | 95 | ||
88 | return acl; | 96 | return acl; |
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 51bfdfc8fcda..502c3d61de62 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #define WORK_QUEUED_BIT 0 | 25 | #define WORK_QUEUED_BIT 0 |
26 | #define WORK_DONE_BIT 1 | 26 | #define WORK_DONE_BIT 1 |
27 | #define WORK_ORDER_DONE_BIT 2 | 27 | #define WORK_ORDER_DONE_BIT 2 |
28 | #define WORK_HIGH_PRIO_BIT 3 | ||
28 | 29 | ||
29 | /* | 30 | /* |
30 | * container for the kthread task pointer and the list of pending work | 31 | * container for the kthread task pointer and the list of pending work |
@@ -36,6 +37,7 @@ struct btrfs_worker_thread { | |||
36 | 37 | ||
37 | /* list of struct btrfs_work that are waiting for service */ | 38 | /* list of struct btrfs_work that are waiting for service */ |
38 | struct list_head pending; | 39 | struct list_head pending; |
40 | struct list_head prio_pending; | ||
39 | 41 | ||
40 | /* list of worker threads from struct btrfs_workers */ | 42 | /* list of worker threads from struct btrfs_workers */ |
41 | struct list_head worker_list; | 43 | struct list_head worker_list; |
@@ -103,10 +105,16 @@ static noinline int run_ordered_completions(struct btrfs_workers *workers, | |||
103 | 105 | ||
104 | spin_lock_irqsave(&workers->lock, flags); | 106 | spin_lock_irqsave(&workers->lock, flags); |
105 | 107 | ||
106 | while (!list_empty(&workers->order_list)) { | 108 | while (1) { |
107 | work = list_entry(workers->order_list.next, | 109 | if (!list_empty(&workers->prio_order_list)) { |
108 | struct btrfs_work, order_list); | 110 | work = list_entry(workers->prio_order_list.next, |
109 | 111 | struct btrfs_work, order_list); | |
112 | } else if (!list_empty(&workers->order_list)) { | ||
113 | work = list_entry(workers->order_list.next, | ||
114 | struct btrfs_work, order_list); | ||
115 | } else { | ||
116 | break; | ||
117 | } | ||
110 | if (!test_bit(WORK_DONE_BIT, &work->flags)) | 118 | if (!test_bit(WORK_DONE_BIT, &work->flags)) |
111 | break; | 119 | break; |
112 | 120 | ||
@@ -143,8 +151,14 @@ static int worker_loop(void *arg) | |||
143 | do { | 151 | do { |
144 | spin_lock_irq(&worker->lock); | 152 | spin_lock_irq(&worker->lock); |
145 | again_locked: | 153 | again_locked: |
146 | while (!list_empty(&worker->pending)) { | 154 | while (1) { |
147 | cur = worker->pending.next; | 155 | if (!list_empty(&worker->prio_pending)) |
156 | cur = worker->prio_pending.next; | ||
157 | else if (!list_empty(&worker->pending)) | ||
158 | cur = worker->pending.next; | ||
159 | else | ||
160 | break; | ||
161 | |||
148 | work = list_entry(cur, struct btrfs_work, list); | 162 | work = list_entry(cur, struct btrfs_work, list); |
149 | list_del(&work->list); | 163 | list_del(&work->list); |
150 | clear_bit(WORK_QUEUED_BIT, &work->flags); | 164 | clear_bit(WORK_QUEUED_BIT, &work->flags); |
@@ -163,7 +177,6 @@ again_locked: | |||
163 | 177 | ||
164 | spin_lock_irq(&worker->lock); | 178 | spin_lock_irq(&worker->lock); |
165 | check_idle_worker(worker); | 179 | check_idle_worker(worker); |
166 | |||
167 | } | 180 | } |
168 | if (freezing(current)) { | 181 | if (freezing(current)) { |
169 | worker->working = 0; | 182 | worker->working = 0; |
@@ -178,7 +191,8 @@ again_locked: | |||
178 | * jump_in? | 191 | * jump_in? |
179 | */ | 192 | */ |
180 | smp_mb(); | 193 | smp_mb(); |
181 | if (!list_empty(&worker->pending)) | 194 | if (!list_empty(&worker->pending) || |
195 | !list_empty(&worker->prio_pending)) | ||
182 | continue; | 196 | continue; |
183 | 197 | ||
184 | /* | 198 | /* |
@@ -191,7 +205,8 @@ again_locked: | |||
191 | */ | 205 | */ |
192 | schedule_timeout(1); | 206 | schedule_timeout(1); |
193 | smp_mb(); | 207 | smp_mb(); |
194 | if (!list_empty(&worker->pending)) | 208 | if (!list_empty(&worker->pending) || |
209 | !list_empty(&worker->prio_pending)) | ||
195 | continue; | 210 | continue; |
196 | 211 | ||
197 | if (kthread_should_stop()) | 212 | if (kthread_should_stop()) |
@@ -200,7 +215,8 @@ again_locked: | |||
200 | /* still no more work?, sleep for real */ | 215 | /* still no more work?, sleep for real */ |
201 | spin_lock_irq(&worker->lock); | 216 | spin_lock_irq(&worker->lock); |
202 | set_current_state(TASK_INTERRUPTIBLE); | 217 | set_current_state(TASK_INTERRUPTIBLE); |
203 | if (!list_empty(&worker->pending)) | 218 | if (!list_empty(&worker->pending) || |
219 | !list_empty(&worker->prio_pending)) | ||
204 | goto again_locked; | 220 | goto again_locked; |
205 | 221 | ||
206 | /* | 222 | /* |
@@ -248,6 +264,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max) | |||
248 | INIT_LIST_HEAD(&workers->worker_list); | 264 | INIT_LIST_HEAD(&workers->worker_list); |
249 | INIT_LIST_HEAD(&workers->idle_list); | 265 | INIT_LIST_HEAD(&workers->idle_list); |
250 | INIT_LIST_HEAD(&workers->order_list); | 266 | INIT_LIST_HEAD(&workers->order_list); |
267 | INIT_LIST_HEAD(&workers->prio_order_list); | ||
251 | spin_lock_init(&workers->lock); | 268 | spin_lock_init(&workers->lock); |
252 | workers->max_workers = max; | 269 | workers->max_workers = max; |
253 | workers->idle_thresh = 32; | 270 | workers->idle_thresh = 32; |
@@ -273,6 +290,7 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) | |||
273 | } | 290 | } |
274 | 291 | ||
275 | INIT_LIST_HEAD(&worker->pending); | 292 | INIT_LIST_HEAD(&worker->pending); |
293 | INIT_LIST_HEAD(&worker->prio_pending); | ||
276 | INIT_LIST_HEAD(&worker->worker_list); | 294 | INIT_LIST_HEAD(&worker->worker_list); |
277 | spin_lock_init(&worker->lock); | 295 | spin_lock_init(&worker->lock); |
278 | atomic_set(&worker->num_pending, 0); | 296 | atomic_set(&worker->num_pending, 0); |
@@ -396,7 +414,10 @@ int btrfs_requeue_work(struct btrfs_work *work) | |||
396 | goto out; | 414 | goto out; |
397 | 415 | ||
398 | spin_lock_irqsave(&worker->lock, flags); | 416 | spin_lock_irqsave(&worker->lock, flags); |
399 | list_add_tail(&work->list, &worker->pending); | 417 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) |
418 | list_add_tail(&work->list, &worker->prio_pending); | ||
419 | else | ||
420 | list_add_tail(&work->list, &worker->pending); | ||
400 | atomic_inc(&worker->num_pending); | 421 | atomic_inc(&worker->num_pending); |
401 | 422 | ||
402 | /* by definition we're busy, take ourselves off the idle | 423 | /* by definition we're busy, take ourselves off the idle |
@@ -422,6 +443,11 @@ out: | |||
422 | return 0; | 443 | return 0; |
423 | } | 444 | } |
424 | 445 | ||
446 | void btrfs_set_work_high_prio(struct btrfs_work *work) | ||
447 | { | ||
448 | set_bit(WORK_HIGH_PRIO_BIT, &work->flags); | ||
449 | } | ||
450 | |||
425 | /* | 451 | /* |
426 | * places a struct btrfs_work into the pending queue of one of the kthreads | 452 | * places a struct btrfs_work into the pending queue of one of the kthreads |
427 | */ | 453 | */ |
@@ -438,7 +464,12 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | |||
438 | worker = find_worker(workers); | 464 | worker = find_worker(workers); |
439 | if (workers->ordered) { | 465 | if (workers->ordered) { |
440 | spin_lock_irqsave(&workers->lock, flags); | 466 | spin_lock_irqsave(&workers->lock, flags); |
441 | list_add_tail(&work->order_list, &workers->order_list); | 467 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) { |
468 | list_add_tail(&work->order_list, | ||
469 | &workers->prio_order_list); | ||
470 | } else { | ||
471 | list_add_tail(&work->order_list, &workers->order_list); | ||
472 | } | ||
442 | spin_unlock_irqrestore(&workers->lock, flags); | 473 | spin_unlock_irqrestore(&workers->lock, flags); |
443 | } else { | 474 | } else { |
444 | INIT_LIST_HEAD(&work->order_list); | 475 | INIT_LIST_HEAD(&work->order_list); |
@@ -446,7 +477,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | |||
446 | 477 | ||
447 | spin_lock_irqsave(&worker->lock, flags); | 478 | spin_lock_irqsave(&worker->lock, flags); |
448 | 479 | ||
449 | list_add_tail(&work->list, &worker->pending); | 480 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) |
481 | list_add_tail(&work->list, &worker->prio_pending); | ||
482 | else | ||
483 | list_add_tail(&work->list, &worker->pending); | ||
450 | atomic_inc(&worker->num_pending); | 484 | atomic_inc(&worker->num_pending); |
451 | check_busy_worker(worker); | 485 | check_busy_worker(worker); |
452 | 486 | ||
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 31be4ed8b63e..1b511c109db6 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h | |||
@@ -85,6 +85,7 @@ struct btrfs_workers { | |||
85 | * of work items waiting for completion | 85 | * of work items waiting for completion |
86 | */ | 86 | */ |
87 | struct list_head order_list; | 87 | struct list_head order_list; |
88 | struct list_head prio_order_list; | ||
88 | 89 | ||
89 | /* lock for finding the next worker thread to queue on */ | 90 | /* lock for finding the next worker thread to queue on */ |
90 | spinlock_t lock; | 91 | spinlock_t lock; |
@@ -98,4 +99,5 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers); | |||
98 | int btrfs_stop_workers(struct btrfs_workers *workers); | 99 | int btrfs_stop_workers(struct btrfs_workers *workers); |
99 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max); | 100 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max); |
100 | int btrfs_requeue_work(struct btrfs_work *work); | 101 | int btrfs_requeue_work(struct btrfs_work *work); |
102 | void btrfs_set_work_high_prio(struct btrfs_work *work); | ||
101 | #endif | 103 | #endif |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e5b2533b691a..a99f1c2a710d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -1325,12 +1325,12 @@ static noinline int reada_for_balance(struct btrfs_root *root, | |||
1325 | int ret = 0; | 1325 | int ret = 0; |
1326 | int blocksize; | 1326 | int blocksize; |
1327 | 1327 | ||
1328 | parent = path->nodes[level - 1]; | 1328 | parent = path->nodes[level + 1]; |
1329 | if (!parent) | 1329 | if (!parent) |
1330 | return 0; | 1330 | return 0; |
1331 | 1331 | ||
1332 | nritems = btrfs_header_nritems(parent); | 1332 | nritems = btrfs_header_nritems(parent); |
1333 | slot = path->slots[level]; | 1333 | slot = path->slots[level + 1]; |
1334 | blocksize = btrfs_level_size(root, level); | 1334 | blocksize = btrfs_level_size(root, level); |
1335 | 1335 | ||
1336 | if (slot > 0) { | 1336 | if (slot > 0) { |
@@ -1341,7 +1341,7 @@ static noinline int reada_for_balance(struct btrfs_root *root, | |||
1341 | block1 = 0; | 1341 | block1 = 0; |
1342 | free_extent_buffer(eb); | 1342 | free_extent_buffer(eb); |
1343 | } | 1343 | } |
1344 | if (slot < nritems) { | 1344 | if (slot + 1 < nritems) { |
1345 | block2 = btrfs_node_blockptr(parent, slot + 1); | 1345 | block2 = btrfs_node_blockptr(parent, slot + 1); |
1346 | gen = btrfs_node_ptr_generation(parent, slot + 1); | 1346 | gen = btrfs_node_ptr_generation(parent, slot + 1); |
1347 | eb = btrfs_find_tree_block(root, block2, blocksize); | 1347 | eb = btrfs_find_tree_block(root, block2, blocksize); |
@@ -1351,7 +1351,11 @@ static noinline int reada_for_balance(struct btrfs_root *root, | |||
1351 | } | 1351 | } |
1352 | if (block1 || block2) { | 1352 | if (block1 || block2) { |
1353 | ret = -EAGAIN; | 1353 | ret = -EAGAIN; |
1354 | |||
1355 | /* release the whole path */ | ||
1354 | btrfs_release_path(root, path); | 1356 | btrfs_release_path(root, path); |
1357 | |||
1358 | /* read the blocks */ | ||
1355 | if (block1) | 1359 | if (block1) |
1356 | readahead_tree_block(root, block1, blocksize, 0); | 1360 | readahead_tree_block(root, block1, blocksize, 0); |
1357 | if (block2) | 1361 | if (block2) |
@@ -1361,7 +1365,7 @@ static noinline int reada_for_balance(struct btrfs_root *root, | |||
1361 | eb = read_tree_block(root, block1, blocksize, 0); | 1365 | eb = read_tree_block(root, block1, blocksize, 0); |
1362 | free_extent_buffer(eb); | 1366 | free_extent_buffer(eb); |
1363 | } | 1367 | } |
1364 | if (block1) { | 1368 | if (block2) { |
1365 | eb = read_tree_block(root, block2, blocksize, 0); | 1369 | eb = read_tree_block(root, block2, blocksize, 0); |
1366 | free_extent_buffer(eb); | 1370 | free_extent_buffer(eb); |
1367 | } | 1371 | } |
@@ -1481,12 +1485,15 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
1481 | * of the btree by dropping locks before | 1485 | * of the btree by dropping locks before |
1482 | * we read. | 1486 | * we read. |
1483 | */ | 1487 | */ |
1484 | btrfs_release_path(NULL, p); | 1488 | btrfs_unlock_up_safe(p, level + 1); |
1489 | btrfs_set_path_blocking(p); | ||
1490 | |||
1485 | if (tmp) | 1491 | if (tmp) |
1486 | free_extent_buffer(tmp); | 1492 | free_extent_buffer(tmp); |
1487 | if (p->reada) | 1493 | if (p->reada) |
1488 | reada_for_search(root, p, level, slot, key->objectid); | 1494 | reada_for_search(root, p, level, slot, key->objectid); |
1489 | 1495 | ||
1496 | btrfs_release_path(NULL, p); | ||
1490 | tmp = read_tree_block(root, blocknr, blocksize, gen); | 1497 | tmp = read_tree_block(root, blocknr, blocksize, gen); |
1491 | if (tmp) | 1498 | if (tmp) |
1492 | free_extent_buffer(tmp); | 1499 | free_extent_buffer(tmp); |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ad96495dedc5..4414a5d9983a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -881,6 +881,9 @@ struct btrfs_fs_info { | |||
881 | u64 metadata_alloc_profile; | 881 | u64 metadata_alloc_profile; |
882 | u64 system_alloc_profile; | 882 | u64 system_alloc_profile; |
883 | 883 | ||
884 | unsigned data_chunk_allocations; | ||
885 | unsigned metadata_ratio; | ||
886 | |||
884 | void *bdev_holder; | 887 | void *bdev_holder; |
885 | }; | 888 | }; |
886 | 889 | ||
@@ -2174,7 +2177,8 @@ int btrfs_check_file(struct btrfs_root *root, struct inode *inode); | |||
2174 | extern struct file_operations btrfs_file_operations; | 2177 | extern struct file_operations btrfs_file_operations; |
2175 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, | 2178 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, |
2176 | struct btrfs_root *root, struct inode *inode, | 2179 | struct btrfs_root *root, struct inode *inode, |
2177 | u64 start, u64 end, u64 inline_limit, u64 *hint_block); | 2180 | u64 start, u64 end, u64 locked_end, |
2181 | u64 inline_limit, u64 *hint_block); | ||
2178 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | 2182 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, |
2179 | struct btrfs_root *root, | 2183 | struct btrfs_root *root, |
2180 | struct inode *inode, u64 start, u64 end); | 2184 | struct inode *inode, u64 start, u64 end); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 92caa8035f36..0ff16d3331da 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -232,10 +232,14 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
232 | memcpy(&found, result, csum_size); | 232 | memcpy(&found, result, csum_size); |
233 | 233 | ||
234 | read_extent_buffer(buf, &val, 0, csum_size); | 234 | read_extent_buffer(buf, &val, 0, csum_size); |
235 | printk(KERN_INFO "btrfs: %s checksum verify failed " | 235 | if (printk_ratelimit()) { |
236 | "on %llu wanted %X found %X level %d\n", | 236 | printk(KERN_INFO "btrfs: %s checksum verify " |
237 | root->fs_info->sb->s_id, | 237 | "failed on %llu wanted %X found %X " |
238 | buf->start, val, found, btrfs_header_level(buf)); | 238 | "level %d\n", |
239 | root->fs_info->sb->s_id, | ||
240 | (unsigned long long)buf->start, val, found, | ||
241 | btrfs_header_level(buf)); | ||
242 | } | ||
239 | if (result != (char *)&inline_result) | 243 | if (result != (char *)&inline_result) |
240 | kfree(result); | 244 | kfree(result); |
241 | return 1; | 245 | return 1; |
@@ -268,10 +272,13 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, | |||
268 | ret = 0; | 272 | ret = 0; |
269 | goto out; | 273 | goto out; |
270 | } | 274 | } |
271 | printk("parent transid verify failed on %llu wanted %llu found %llu\n", | 275 | if (printk_ratelimit()) { |
272 | (unsigned long long)eb->start, | 276 | printk("parent transid verify failed on %llu wanted %llu " |
273 | (unsigned long long)parent_transid, | 277 | "found %llu\n", |
274 | (unsigned long long)btrfs_header_generation(eb)); | 278 | (unsigned long long)eb->start, |
279 | (unsigned long long)parent_transid, | ||
280 | (unsigned long long)btrfs_header_generation(eb)); | ||
281 | } | ||
275 | ret = 1; | 282 | ret = 1; |
276 | clear_extent_buffer_uptodate(io_tree, eb); | 283 | clear_extent_buffer_uptodate(io_tree, eb); |
277 | out: | 284 | out: |
@@ -415,9 +422,12 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
415 | 422 | ||
416 | found_start = btrfs_header_bytenr(eb); | 423 | found_start = btrfs_header_bytenr(eb); |
417 | if (found_start != start) { | 424 | if (found_start != start) { |
418 | printk(KERN_INFO "btrfs bad tree block start %llu %llu\n", | 425 | if (printk_ratelimit()) { |
419 | (unsigned long long)found_start, | 426 | printk(KERN_INFO "btrfs bad tree block start " |
420 | (unsigned long long)eb->start); | 427 | "%llu %llu\n", |
428 | (unsigned long long)found_start, | ||
429 | (unsigned long long)eb->start); | ||
430 | } | ||
421 | ret = -EIO; | 431 | ret = -EIO; |
422 | goto err; | 432 | goto err; |
423 | } | 433 | } |
@@ -429,8 +439,10 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
429 | goto err; | 439 | goto err; |
430 | } | 440 | } |
431 | if (check_tree_block_fsid(root, eb)) { | 441 | if (check_tree_block_fsid(root, eb)) { |
432 | printk(KERN_INFO "btrfs bad fsid on block %llu\n", | 442 | if (printk_ratelimit()) { |
433 | (unsigned long long)eb->start); | 443 | printk(KERN_INFO "btrfs bad fsid on block %llu\n", |
444 | (unsigned long long)eb->start); | ||
445 | } | ||
434 | ret = -EIO; | 446 | ret = -EIO; |
435 | goto err; | 447 | goto err; |
436 | } | 448 | } |
@@ -579,19 +591,12 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
579 | async->bio_flags = bio_flags; | 591 | async->bio_flags = bio_flags; |
580 | 592 | ||
581 | atomic_inc(&fs_info->nr_async_submits); | 593 | atomic_inc(&fs_info->nr_async_submits); |
594 | |||
595 | if (rw & (1 << BIO_RW_SYNCIO)) | ||
596 | btrfs_set_work_high_prio(&async->work); | ||
597 | |||
582 | btrfs_queue_worker(&fs_info->workers, &async->work); | 598 | btrfs_queue_worker(&fs_info->workers, &async->work); |
583 | #if 0 | ||
584 | int limit = btrfs_async_submit_limit(fs_info); | ||
585 | if (atomic_read(&fs_info->nr_async_submits) > limit) { | ||
586 | wait_event_timeout(fs_info->async_submit_wait, | ||
587 | (atomic_read(&fs_info->nr_async_submits) < limit), | ||
588 | HZ/10); | ||
589 | 599 | ||
590 | wait_event_timeout(fs_info->async_submit_wait, | ||
591 | (atomic_read(&fs_info->nr_async_bios) < limit), | ||
592 | HZ/10); | ||
593 | } | ||
594 | #endif | ||
595 | while (atomic_read(&fs_info->async_submit_draining) && | 600 | while (atomic_read(&fs_info->async_submit_draining) && |
596 | atomic_read(&fs_info->nr_async_submits)) { | 601 | atomic_read(&fs_info->nr_async_submits)) { |
597 | wait_event(fs_info->async_submit_wait, | 602 | wait_event(fs_info->async_submit_wait, |
@@ -656,6 +661,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
656 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, | 661 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, |
657 | mirror_num, 0); | 662 | mirror_num, 0); |
658 | } | 663 | } |
664 | |||
659 | /* | 665 | /* |
660 | * kthread helpers are used to submit writes so that checksumming | 666 | * kthread helpers are used to submit writes so that checksumming |
661 | * can happen in parallel across all CPUs | 667 | * can happen in parallel across all CPUs |
@@ -765,27 +771,6 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) | |||
765 | } | 771 | } |
766 | } | 772 | } |
767 | 773 | ||
768 | #if 0 | ||
769 | static int btree_writepage(struct page *page, struct writeback_control *wbc) | ||
770 | { | ||
771 | struct buffer_head *bh; | ||
772 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | ||
773 | struct buffer_head *head; | ||
774 | if (!page_has_buffers(page)) { | ||
775 | create_empty_buffers(page, root->fs_info->sb->s_blocksize, | ||
776 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | ||
777 | } | ||
778 | head = page_buffers(page); | ||
779 | bh = head; | ||
780 | do { | ||
781 | if (buffer_dirty(bh)) | ||
782 | csum_tree_block(root, bh, 0); | ||
783 | bh = bh->b_this_page; | ||
784 | } while (bh != head); | ||
785 | return block_write_full_page(page, btree_get_block, wbc); | ||
786 | } | ||
787 | #endif | ||
788 | |||
789 | static struct address_space_operations btree_aops = { | 774 | static struct address_space_operations btree_aops = { |
790 | .readpage = btree_readpage, | 775 | .readpage = btree_readpage, |
791 | .writepage = btree_writepage, | 776 | .writepage = btree_writepage, |
@@ -1273,11 +1258,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) | |||
1273 | int ret = 0; | 1258 | int ret = 0; |
1274 | struct btrfs_device *device; | 1259 | struct btrfs_device *device; |
1275 | struct backing_dev_info *bdi; | 1260 | struct backing_dev_info *bdi; |
1276 | #if 0 | 1261 | |
1277 | if ((bdi_bits & (1 << BDI_write_congested)) && | ||
1278 | btrfs_congested_async(info, 0)) | ||
1279 | return 1; | ||
1280 | #endif | ||
1281 | list_for_each_entry(device, &info->fs_devices->devices, dev_list) { | 1262 | list_for_each_entry(device, &info->fs_devices->devices, dev_list) { |
1282 | if (!device->bdev) | 1263 | if (!device->bdev) |
1283 | continue; | 1264 | continue; |
@@ -1599,6 +1580,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1599 | fs_info->btree_inode = new_inode(sb); | 1580 | fs_info->btree_inode = new_inode(sb); |
1600 | fs_info->btree_inode->i_ino = 1; | 1581 | fs_info->btree_inode->i_ino = 1; |
1601 | fs_info->btree_inode->i_nlink = 1; | 1582 | fs_info->btree_inode->i_nlink = 1; |
1583 | fs_info->metadata_ratio = 8; | ||
1602 | 1584 | ||
1603 | fs_info->thread_pool_size = min_t(unsigned long, | 1585 | fs_info->thread_pool_size = min_t(unsigned long, |
1604 | num_online_cpus() + 2, 8); | 1586 | num_online_cpus() + 2, 8); |
@@ -1689,7 +1671,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1689 | if (features) { | 1671 | if (features) { |
1690 | printk(KERN_ERR "BTRFS: couldn't mount because of " | 1672 | printk(KERN_ERR "BTRFS: couldn't mount because of " |
1691 | "unsupported optional features (%Lx).\n", | 1673 | "unsupported optional features (%Lx).\n", |
1692 | features); | 1674 | (unsigned long long)features); |
1693 | err = -EINVAL; | 1675 | err = -EINVAL; |
1694 | goto fail_iput; | 1676 | goto fail_iput; |
1695 | } | 1677 | } |
@@ -1699,7 +1681,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1699 | if (!(sb->s_flags & MS_RDONLY) && features) { | 1681 | if (!(sb->s_flags & MS_RDONLY) && features) { |
1700 | printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " | 1682 | printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " |
1701 | "unsupported option features (%Lx).\n", | 1683 | "unsupported option features (%Lx).\n", |
1702 | features); | 1684 | (unsigned long long)features); |
1703 | err = -EINVAL; | 1685 | err = -EINVAL; |
1704 | goto fail_iput; | 1686 | goto fail_iput; |
1705 | } | 1687 | } |
@@ -2095,10 +2077,10 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2095 | device->barriers = 0; | 2077 | device->barriers = 0; |
2096 | get_bh(bh); | 2078 | get_bh(bh); |
2097 | lock_buffer(bh); | 2079 | lock_buffer(bh); |
2098 | ret = submit_bh(WRITE, bh); | 2080 | ret = submit_bh(WRITE_SYNC, bh); |
2099 | } | 2081 | } |
2100 | } else { | 2082 | } else { |
2101 | ret = submit_bh(WRITE, bh); | 2083 | ret = submit_bh(WRITE_SYNC, bh); |
2102 | } | 2084 | } |
2103 | 2085 | ||
2104 | if (!ret && wait) { | 2086 | if (!ret && wait) { |
@@ -2291,7 +2273,7 @@ int close_ctree(struct btrfs_root *root) | |||
2291 | 2273 | ||
2292 | if (fs_info->delalloc_bytes) { | 2274 | if (fs_info->delalloc_bytes) { |
2293 | printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", | 2275 | printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", |
2294 | fs_info->delalloc_bytes); | 2276 | (unsigned long long)fs_info->delalloc_bytes); |
2295 | } | 2277 | } |
2296 | if (fs_info->total_ref_cache_size) { | 2278 | if (fs_info->total_ref_cache_size) { |
2297 | printk(KERN_INFO "btrfs: at umount reference cache size %llu\n", | 2279 | printk(KERN_INFO "btrfs: at umount reference cache size %llu\n", |
@@ -2328,16 +2310,6 @@ int close_ctree(struct btrfs_root *root) | |||
2328 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2310 | btrfs_stop_workers(&fs_info->endio_write_workers); |
2329 | btrfs_stop_workers(&fs_info->submit_workers); | 2311 | btrfs_stop_workers(&fs_info->submit_workers); |
2330 | 2312 | ||
2331 | #if 0 | ||
2332 | while (!list_empty(&fs_info->hashers)) { | ||
2333 | struct btrfs_hasher *hasher; | ||
2334 | hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher, | ||
2335 | hashers); | ||
2336 | list_del(&hasher->hashers); | ||
2337 | crypto_free_hash(&fs_info->hash_tfm); | ||
2338 | kfree(hasher); | ||
2339 | } | ||
2340 | #endif | ||
2341 | btrfs_close_devices(fs_info->fs_devices); | 2313 | btrfs_close_devices(fs_info->fs_devices); |
2342 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2314 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
2343 | 2315 | ||
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 178df4c67de4..e4966444811b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -1844,10 +1844,14 @@ again: | |||
1844 | printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" | 1844 | printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" |
1845 | ", %llu bytes_used, %llu bytes_reserved, " | 1845 | ", %llu bytes_used, %llu bytes_reserved, " |
1846 | "%llu bytes_pinned, %llu bytes_readonly, %llu may use" | 1846 | "%llu bytes_pinned, %llu bytes_readonly, %llu may use" |
1847 | "%llu total\n", bytes, data_sinfo->bytes_delalloc, | 1847 | "%llu total\n", (unsigned long long)bytes, |
1848 | data_sinfo->bytes_used, data_sinfo->bytes_reserved, | 1848 | (unsigned long long)data_sinfo->bytes_delalloc, |
1849 | data_sinfo->bytes_pinned, data_sinfo->bytes_readonly, | 1849 | (unsigned long long)data_sinfo->bytes_used, |
1850 | data_sinfo->bytes_may_use, data_sinfo->total_bytes); | 1850 | (unsigned long long)data_sinfo->bytes_reserved, |
1851 | (unsigned long long)data_sinfo->bytes_pinned, | ||
1852 | (unsigned long long)data_sinfo->bytes_readonly, | ||
1853 | (unsigned long long)data_sinfo->bytes_may_use, | ||
1854 | (unsigned long long)data_sinfo->total_bytes); | ||
1851 | return -ENOSPC; | 1855 | return -ENOSPC; |
1852 | } | 1856 | } |
1853 | data_sinfo->bytes_may_use += bytes; | 1857 | data_sinfo->bytes_may_use += bytes; |
@@ -1918,15 +1922,29 @@ void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | |||
1918 | spin_unlock(&info->lock); | 1922 | spin_unlock(&info->lock); |
1919 | } | 1923 | } |
1920 | 1924 | ||
1925 | static void force_metadata_allocation(struct btrfs_fs_info *info) | ||
1926 | { | ||
1927 | struct list_head *head = &info->space_info; | ||
1928 | struct btrfs_space_info *found; | ||
1929 | |||
1930 | rcu_read_lock(); | ||
1931 | list_for_each_entry_rcu(found, head, list) { | ||
1932 | if (found->flags & BTRFS_BLOCK_GROUP_METADATA) | ||
1933 | found->force_alloc = 1; | ||
1934 | } | ||
1935 | rcu_read_unlock(); | ||
1936 | } | ||
1937 | |||
1921 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 1938 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
1922 | struct btrfs_root *extent_root, u64 alloc_bytes, | 1939 | struct btrfs_root *extent_root, u64 alloc_bytes, |
1923 | u64 flags, int force) | 1940 | u64 flags, int force) |
1924 | { | 1941 | { |
1925 | struct btrfs_space_info *space_info; | 1942 | struct btrfs_space_info *space_info; |
1943 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | ||
1926 | u64 thresh; | 1944 | u64 thresh; |
1927 | int ret = 0; | 1945 | int ret = 0; |
1928 | 1946 | ||
1929 | mutex_lock(&extent_root->fs_info->chunk_mutex); | 1947 | mutex_lock(&fs_info->chunk_mutex); |
1930 | 1948 | ||
1931 | flags = btrfs_reduce_alloc_profile(extent_root, flags); | 1949 | flags = btrfs_reduce_alloc_profile(extent_root, flags); |
1932 | 1950 | ||
@@ -1958,6 +1976,18 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
1958 | } | 1976 | } |
1959 | spin_unlock(&space_info->lock); | 1977 | spin_unlock(&space_info->lock); |
1960 | 1978 | ||
1979 | /* | ||
1980 | * if we're doing a data chunk, go ahead and make sure that | ||
1981 | * we keep a reasonable number of metadata chunks allocated in the | ||
1982 | * FS as well. | ||
1983 | */ | ||
1984 | if (flags & BTRFS_BLOCK_GROUP_DATA) { | ||
1985 | fs_info->data_chunk_allocations++; | ||
1986 | if (!(fs_info->data_chunk_allocations % | ||
1987 | fs_info->metadata_ratio)) | ||
1988 | force_metadata_allocation(fs_info); | ||
1989 | } | ||
1990 | |||
1961 | ret = btrfs_alloc_chunk(trans, extent_root, flags); | 1991 | ret = btrfs_alloc_chunk(trans, extent_root, flags); |
1962 | if (ret) | 1992 | if (ret) |
1963 | space_info->full = 1; | 1993 | space_info->full = 1; |
@@ -2798,9 +2828,12 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) | |||
2798 | info->bytes_pinned - info->bytes_reserved), | 2828 | info->bytes_pinned - info->bytes_reserved), |
2799 | (info->full) ? "" : "not "); | 2829 | (info->full) ? "" : "not "); |
2800 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," | 2830 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," |
2801 | " may_use=%llu, used=%llu\n", info->total_bytes, | 2831 | " may_use=%llu, used=%llu\n", |
2802 | info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use, | 2832 | (unsigned long long)info->total_bytes, |
2803 | info->bytes_used); | 2833 | (unsigned long long)info->bytes_pinned, |
2834 | (unsigned long long)info->bytes_delalloc, | ||
2835 | (unsigned long long)info->bytes_may_use, | ||
2836 | (unsigned long long)info->bytes_used); | ||
2804 | 2837 | ||
2805 | down_read(&info->groups_sem); | 2838 | down_read(&info->groups_sem); |
2806 | list_for_each_entry(cache, &info->block_groups, list) { | 2839 | list_for_each_entry(cache, &info->block_groups, list) { |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index eb2bee8b7fbf..fe9eb990e443 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -17,12 +17,6 @@ | |||
17 | #include "ctree.h" | 17 | #include "ctree.h" |
18 | #include "btrfs_inode.h" | 18 | #include "btrfs_inode.h" |
19 | 19 | ||
20 | /* temporary define until extent_map moves out of btrfs */ | ||
21 | struct kmem_cache *btrfs_cache_create(const char *name, size_t size, | ||
22 | unsigned long extra_flags, | ||
23 | void (*ctor)(void *, struct kmem_cache *, | ||
24 | unsigned long)); | ||
25 | |||
26 | static struct kmem_cache *extent_state_cache; | 20 | static struct kmem_cache *extent_state_cache; |
27 | static struct kmem_cache *extent_buffer_cache; | 21 | static struct kmem_cache *extent_buffer_cache; |
28 | 22 | ||
@@ -50,20 +44,23 @@ struct extent_page_data { | |||
50 | /* tells writepage not to lock the state bits for this range | 44 | /* tells writepage not to lock the state bits for this range |
51 | * it still does the unlocking | 45 | * it still does the unlocking |
52 | */ | 46 | */ |
53 | int extent_locked; | 47 | unsigned int extent_locked:1; |
48 | |||
49 | /* tells the submit_bio code to use a WRITE_SYNC */ | ||
50 | unsigned int sync_io:1; | ||
54 | }; | 51 | }; |
55 | 52 | ||
56 | int __init extent_io_init(void) | 53 | int __init extent_io_init(void) |
57 | { | 54 | { |
58 | extent_state_cache = btrfs_cache_create("extent_state", | 55 | extent_state_cache = kmem_cache_create("extent_state", |
59 | sizeof(struct extent_state), 0, | 56 | sizeof(struct extent_state), 0, |
60 | NULL); | 57 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
61 | if (!extent_state_cache) | 58 | if (!extent_state_cache) |
62 | return -ENOMEM; | 59 | return -ENOMEM; |
63 | 60 | ||
64 | extent_buffer_cache = btrfs_cache_create("extent_buffers", | 61 | extent_buffer_cache = kmem_cache_create("extent_buffers", |
65 | sizeof(struct extent_buffer), 0, | 62 | sizeof(struct extent_buffer), 0, |
66 | NULL); | 63 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
67 | if (!extent_buffer_cache) | 64 | if (!extent_buffer_cache) |
68 | goto free_state_cache; | 65 | goto free_state_cache; |
69 | return 0; | 66 | return 0; |
@@ -1404,69 +1401,6 @@ out: | |||
1404 | return total_bytes; | 1401 | return total_bytes; |
1405 | } | 1402 | } |
1406 | 1403 | ||
1407 | #if 0 | ||
1408 | /* | ||
1409 | * helper function to lock both pages and extents in the tree. | ||
1410 | * pages must be locked first. | ||
1411 | */ | ||
1412 | static int lock_range(struct extent_io_tree *tree, u64 start, u64 end) | ||
1413 | { | ||
1414 | unsigned long index = start >> PAGE_CACHE_SHIFT; | ||
1415 | unsigned long end_index = end >> PAGE_CACHE_SHIFT; | ||
1416 | struct page *page; | ||
1417 | int err; | ||
1418 | |||
1419 | while (index <= end_index) { | ||
1420 | page = grab_cache_page(tree->mapping, index); | ||
1421 | if (!page) { | ||
1422 | err = -ENOMEM; | ||
1423 | goto failed; | ||
1424 | } | ||
1425 | if (IS_ERR(page)) { | ||
1426 | err = PTR_ERR(page); | ||
1427 | goto failed; | ||
1428 | } | ||
1429 | index++; | ||
1430 | } | ||
1431 | lock_extent(tree, start, end, GFP_NOFS); | ||
1432 | return 0; | ||
1433 | |||
1434 | failed: | ||
1435 | /* | ||
1436 | * we failed above in getting the page at 'index', so we undo here | ||
1437 | * up to but not including the page at 'index' | ||
1438 | */ | ||
1439 | end_index = index; | ||
1440 | index = start >> PAGE_CACHE_SHIFT; | ||
1441 | while (index < end_index) { | ||
1442 | page = find_get_page(tree->mapping, index); | ||
1443 | unlock_page(page); | ||
1444 | page_cache_release(page); | ||
1445 | index++; | ||
1446 | } | ||
1447 | return err; | ||
1448 | } | ||
1449 | |||
1450 | /* | ||
1451 | * helper function to unlock both pages and extents in the tree. | ||
1452 | */ | ||
1453 | static int unlock_range(struct extent_io_tree *tree, u64 start, u64 end) | ||
1454 | { | ||
1455 | unsigned long index = start >> PAGE_CACHE_SHIFT; | ||
1456 | unsigned long end_index = end >> PAGE_CACHE_SHIFT; | ||
1457 | struct page *page; | ||
1458 | |||
1459 | while (index <= end_index) { | ||
1460 | page = find_get_page(tree->mapping, index); | ||
1461 | unlock_page(page); | ||
1462 | page_cache_release(page); | ||
1463 | index++; | ||
1464 | } | ||
1465 | unlock_extent(tree, start, end, GFP_NOFS); | ||
1466 | return 0; | ||
1467 | } | ||
1468 | #endif | ||
1469 | |||
1470 | /* | 1404 | /* |
1471 | * set the private field for a given byte offset in the tree. If there isn't | 1405 | * set the private field for a given byte offset in the tree. If there isn't |
1472 | * an extent_state there already, this does nothing. | 1406 | * an extent_state there already, this does nothing. |
@@ -2101,6 +2035,16 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, | |||
2101 | return ret; | 2035 | return ret; |
2102 | } | 2036 | } |
2103 | 2037 | ||
2038 | static noinline void update_nr_written(struct page *page, | ||
2039 | struct writeback_control *wbc, | ||
2040 | unsigned long nr_written) | ||
2041 | { | ||
2042 | wbc->nr_to_write -= nr_written; | ||
2043 | if (wbc->range_cyclic || (wbc->nr_to_write > 0 && | ||
2044 | wbc->range_start == 0 && wbc->range_end == LLONG_MAX)) | ||
2045 | page->mapping->writeback_index = page->index + nr_written; | ||
2046 | } | ||
2047 | |||
2104 | /* | 2048 | /* |
2105 | * the writepage semantics are similar to regular writepage. extent | 2049 | * the writepage semantics are similar to regular writepage. extent |
2106 | * records are inserted to lock ranges in the tree, and as dirty areas | 2050 | * records are inserted to lock ranges in the tree, and as dirty areas |
@@ -2136,8 +2080,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2136 | u64 delalloc_end; | 2080 | u64 delalloc_end; |
2137 | int page_started; | 2081 | int page_started; |
2138 | int compressed; | 2082 | int compressed; |
2083 | int write_flags; | ||
2139 | unsigned long nr_written = 0; | 2084 | unsigned long nr_written = 0; |
2140 | 2085 | ||
2086 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
2087 | write_flags = WRITE_SYNC_PLUG; | ||
2088 | else | ||
2089 | write_flags = WRITE; | ||
2090 | |||
2141 | WARN_ON(!PageLocked(page)); | 2091 | WARN_ON(!PageLocked(page)); |
2142 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); | 2092 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); |
2143 | if (page->index > end_index || | 2093 | if (page->index > end_index || |
@@ -2164,6 +2114,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2164 | delalloc_end = 0; | 2114 | delalloc_end = 0; |
2165 | page_started = 0; | 2115 | page_started = 0; |
2166 | if (!epd->extent_locked) { | 2116 | if (!epd->extent_locked) { |
2117 | /* | ||
2118 | * make sure the wbc mapping index is at least updated | ||
2119 | * to this page. | ||
2120 | */ | ||
2121 | update_nr_written(page, wbc, 0); | ||
2122 | |||
2167 | while (delalloc_end < page_end) { | 2123 | while (delalloc_end < page_end) { |
2168 | nr_delalloc = find_lock_delalloc_range(inode, tree, | 2124 | nr_delalloc = find_lock_delalloc_range(inode, tree, |
2169 | page, | 2125 | page, |
@@ -2185,7 +2141,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2185 | */ | 2141 | */ |
2186 | if (page_started) { | 2142 | if (page_started) { |
2187 | ret = 0; | 2143 | ret = 0; |
2188 | goto update_nr_written; | 2144 | /* |
2145 | * we've unlocked the page, so we can't update | ||
2146 | * the mapping's writeback index, just update | ||
2147 | * nr_to_write. | ||
2148 | */ | ||
2149 | wbc->nr_to_write -= nr_written; | ||
2150 | goto done_unlocked; | ||
2189 | } | 2151 | } |
2190 | } | 2152 | } |
2191 | lock_extent(tree, start, page_end, GFP_NOFS); | 2153 | lock_extent(tree, start, page_end, GFP_NOFS); |
@@ -2198,13 +2160,18 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2198 | if (ret == -EAGAIN) { | 2160 | if (ret == -EAGAIN) { |
2199 | unlock_extent(tree, start, page_end, GFP_NOFS); | 2161 | unlock_extent(tree, start, page_end, GFP_NOFS); |
2200 | redirty_page_for_writepage(wbc, page); | 2162 | redirty_page_for_writepage(wbc, page); |
2163 | update_nr_written(page, wbc, nr_written); | ||
2201 | unlock_page(page); | 2164 | unlock_page(page); |
2202 | ret = 0; | 2165 | ret = 0; |
2203 | goto update_nr_written; | 2166 | goto done_unlocked; |
2204 | } | 2167 | } |
2205 | } | 2168 | } |
2206 | 2169 | ||
2207 | nr_written++; | 2170 | /* |
2171 | * we don't want to touch the inode after unlocking the page, | ||
2172 | * so we update the mapping writeback index now | ||
2173 | */ | ||
2174 | update_nr_written(page, wbc, nr_written + 1); | ||
2208 | 2175 | ||
2209 | end = page_end; | 2176 | end = page_end; |
2210 | if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) | 2177 | if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) |
@@ -2314,9 +2281,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2314 | (unsigned long long)end); | 2281 | (unsigned long long)end); |
2315 | } | 2282 | } |
2316 | 2283 | ||
2317 | ret = submit_extent_page(WRITE, tree, page, sector, | 2284 | ret = submit_extent_page(write_flags, tree, page, |
2318 | iosize, pg_offset, bdev, | 2285 | sector, iosize, pg_offset, |
2319 | &epd->bio, max_nr, | 2286 | bdev, &epd->bio, max_nr, |
2320 | end_bio_extent_writepage, | 2287 | end_bio_extent_writepage, |
2321 | 0, 0, 0); | 2288 | 0, 0, 0); |
2322 | if (ret) | 2289 | if (ret) |
@@ -2336,11 +2303,8 @@ done: | |||
2336 | unlock_extent(tree, unlock_start, page_end, GFP_NOFS); | 2303 | unlock_extent(tree, unlock_start, page_end, GFP_NOFS); |
2337 | unlock_page(page); | 2304 | unlock_page(page); |
2338 | 2305 | ||
2339 | update_nr_written: | 2306 | done_unlocked: |
2340 | wbc->nr_to_write -= nr_written; | 2307 | |
2341 | if (wbc->range_cyclic || (wbc->nr_to_write > 0 && | ||
2342 | wbc->range_start == 0 && wbc->range_end == LLONG_MAX)) | ||
2343 | page->mapping->writeback_index = page->index + nr_written; | ||
2344 | return 0; | 2308 | return 0; |
2345 | } | 2309 | } |
2346 | 2310 | ||
@@ -2460,15 +2424,23 @@ retry: | |||
2460 | return ret; | 2424 | return ret; |
2461 | } | 2425 | } |
2462 | 2426 | ||
2463 | static noinline void flush_write_bio(void *data) | 2427 | static void flush_epd_write_bio(struct extent_page_data *epd) |
2464 | { | 2428 | { |
2465 | struct extent_page_data *epd = data; | ||
2466 | if (epd->bio) { | 2429 | if (epd->bio) { |
2467 | submit_one_bio(WRITE, epd->bio, 0, 0); | 2430 | if (epd->sync_io) |
2431 | submit_one_bio(WRITE_SYNC, epd->bio, 0, 0); | ||
2432 | else | ||
2433 | submit_one_bio(WRITE, epd->bio, 0, 0); | ||
2468 | epd->bio = NULL; | 2434 | epd->bio = NULL; |
2469 | } | 2435 | } |
2470 | } | 2436 | } |
2471 | 2437 | ||
2438 | static noinline void flush_write_bio(void *data) | ||
2439 | { | ||
2440 | struct extent_page_data *epd = data; | ||
2441 | flush_epd_write_bio(epd); | ||
2442 | } | ||
2443 | |||
2472 | int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | 2444 | int extent_write_full_page(struct extent_io_tree *tree, struct page *page, |
2473 | get_extent_t *get_extent, | 2445 | get_extent_t *get_extent, |
2474 | struct writeback_control *wbc) | 2446 | struct writeback_control *wbc) |
@@ -2480,23 +2452,22 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
2480 | .tree = tree, | 2452 | .tree = tree, |
2481 | .get_extent = get_extent, | 2453 | .get_extent = get_extent, |
2482 | .extent_locked = 0, | 2454 | .extent_locked = 0, |
2455 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | ||
2483 | }; | 2456 | }; |
2484 | struct writeback_control wbc_writepages = { | 2457 | struct writeback_control wbc_writepages = { |
2485 | .bdi = wbc->bdi, | 2458 | .bdi = wbc->bdi, |
2486 | .sync_mode = WB_SYNC_NONE, | 2459 | .sync_mode = wbc->sync_mode, |
2487 | .older_than_this = NULL, | 2460 | .older_than_this = NULL, |
2488 | .nr_to_write = 64, | 2461 | .nr_to_write = 64, |
2489 | .range_start = page_offset(page) + PAGE_CACHE_SIZE, | 2462 | .range_start = page_offset(page) + PAGE_CACHE_SIZE, |
2490 | .range_end = (loff_t)-1, | 2463 | .range_end = (loff_t)-1, |
2491 | }; | 2464 | }; |
2492 | 2465 | ||
2493 | |||
2494 | ret = __extent_writepage(page, wbc, &epd); | 2466 | ret = __extent_writepage(page, wbc, &epd); |
2495 | 2467 | ||
2496 | extent_write_cache_pages(tree, mapping, &wbc_writepages, | 2468 | extent_write_cache_pages(tree, mapping, &wbc_writepages, |
2497 | __extent_writepage, &epd, flush_write_bio); | 2469 | __extent_writepage, &epd, flush_write_bio); |
2498 | if (epd.bio) | 2470 | flush_epd_write_bio(&epd); |
2499 | submit_one_bio(WRITE, epd.bio, 0, 0); | ||
2500 | return ret; | 2471 | return ret; |
2501 | } | 2472 | } |
2502 | 2473 | ||
@@ -2515,6 +2486,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, | |||
2515 | .tree = tree, | 2486 | .tree = tree, |
2516 | .get_extent = get_extent, | 2487 | .get_extent = get_extent, |
2517 | .extent_locked = 1, | 2488 | .extent_locked = 1, |
2489 | .sync_io = mode == WB_SYNC_ALL, | ||
2518 | }; | 2490 | }; |
2519 | struct writeback_control wbc_writepages = { | 2491 | struct writeback_control wbc_writepages = { |
2520 | .bdi = inode->i_mapping->backing_dev_info, | 2492 | .bdi = inode->i_mapping->backing_dev_info, |
@@ -2540,8 +2512,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, | |||
2540 | start += PAGE_CACHE_SIZE; | 2512 | start += PAGE_CACHE_SIZE; |
2541 | } | 2513 | } |
2542 | 2514 | ||
2543 | if (epd.bio) | 2515 | flush_epd_write_bio(&epd); |
2544 | submit_one_bio(WRITE, epd.bio, 0, 0); | ||
2545 | return ret; | 2516 | return ret; |
2546 | } | 2517 | } |
2547 | 2518 | ||
@@ -2556,13 +2527,13 @@ int extent_writepages(struct extent_io_tree *tree, | |||
2556 | .tree = tree, | 2527 | .tree = tree, |
2557 | .get_extent = get_extent, | 2528 | .get_extent = get_extent, |
2558 | .extent_locked = 0, | 2529 | .extent_locked = 0, |
2530 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | ||
2559 | }; | 2531 | }; |
2560 | 2532 | ||
2561 | ret = extent_write_cache_pages(tree, mapping, wbc, | 2533 | ret = extent_write_cache_pages(tree, mapping, wbc, |
2562 | __extent_writepage, &epd, | 2534 | __extent_writepage, &epd, |
2563 | flush_write_bio); | 2535 | flush_write_bio); |
2564 | if (epd.bio) | 2536 | flush_epd_write_bio(&epd); |
2565 | submit_one_bio(WRITE, epd.bio, 0, 0); | ||
2566 | return ret; | 2537 | return ret; |
2567 | } | 2538 | } |
2568 | 2539 | ||
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index b187917b36fa..30c9365861e6 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -6,19 +6,14 @@ | |||
6 | #include <linux/hardirq.h> | 6 | #include <linux/hardirq.h> |
7 | #include "extent_map.h" | 7 | #include "extent_map.h" |
8 | 8 | ||
9 | /* temporary define until extent_map moves out of btrfs */ | ||
10 | struct kmem_cache *btrfs_cache_create(const char *name, size_t size, | ||
11 | unsigned long extra_flags, | ||
12 | void (*ctor)(void *, struct kmem_cache *, | ||
13 | unsigned long)); | ||
14 | 9 | ||
15 | static struct kmem_cache *extent_map_cache; | 10 | static struct kmem_cache *extent_map_cache; |
16 | 11 | ||
17 | int __init extent_map_init(void) | 12 | int __init extent_map_init(void) |
18 | { | 13 | { |
19 | extent_map_cache = btrfs_cache_create("extent_map", | 14 | extent_map_cache = kmem_cache_create("extent_map", |
20 | sizeof(struct extent_map), 0, | 15 | sizeof(struct extent_map), 0, |
21 | NULL); | 16 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
22 | if (!extent_map_cache) | 17 | if (!extent_map_cache) |
23 | return -ENOMEM; | 18 | return -ENOMEM; |
24 | return 0; | 19 | return 0; |
@@ -43,7 +38,6 @@ void extent_map_tree_init(struct extent_map_tree *tree, gfp_t mask) | |||
43 | tree->map.rb_node = NULL; | 38 | tree->map.rb_node = NULL; |
44 | spin_lock_init(&tree->lock); | 39 | spin_lock_init(&tree->lock); |
45 | } | 40 | } |
46 | EXPORT_SYMBOL(extent_map_tree_init); | ||
47 | 41 | ||
48 | /** | 42 | /** |
49 | * alloc_extent_map - allocate new extent map structure | 43 | * alloc_extent_map - allocate new extent map structure |
@@ -64,7 +58,6 @@ struct extent_map *alloc_extent_map(gfp_t mask) | |||
64 | atomic_set(&em->refs, 1); | 58 | atomic_set(&em->refs, 1); |
65 | return em; | 59 | return em; |
66 | } | 60 | } |
67 | EXPORT_SYMBOL(alloc_extent_map); | ||
68 | 61 | ||
69 | /** | 62 | /** |
70 | * free_extent_map - drop reference count of an extent_map | 63 | * free_extent_map - drop reference count of an extent_map |
@@ -83,7 +76,6 @@ void free_extent_map(struct extent_map *em) | |||
83 | kmem_cache_free(extent_map_cache, em); | 76 | kmem_cache_free(extent_map_cache, em); |
84 | } | 77 | } |
85 | } | 78 | } |
86 | EXPORT_SYMBOL(free_extent_map); | ||
87 | 79 | ||
88 | static struct rb_node *tree_insert(struct rb_root *root, u64 offset, | 80 | static struct rb_node *tree_insert(struct rb_root *root, u64 offset, |
89 | struct rb_node *node) | 81 | struct rb_node *node) |
@@ -264,7 +256,6 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
264 | out: | 256 | out: |
265 | return ret; | 257 | return ret; |
266 | } | 258 | } |
267 | EXPORT_SYMBOL(add_extent_mapping); | ||
268 | 259 | ||
269 | /* simple helper to do math around the end of an extent, handling wrap */ | 260 | /* simple helper to do math around the end of an extent, handling wrap */ |
270 | static u64 range_end(u64 start, u64 len) | 261 | static u64 range_end(u64 start, u64 len) |
@@ -326,7 +317,6 @@ found: | |||
326 | out: | 317 | out: |
327 | return em; | 318 | return em; |
328 | } | 319 | } |
329 | EXPORT_SYMBOL(lookup_extent_mapping); | ||
330 | 320 | ||
331 | /** | 321 | /** |
332 | * remove_extent_mapping - removes an extent_map from the extent tree | 322 | * remove_extent_mapping - removes an extent_map from the extent tree |
@@ -346,4 +336,3 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) | |||
346 | em->in_tree = 0; | 336 | em->in_tree = 0; |
347 | return ret; | 337 | return ret; |
348 | } | 338 | } |
349 | EXPORT_SYMBOL(remove_extent_mapping); | ||
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9c9fb46ccd08..1d51dc38bb49 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -272,83 +272,6 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
272 | return 0; | 272 | return 0; |
273 | } | 273 | } |
274 | 274 | ||
275 | int btrfs_check_file(struct btrfs_root *root, struct inode *inode) | ||
276 | { | ||
277 | return 0; | ||
278 | #if 0 | ||
279 | struct btrfs_path *path; | ||
280 | struct btrfs_key found_key; | ||
281 | struct extent_buffer *leaf; | ||
282 | struct btrfs_file_extent_item *extent; | ||
283 | u64 last_offset = 0; | ||
284 | int nritems; | ||
285 | int slot; | ||
286 | int found_type; | ||
287 | int ret; | ||
288 | int err = 0; | ||
289 | u64 extent_end = 0; | ||
290 | |||
291 | path = btrfs_alloc_path(); | ||
292 | ret = btrfs_lookup_file_extent(NULL, root, path, inode->i_ino, | ||
293 | last_offset, 0); | ||
294 | while (1) { | ||
295 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
296 | if (path->slots[0] >= nritems) { | ||
297 | ret = btrfs_next_leaf(root, path); | ||
298 | if (ret) | ||
299 | goto out; | ||
300 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
301 | } | ||
302 | slot = path->slots[0]; | ||
303 | leaf = path->nodes[0]; | ||
304 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | ||
305 | if (found_key.objectid != inode->i_ino) | ||
306 | break; | ||
307 | if (found_key.type != BTRFS_EXTENT_DATA_KEY) | ||
308 | goto out; | ||
309 | |||
310 | if (found_key.offset < last_offset) { | ||
311 | WARN_ON(1); | ||
312 | btrfs_print_leaf(root, leaf); | ||
313 | printk(KERN_ERR "inode %lu found offset %llu " | ||
314 | "expected %llu\n", inode->i_ino, | ||
315 | (unsigned long long)found_key.offset, | ||
316 | (unsigned long long)last_offset); | ||
317 | err = 1; | ||
318 | goto out; | ||
319 | } | ||
320 | extent = btrfs_item_ptr(leaf, slot, | ||
321 | struct btrfs_file_extent_item); | ||
322 | found_type = btrfs_file_extent_type(leaf, extent); | ||
323 | if (found_type == BTRFS_FILE_EXTENT_REG) { | ||
324 | extent_end = found_key.offset + | ||
325 | btrfs_file_extent_num_bytes(leaf, extent); | ||
326 | } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { | ||
327 | struct btrfs_item *item; | ||
328 | item = btrfs_item_nr(leaf, slot); | ||
329 | extent_end = found_key.offset + | ||
330 | btrfs_file_extent_inline_len(leaf, extent); | ||
331 | extent_end = (extent_end + root->sectorsize - 1) & | ||
332 | ~((u64)root->sectorsize - 1); | ||
333 | } | ||
334 | last_offset = extent_end; | ||
335 | path->slots[0]++; | ||
336 | } | ||
337 | if (0 && last_offset < inode->i_size) { | ||
338 | WARN_ON(1); | ||
339 | btrfs_print_leaf(root, leaf); | ||
340 | printk(KERN_ERR "inode %lu found offset %llu size %llu\n", | ||
341 | inode->i_ino, (unsigned long long)last_offset, | ||
342 | (unsigned long long)inode->i_size); | ||
343 | err = 1; | ||
344 | |||
345 | } | ||
346 | out: | ||
347 | btrfs_free_path(path); | ||
348 | return err; | ||
349 | #endif | ||
350 | } | ||
351 | |||
352 | /* | 275 | /* |
353 | * this is very complex, but the basic idea is to drop all extents | 276 | * this is very complex, but the basic idea is to drop all extents |
354 | * in the range start - end. hint_block is filled in with a block number | 277 | * in the range start - end. hint_block is filled in with a block number |
@@ -363,15 +286,16 @@ out: | |||
363 | */ | 286 | */ |
364 | noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, | 287 | noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans, |
365 | struct btrfs_root *root, struct inode *inode, | 288 | struct btrfs_root *root, struct inode *inode, |
366 | u64 start, u64 end, u64 inline_limit, u64 *hint_byte) | 289 | u64 start, u64 end, u64 locked_end, |
290 | u64 inline_limit, u64 *hint_byte) | ||
367 | { | 291 | { |
368 | u64 extent_end = 0; | 292 | u64 extent_end = 0; |
369 | u64 locked_end = end; | ||
370 | u64 search_start = start; | 293 | u64 search_start = start; |
371 | u64 leaf_start; | 294 | u64 leaf_start; |
372 | u64 ram_bytes = 0; | 295 | u64 ram_bytes = 0; |
373 | u64 orig_parent = 0; | 296 | u64 orig_parent = 0; |
374 | u64 disk_bytenr = 0; | 297 | u64 disk_bytenr = 0; |
298 | u64 orig_locked_end = locked_end; | ||
375 | u8 compression; | 299 | u8 compression; |
376 | u8 encryption; | 300 | u8 encryption; |
377 | u16 other_encoding = 0; | 301 | u16 other_encoding = 0; |
@@ -684,11 +608,10 @@ next_slot: | |||
684 | } | 608 | } |
685 | out: | 609 | out: |
686 | btrfs_free_path(path); | 610 | btrfs_free_path(path); |
687 | if (locked_end > end) { | 611 | if (locked_end > orig_locked_end) { |
688 | unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1, | 612 | unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end, |
689 | GFP_NOFS); | 613 | locked_end - 1, GFP_NOFS); |
690 | } | 614 | } |
691 | btrfs_check_file(root, inode); | ||
692 | return ret; | 615 | return ret; |
693 | } | 616 | } |
694 | 617 | ||
@@ -830,7 +753,7 @@ again: | |||
830 | 753 | ||
831 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); | 754 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); |
832 | BUG_ON(ret); | 755 | BUG_ON(ret); |
833 | goto done; | 756 | goto release; |
834 | } else if (split == start) { | 757 | } else if (split == start) { |
835 | if (locked_end < extent_end) { | 758 | if (locked_end < extent_end) { |
836 | ret = try_lock_extent(&BTRFS_I(inode)->io_tree, | 759 | ret = try_lock_extent(&BTRFS_I(inode)->io_tree, |
@@ -926,6 +849,8 @@ again: | |||
926 | } | 849 | } |
927 | done: | 850 | done: |
928 | btrfs_mark_buffer_dirty(leaf); | 851 | btrfs_mark_buffer_dirty(leaf); |
852 | |||
853 | release: | ||
929 | btrfs_release_path(root, path); | 854 | btrfs_release_path(root, path); |
930 | if (split_end && split == start) { | 855 | if (split_end && split == start) { |
931 | split = end; | 856 | split = end; |
@@ -1131,7 +1056,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
1131 | if (will_write) { | 1056 | if (will_write) { |
1132 | btrfs_fdatawrite_range(inode->i_mapping, pos, | 1057 | btrfs_fdatawrite_range(inode->i_mapping, pos, |
1133 | pos + write_bytes - 1, | 1058 | pos + write_bytes - 1, |
1134 | WB_SYNC_NONE); | 1059 | WB_SYNC_ALL); |
1135 | } else { | 1060 | } else { |
1136 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | 1061 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, |
1137 | num_pages); | 1062 | num_pages); |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 768b9523662d..0bc93657b460 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -332,13 +332,17 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | |||
332 | printk(KERN_ERR "couldn't find space %llu to free\n", | 332 | printk(KERN_ERR "couldn't find space %llu to free\n", |
333 | (unsigned long long)offset); | 333 | (unsigned long long)offset); |
334 | printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n", | 334 | printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n", |
335 | block_group->cached, block_group->key.objectid, | 335 | block_group->cached, |
336 | block_group->key.offset); | 336 | (unsigned long long)block_group->key.objectid, |
337 | (unsigned long long)block_group->key.offset); | ||
337 | btrfs_dump_free_space(block_group, bytes); | 338 | btrfs_dump_free_space(block_group, bytes); |
338 | } else if (info) { | 339 | } else if (info) { |
339 | printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, " | 340 | printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, " |
340 | "but wanted offset=%llu bytes=%llu\n", | 341 | "but wanted offset=%llu bytes=%llu\n", |
341 | info->offset, info->bytes, offset, bytes); | 342 | (unsigned long long)info->offset, |
343 | (unsigned long long)info->bytes, | ||
344 | (unsigned long long)offset, | ||
345 | (unsigned long long)bytes); | ||
342 | } | 346 | } |
343 | WARN_ON(1); | 347 | WARN_ON(1); |
344 | } | 348 | } |
@@ -357,8 +361,9 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, | |||
357 | info = rb_entry(n, struct btrfs_free_space, offset_index); | 361 | info = rb_entry(n, struct btrfs_free_space, offset_index); |
358 | if (info->bytes >= bytes) | 362 | if (info->bytes >= bytes) |
359 | count++; | 363 | count++; |
360 | printk(KERN_ERR "entry offset %llu, bytes %llu\n", info->offset, | 364 | printk(KERN_ERR "entry offset %llu, bytes %llu\n", |
361 | info->bytes); | 365 | (unsigned long long)info->offset, |
366 | (unsigned long long)info->bytes); | ||
362 | } | 367 | } |
363 | printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" | 368 | printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" |
364 | "\n", count); | 369 | "\n", count); |
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index cc7334d833c9..9abbced1123d 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -79,7 +79,7 @@ int btrfs_find_free_objectid(struct btrfs_trans_handle *trans, | |||
79 | } | 79 | } |
80 | path = btrfs_alloc_path(); | 80 | path = btrfs_alloc_path(); |
81 | BUG_ON(!path); | 81 | BUG_ON(!path); |
82 | search_start = max(search_start, BTRFS_FIRST_FREE_OBJECTID); | 82 | search_start = max(search_start, (u64)BTRFS_FIRST_FREE_OBJECTID); |
83 | search_key.objectid = search_start; | 83 | search_key.objectid = search_start; |
84 | search_key.type = 0; | 84 | search_key.type = 0; |
85 | search_key.offset = 0; | 85 | search_key.offset = 0; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a0d1dd492a58..90c23eb28829 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -70,7 +70,6 @@ static struct extent_io_ops btrfs_extent_io_ops; | |||
70 | static struct kmem_cache *btrfs_inode_cachep; | 70 | static struct kmem_cache *btrfs_inode_cachep; |
71 | struct kmem_cache *btrfs_trans_handle_cachep; | 71 | struct kmem_cache *btrfs_trans_handle_cachep; |
72 | struct kmem_cache *btrfs_transaction_cachep; | 72 | struct kmem_cache *btrfs_transaction_cachep; |
73 | struct kmem_cache *btrfs_bit_radix_cachep; | ||
74 | struct kmem_cache *btrfs_path_cachep; | 73 | struct kmem_cache *btrfs_path_cachep; |
75 | 74 | ||
76 | #define S_SHIFT 12 | 75 | #define S_SHIFT 12 |
@@ -234,7 +233,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
234 | } | 233 | } |
235 | 234 | ||
236 | ret = btrfs_drop_extents(trans, root, inode, start, | 235 | ret = btrfs_drop_extents(trans, root, inode, start, |
237 | aligned_end, start, &hint_byte); | 236 | aligned_end, aligned_end, start, &hint_byte); |
238 | BUG_ON(ret); | 237 | BUG_ON(ret); |
239 | 238 | ||
240 | if (isize > actual_end) | 239 | if (isize > actual_end) |
@@ -1439,6 +1438,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1439 | struct inode *inode, u64 file_pos, | 1438 | struct inode *inode, u64 file_pos, |
1440 | u64 disk_bytenr, u64 disk_num_bytes, | 1439 | u64 disk_bytenr, u64 disk_num_bytes, |
1441 | u64 num_bytes, u64 ram_bytes, | 1440 | u64 num_bytes, u64 ram_bytes, |
1441 | u64 locked_end, | ||
1442 | u8 compression, u8 encryption, | 1442 | u8 compression, u8 encryption, |
1443 | u16 other_encoding, int extent_type) | 1443 | u16 other_encoding, int extent_type) |
1444 | { | 1444 | { |
@@ -1455,7 +1455,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1455 | 1455 | ||
1456 | path->leave_spinning = 1; | 1456 | path->leave_spinning = 1; |
1457 | ret = btrfs_drop_extents(trans, root, inode, file_pos, | 1457 | ret = btrfs_drop_extents(trans, root, inode, file_pos, |
1458 | file_pos + num_bytes, file_pos, &hint); | 1458 | file_pos + num_bytes, locked_end, |
1459 | file_pos, &hint); | ||
1459 | BUG_ON(ret); | 1460 | BUG_ON(ret); |
1460 | 1461 | ||
1461 | ins.objectid = inode->i_ino; | 1462 | ins.objectid = inode->i_ino; |
@@ -1590,6 +1591,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1590 | ordered_extent->disk_len, | 1591 | ordered_extent->disk_len, |
1591 | ordered_extent->len, | 1592 | ordered_extent->len, |
1592 | ordered_extent->len, | 1593 | ordered_extent->len, |
1594 | ordered_extent->file_offset + | ||
1595 | ordered_extent->len, | ||
1593 | compressed, 0, 0, | 1596 | compressed, 0, 0, |
1594 | BTRFS_FILE_EXTENT_REG); | 1597 | BTRFS_FILE_EXTENT_REG); |
1595 | BUG_ON(ret); | 1598 | BUG_ON(ret); |
@@ -1819,10 +1822,12 @@ good: | |||
1819 | return 0; | 1822 | return 0; |
1820 | 1823 | ||
1821 | zeroit: | 1824 | zeroit: |
1822 | printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " | 1825 | if (printk_ratelimit()) { |
1823 | "private %llu\n", page->mapping->host->i_ino, | 1826 | printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u " |
1824 | (unsigned long long)start, csum, | 1827 | "private %llu\n", page->mapping->host->i_ino, |
1825 | (unsigned long long)private); | 1828 | (unsigned long long)start, csum, |
1829 | (unsigned long long)private); | ||
1830 | } | ||
1826 | memset(kaddr + offset, 1, end - start + 1); | 1831 | memset(kaddr + offset, 1, end - start + 1); |
1827 | flush_dcache_page(page); | 1832 | flush_dcache_page(page); |
1828 | kunmap_atomic(kaddr, KM_USER0); | 1833 | kunmap_atomic(kaddr, KM_USER0); |
@@ -2011,6 +2016,57 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2011 | } | 2016 | } |
2012 | 2017 | ||
2013 | /* | 2018 | /* |
2019 | * very simple check to peek ahead in the leaf looking for xattrs. If we | ||
2020 | * don't find any xattrs, we know there can't be any acls. | ||
2021 | * | ||
2022 | * slot is the slot the inode is in, objectid is the objectid of the inode | ||
2023 | */ | ||
2024 | static noinline int acls_after_inode_item(struct extent_buffer *leaf, | ||
2025 | int slot, u64 objectid) | ||
2026 | { | ||
2027 | u32 nritems = btrfs_header_nritems(leaf); | ||
2028 | struct btrfs_key found_key; | ||
2029 | int scanned = 0; | ||
2030 | |||
2031 | slot++; | ||
2032 | while (slot < nritems) { | ||
2033 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | ||
2034 | |||
2035 | /* we found a different objectid, there must not be acls */ | ||
2036 | if (found_key.objectid != objectid) | ||
2037 | return 0; | ||
2038 | |||
2039 | /* we found an xattr, assume we've got an acl */ | ||
2040 | if (found_key.type == BTRFS_XATTR_ITEM_KEY) | ||
2041 | return 1; | ||
2042 | |||
2043 | /* | ||
2044 | * we found a key greater than an xattr key, there can't | ||
2045 | * be any acls later on | ||
2046 | */ | ||
2047 | if (found_key.type > BTRFS_XATTR_ITEM_KEY) | ||
2048 | return 0; | ||
2049 | |||
2050 | slot++; | ||
2051 | scanned++; | ||
2052 | |||
2053 | /* | ||
2054 | * it goes inode, inode backrefs, xattrs, extents, | ||
2055 | * so if there are a ton of hard links to an inode there can | ||
2056 | * be a lot of backrefs. Don't waste time searching too hard, | ||
2057 | * this is just an optimization | ||
2058 | */ | ||
2059 | if (scanned >= 8) | ||
2060 | break; | ||
2061 | } | ||
2062 | /* we hit the end of the leaf before we found an xattr or | ||
2063 | * something larger than an xattr. We have to assume the inode | ||
2064 | * has acls | ||
2065 | */ | ||
2066 | return 1; | ||
2067 | } | ||
2068 | |||
2069 | /* | ||
2014 | * read an inode from the btree into the in-memory inode | 2070 | * read an inode from the btree into the in-memory inode |
2015 | */ | 2071 | */ |
2016 | void btrfs_read_locked_inode(struct inode *inode) | 2072 | void btrfs_read_locked_inode(struct inode *inode) |
@@ -2021,6 +2077,7 @@ void btrfs_read_locked_inode(struct inode *inode) | |||
2021 | struct btrfs_timespec *tspec; | 2077 | struct btrfs_timespec *tspec; |
2022 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2078 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2023 | struct btrfs_key location; | 2079 | struct btrfs_key location; |
2080 | int maybe_acls; | ||
2024 | u64 alloc_group_block; | 2081 | u64 alloc_group_block; |
2025 | u32 rdev; | 2082 | u32 rdev; |
2026 | int ret; | 2083 | int ret; |
@@ -2067,6 +2124,16 @@ void btrfs_read_locked_inode(struct inode *inode) | |||
2067 | 2124 | ||
2068 | alloc_group_block = btrfs_inode_block_group(leaf, inode_item); | 2125 | alloc_group_block = btrfs_inode_block_group(leaf, inode_item); |
2069 | 2126 | ||
2127 | /* | ||
2128 | * try to precache a NULL acl entry for files that don't have | ||
2129 | * any xattrs or acls | ||
2130 | */ | ||
2131 | maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino); | ||
2132 | if (!maybe_acls) { | ||
2133 | BTRFS_I(inode)->i_acl = NULL; | ||
2134 | BTRFS_I(inode)->i_default_acl = NULL; | ||
2135 | } | ||
2136 | |||
2070 | BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, | 2137 | BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, |
2071 | alloc_group_block, 0); | 2138 | alloc_group_block, 0); |
2072 | btrfs_free_path(path); | 2139 | btrfs_free_path(path); |
@@ -2877,6 +2944,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
2877 | err = btrfs_drop_extents(trans, root, inode, | 2944 | err = btrfs_drop_extents(trans, root, inode, |
2878 | cur_offset, | 2945 | cur_offset, |
2879 | cur_offset + hole_size, | 2946 | cur_offset + hole_size, |
2947 | block_end, | ||
2880 | cur_offset, &hint_byte); | 2948 | cur_offset, &hint_byte); |
2881 | if (err) | 2949 | if (err) |
2882 | break; | 2950 | break; |
@@ -3041,8 +3109,8 @@ static noinline void init_btrfs_i(struct inode *inode) | |||
3041 | { | 3109 | { |
3042 | struct btrfs_inode *bi = BTRFS_I(inode); | 3110 | struct btrfs_inode *bi = BTRFS_I(inode); |
3043 | 3111 | ||
3044 | bi->i_acl = NULL; | 3112 | bi->i_acl = BTRFS_ACL_NOT_CACHED; |
3045 | bi->i_default_acl = NULL; | 3113 | bi->i_default_acl = BTRFS_ACL_NOT_CACHED; |
3046 | 3114 | ||
3047 | bi->generation = 0; | 3115 | bi->generation = 0; |
3048 | bi->sequence = 0; | 3116 | bi->sequence = 0; |
@@ -4634,47 +4702,36 @@ void btrfs_destroy_cachep(void) | |||
4634 | kmem_cache_destroy(btrfs_trans_handle_cachep); | 4702 | kmem_cache_destroy(btrfs_trans_handle_cachep); |
4635 | if (btrfs_transaction_cachep) | 4703 | if (btrfs_transaction_cachep) |
4636 | kmem_cache_destroy(btrfs_transaction_cachep); | 4704 | kmem_cache_destroy(btrfs_transaction_cachep); |
4637 | if (btrfs_bit_radix_cachep) | ||
4638 | kmem_cache_destroy(btrfs_bit_radix_cachep); | ||
4639 | if (btrfs_path_cachep) | 4705 | if (btrfs_path_cachep) |
4640 | kmem_cache_destroy(btrfs_path_cachep); | 4706 | kmem_cache_destroy(btrfs_path_cachep); |
4641 | } | 4707 | } |
4642 | 4708 | ||
4643 | struct kmem_cache *btrfs_cache_create(const char *name, size_t size, | ||
4644 | unsigned long extra_flags, | ||
4645 | void (*ctor)(void *)) | ||
4646 | { | ||
4647 | return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT | | ||
4648 | SLAB_MEM_SPREAD | extra_flags), ctor); | ||
4649 | } | ||
4650 | |||
4651 | int btrfs_init_cachep(void) | 4709 | int btrfs_init_cachep(void) |
4652 | { | 4710 | { |
4653 | btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache", | 4711 | btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache", |
4654 | sizeof(struct btrfs_inode), | 4712 | sizeof(struct btrfs_inode), 0, |
4655 | 0, init_once); | 4713 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once); |
4656 | if (!btrfs_inode_cachep) | 4714 | if (!btrfs_inode_cachep) |
4657 | goto fail; | 4715 | goto fail; |
4658 | btrfs_trans_handle_cachep = | 4716 | |
4659 | btrfs_cache_create("btrfs_trans_handle_cache", | 4717 | btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache", |
4660 | sizeof(struct btrfs_trans_handle), | 4718 | sizeof(struct btrfs_trans_handle), 0, |
4661 | 0, NULL); | 4719 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); |
4662 | if (!btrfs_trans_handle_cachep) | 4720 | if (!btrfs_trans_handle_cachep) |
4663 | goto fail; | 4721 | goto fail; |
4664 | btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache", | 4722 | |
4665 | sizeof(struct btrfs_transaction), | 4723 | btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache", |
4666 | 0, NULL); | 4724 | sizeof(struct btrfs_transaction), 0, |
4725 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | ||
4667 | if (!btrfs_transaction_cachep) | 4726 | if (!btrfs_transaction_cachep) |
4668 | goto fail; | 4727 | goto fail; |
4669 | btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache", | 4728 | |
4670 | sizeof(struct btrfs_path), | 4729 | btrfs_path_cachep = kmem_cache_create("btrfs_path_cache", |
4671 | 0, NULL); | 4730 | sizeof(struct btrfs_path), 0, |
4731 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | ||
4672 | if (!btrfs_path_cachep) | 4732 | if (!btrfs_path_cachep) |
4673 | goto fail; | 4733 | goto fail; |
4674 | btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256, | 4734 | |
4675 | SLAB_DESTROY_BY_RCU, NULL); | ||
4676 | if (!btrfs_bit_radix_cachep) | ||
4677 | goto fail; | ||
4678 | return 0; | 4735 | return 0; |
4679 | fail: | 4736 | fail: |
4680 | btrfs_destroy_cachep(); | 4737 | btrfs_destroy_cachep(); |
@@ -4970,10 +5027,10 @@ out_fail: | |||
4970 | return err; | 5027 | return err; |
4971 | } | 5028 | } |
4972 | 5029 | ||
4973 | static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | 5030 | static int prealloc_file_range(struct btrfs_trans_handle *trans, |
4974 | u64 alloc_hint, int mode) | 5031 | struct inode *inode, u64 start, u64 end, |
5032 | u64 locked_end, u64 alloc_hint, int mode) | ||
4975 | { | 5033 | { |
4976 | struct btrfs_trans_handle *trans; | ||
4977 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5034 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4978 | struct btrfs_key ins; | 5035 | struct btrfs_key ins; |
4979 | u64 alloc_size; | 5036 | u64 alloc_size; |
@@ -4981,10 +5038,6 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | |||
4981 | u64 num_bytes = end - start; | 5038 | u64 num_bytes = end - start; |
4982 | int ret = 0; | 5039 | int ret = 0; |
4983 | 5040 | ||
4984 | trans = btrfs_join_transaction(root, 1); | ||
4985 | BUG_ON(!trans); | ||
4986 | btrfs_set_trans_block_group(trans, inode); | ||
4987 | |||
4988 | while (num_bytes > 0) { | 5041 | while (num_bytes > 0) { |
4989 | alloc_size = min(num_bytes, root->fs_info->max_extent); | 5042 | alloc_size = min(num_bytes, root->fs_info->max_extent); |
4990 | ret = btrfs_reserve_extent(trans, root, alloc_size, | 5043 | ret = btrfs_reserve_extent(trans, root, alloc_size, |
@@ -4997,7 +5050,8 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | |||
4997 | ret = insert_reserved_file_extent(trans, inode, | 5050 | ret = insert_reserved_file_extent(trans, inode, |
4998 | cur_offset, ins.objectid, | 5051 | cur_offset, ins.objectid, |
4999 | ins.offset, ins.offset, | 5052 | ins.offset, ins.offset, |
5000 | ins.offset, 0, 0, 0, | 5053 | ins.offset, locked_end, |
5054 | 0, 0, 0, | ||
5001 | BTRFS_FILE_EXTENT_PREALLOC); | 5055 | BTRFS_FILE_EXTENT_PREALLOC); |
5002 | BUG_ON(ret); | 5056 | BUG_ON(ret); |
5003 | num_bytes -= ins.offset; | 5057 | num_bytes -= ins.offset; |
@@ -5015,7 +5069,6 @@ out: | |||
5015 | BUG_ON(ret); | 5069 | BUG_ON(ret); |
5016 | } | 5070 | } |
5017 | 5071 | ||
5018 | btrfs_end_transaction(trans, root); | ||
5019 | return ret; | 5072 | return ret; |
5020 | } | 5073 | } |
5021 | 5074 | ||
@@ -5027,13 +5080,21 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5027 | u64 alloc_start; | 5080 | u64 alloc_start; |
5028 | u64 alloc_end; | 5081 | u64 alloc_end; |
5029 | u64 alloc_hint = 0; | 5082 | u64 alloc_hint = 0; |
5083 | u64 locked_end; | ||
5030 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | 5084 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; |
5031 | struct extent_map *em; | 5085 | struct extent_map *em; |
5086 | struct btrfs_trans_handle *trans; | ||
5032 | int ret; | 5087 | int ret; |
5033 | 5088 | ||
5034 | alloc_start = offset & ~mask; | 5089 | alloc_start = offset & ~mask; |
5035 | alloc_end = (offset + len + mask) & ~mask; | 5090 | alloc_end = (offset + len + mask) & ~mask; |
5036 | 5091 | ||
5092 | /* | ||
5093 | * wait for ordered IO before we have any locks. We'll loop again | ||
5094 | * below with the locks held. | ||
5095 | */ | ||
5096 | btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); | ||
5097 | |||
5037 | mutex_lock(&inode->i_mutex); | 5098 | mutex_lock(&inode->i_mutex); |
5038 | if (alloc_start > inode->i_size) { | 5099 | if (alloc_start > inode->i_size) { |
5039 | ret = btrfs_cont_expand(inode, alloc_start); | 5100 | ret = btrfs_cont_expand(inode, alloc_start); |
@@ -5041,10 +5102,21 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5041 | goto out; | 5102 | goto out; |
5042 | } | 5103 | } |
5043 | 5104 | ||
5105 | locked_end = alloc_end - 1; | ||
5044 | while (1) { | 5106 | while (1) { |
5045 | struct btrfs_ordered_extent *ordered; | 5107 | struct btrfs_ordered_extent *ordered; |
5046 | lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, | 5108 | |
5047 | alloc_end - 1, GFP_NOFS); | 5109 | trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); |
5110 | if (!trans) { | ||
5111 | ret = -EIO; | ||
5112 | goto out; | ||
5113 | } | ||
5114 | |||
5115 | /* the extent lock is ordered inside the running | ||
5116 | * transaction | ||
5117 | */ | ||
5118 | lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | ||
5119 | GFP_NOFS); | ||
5048 | ordered = btrfs_lookup_first_ordered_extent(inode, | 5120 | ordered = btrfs_lookup_first_ordered_extent(inode, |
5049 | alloc_end - 1); | 5121 | alloc_end - 1); |
5050 | if (ordered && | 5122 | if (ordered && |
@@ -5052,7 +5124,13 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5052 | ordered->file_offset < alloc_end) { | 5124 | ordered->file_offset < alloc_end) { |
5053 | btrfs_put_ordered_extent(ordered); | 5125 | btrfs_put_ordered_extent(ordered); |
5054 | unlock_extent(&BTRFS_I(inode)->io_tree, | 5126 | unlock_extent(&BTRFS_I(inode)->io_tree, |
5055 | alloc_start, alloc_end - 1, GFP_NOFS); | 5127 | alloc_start, locked_end, GFP_NOFS); |
5128 | btrfs_end_transaction(trans, BTRFS_I(inode)->root); | ||
5129 | |||
5130 | /* | ||
5131 | * we can't wait on the range with the transaction | ||
5132 | * running or with the extent lock held | ||
5133 | */ | ||
5056 | btrfs_wait_ordered_range(inode, alloc_start, | 5134 | btrfs_wait_ordered_range(inode, alloc_start, |
5057 | alloc_end - alloc_start); | 5135 | alloc_end - alloc_start); |
5058 | } else { | 5136 | } else { |
@@ -5070,8 +5148,9 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5070 | last_byte = min(extent_map_end(em), alloc_end); | 5148 | last_byte = min(extent_map_end(em), alloc_end); |
5071 | last_byte = (last_byte + mask) & ~mask; | 5149 | last_byte = (last_byte + mask) & ~mask; |
5072 | if (em->block_start == EXTENT_MAP_HOLE) { | 5150 | if (em->block_start == EXTENT_MAP_HOLE) { |
5073 | ret = prealloc_file_range(inode, cur_offset, | 5151 | ret = prealloc_file_range(trans, inode, cur_offset, |
5074 | last_byte, alloc_hint, mode); | 5152 | last_byte, locked_end + 1, |
5153 | alloc_hint, mode); | ||
5075 | if (ret < 0) { | 5154 | if (ret < 0) { |
5076 | free_extent_map(em); | 5155 | free_extent_map(em); |
5077 | break; | 5156 | break; |
@@ -5087,8 +5166,10 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5087 | break; | 5166 | break; |
5088 | } | 5167 | } |
5089 | } | 5168 | } |
5090 | unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, | 5169 | unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, |
5091 | GFP_NOFS); | 5170 | GFP_NOFS); |
5171 | |||
5172 | btrfs_end_transaction(trans, BTRFS_I(inode)->root); | ||
5092 | out: | 5173 | out: |
5093 | mutex_unlock(&inode->i_mutex); | 5174 | mutex_unlock(&inode->i_mutex); |
5094 | return ret; | 5175 | return ret; |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7594bec1be10..5e94ea6e1cbe 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -461,15 +461,9 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) | |||
461 | if (!capable(CAP_SYS_ADMIN)) | 461 | if (!capable(CAP_SYS_ADMIN)) |
462 | return -EPERM; | 462 | return -EPERM; |
463 | 463 | ||
464 | vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); | 464 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
465 | 465 | if (IS_ERR(vol_args)) | |
466 | if (!vol_args) | 466 | return PTR_ERR(vol_args); |
467 | return -ENOMEM; | ||
468 | |||
469 | if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { | ||
470 | ret = -EFAULT; | ||
471 | goto out; | ||
472 | } | ||
473 | 467 | ||
474 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 468 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
475 | namelen = strlen(vol_args->name); | 469 | namelen = strlen(vol_args->name); |
@@ -483,11 +477,13 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) | |||
483 | *devstr = '\0'; | 477 | *devstr = '\0'; |
484 | devstr = vol_args->name; | 478 | devstr = vol_args->name; |
485 | devid = simple_strtoull(devstr, &end, 10); | 479 | devid = simple_strtoull(devstr, &end, 10); |
486 | printk(KERN_INFO "resizing devid %llu\n", devid); | 480 | printk(KERN_INFO "resizing devid %llu\n", |
481 | (unsigned long long)devid); | ||
487 | } | 482 | } |
488 | device = btrfs_find_device(root, devid, NULL, NULL); | 483 | device = btrfs_find_device(root, devid, NULL, NULL); |
489 | if (!device) { | 484 | if (!device) { |
490 | printk(KERN_INFO "resizer unable to find device %llu\n", devid); | 485 | printk(KERN_INFO "resizer unable to find device %llu\n", |
486 | (unsigned long long)devid); | ||
491 | ret = -EINVAL; | 487 | ret = -EINVAL; |
492 | goto out_unlock; | 488 | goto out_unlock; |
493 | } | 489 | } |
@@ -545,7 +541,6 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg) | |||
545 | 541 | ||
546 | out_unlock: | 542 | out_unlock: |
547 | mutex_unlock(&root->fs_info->volume_mutex); | 543 | mutex_unlock(&root->fs_info->volume_mutex); |
548 | out: | ||
549 | kfree(vol_args); | 544 | kfree(vol_args); |
550 | return ret; | 545 | return ret; |
551 | } | 546 | } |
@@ -565,15 +560,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file, | |||
565 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 560 | if (root->fs_info->sb->s_flags & MS_RDONLY) |
566 | return -EROFS; | 561 | return -EROFS; |
567 | 562 | ||
568 | vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); | 563 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
569 | 564 | if (IS_ERR(vol_args)) | |
570 | if (!vol_args) | 565 | return PTR_ERR(vol_args); |
571 | return -ENOMEM; | ||
572 | |||
573 | if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { | ||
574 | ret = -EFAULT; | ||
575 | goto out; | ||
576 | } | ||
577 | 566 | ||
578 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 567 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
579 | namelen = strlen(vol_args->name); | 568 | namelen = strlen(vol_args->name); |
@@ -675,19 +664,13 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) | |||
675 | if (!capable(CAP_SYS_ADMIN)) | 664 | if (!capable(CAP_SYS_ADMIN)) |
676 | return -EPERM; | 665 | return -EPERM; |
677 | 666 | ||
678 | vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); | 667 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
668 | if (IS_ERR(vol_args)) | ||
669 | return PTR_ERR(vol_args); | ||
679 | 670 | ||
680 | if (!vol_args) | ||
681 | return -ENOMEM; | ||
682 | |||
683 | if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { | ||
684 | ret = -EFAULT; | ||
685 | goto out; | ||
686 | } | ||
687 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 671 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
688 | ret = btrfs_init_new_device(root, vol_args->name); | 672 | ret = btrfs_init_new_device(root, vol_args->name); |
689 | 673 | ||
690 | out: | ||
691 | kfree(vol_args); | 674 | kfree(vol_args); |
692 | return ret; | 675 | return ret; |
693 | } | 676 | } |
@@ -703,19 +686,13 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) | |||
703 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 686 | if (root->fs_info->sb->s_flags & MS_RDONLY) |
704 | return -EROFS; | 687 | return -EROFS; |
705 | 688 | ||
706 | vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS); | 689 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
690 | if (IS_ERR(vol_args)) | ||
691 | return PTR_ERR(vol_args); | ||
707 | 692 | ||
708 | if (!vol_args) | ||
709 | return -ENOMEM; | ||
710 | |||
711 | if (copy_from_user(vol_args, arg, sizeof(*vol_args))) { | ||
712 | ret = -EFAULT; | ||
713 | goto out; | ||
714 | } | ||
715 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; | 693 | vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; |
716 | ret = btrfs_rm_device(root, vol_args->name); | 694 | ret = btrfs_rm_device(root, vol_args->name); |
717 | 695 | ||
718 | out: | ||
719 | kfree(vol_args); | 696 | kfree(vol_args); |
720 | return ret; | 697 | return ret; |
721 | } | 698 | } |
@@ -830,7 +807,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
830 | BUG_ON(!trans); | 807 | BUG_ON(!trans); |
831 | 808 | ||
832 | /* punch hole in destination first */ | 809 | /* punch hole in destination first */ |
833 | btrfs_drop_extents(trans, root, inode, off, off+len, 0, &hint_byte); | 810 | btrfs_drop_extents(trans, root, inode, off, off + len, |
811 | off + len, 0, &hint_byte); | ||
834 | 812 | ||
835 | /* clone data */ | 813 | /* clone data */ |
836 | key.objectid = src->i_ino; | 814 | key.objectid = src->i_ino; |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 53c87b197d70..d6f0806c682f 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -489,7 +489,7 @@ again: | |||
489 | /* start IO across the range first to instantiate any delalloc | 489 | /* start IO across the range first to instantiate any delalloc |
490 | * extents | 490 | * extents |
491 | */ | 491 | */ |
492 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_NONE); | 492 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); |
493 | 493 | ||
494 | /* The compression code will leave pages locked but return from | 494 | /* The compression code will leave pages locked but return from |
495 | * writepage without setting the page writeback. Starting again | 495 | * writepage without setting the page writeback. Starting again |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9744af9d71e9..3536bdb2d7cb 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -68,7 +68,7 @@ enum { | |||
68 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, | 68 | Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, |
69 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, | 69 | Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, |
70 | Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog, | 70 | Opt_ssd, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_notreelog, |
71 | Opt_flushoncommit, Opt_err, | 71 | Opt_ratio, Opt_flushoncommit, Opt_err, |
72 | }; | 72 | }; |
73 | 73 | ||
74 | static match_table_t tokens = { | 74 | static match_table_t tokens = { |
@@ -87,6 +87,7 @@ static match_table_t tokens = { | |||
87 | {Opt_noacl, "noacl"}, | 87 | {Opt_noacl, "noacl"}, |
88 | {Opt_notreelog, "notreelog"}, | 88 | {Opt_notreelog, "notreelog"}, |
89 | {Opt_flushoncommit, "flushoncommit"}, | 89 | {Opt_flushoncommit, "flushoncommit"}, |
90 | {Opt_ratio, "metadata_ratio=%d"}, | ||
90 | {Opt_err, NULL}, | 91 | {Opt_err, NULL}, |
91 | }; | 92 | }; |
92 | 93 | ||
@@ -195,7 +196,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
195 | info->max_extent = max_t(u64, | 196 | info->max_extent = max_t(u64, |
196 | info->max_extent, root->sectorsize); | 197 | info->max_extent, root->sectorsize); |
197 | printk(KERN_INFO "btrfs: max_extent at %llu\n", | 198 | printk(KERN_INFO "btrfs: max_extent at %llu\n", |
198 | info->max_extent); | 199 | (unsigned long long)info->max_extent); |
199 | } | 200 | } |
200 | break; | 201 | break; |
201 | case Opt_max_inline: | 202 | case Opt_max_inline: |
@@ -210,7 +211,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
210 | root->sectorsize); | 211 | root->sectorsize); |
211 | } | 212 | } |
212 | printk(KERN_INFO "btrfs: max_inline at %llu\n", | 213 | printk(KERN_INFO "btrfs: max_inline at %llu\n", |
213 | info->max_inline); | 214 | (unsigned long long)info->max_inline); |
214 | } | 215 | } |
215 | break; | 216 | break; |
216 | case Opt_alloc_start: | 217 | case Opt_alloc_start: |
@@ -220,7 +221,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
220 | kfree(num); | 221 | kfree(num); |
221 | printk(KERN_INFO | 222 | printk(KERN_INFO |
222 | "btrfs: allocations start at %llu\n", | 223 | "btrfs: allocations start at %llu\n", |
223 | info->alloc_start); | 224 | (unsigned long long)info->alloc_start); |
224 | } | 225 | } |
225 | break; | 226 | break; |
226 | case Opt_noacl: | 227 | case Opt_noacl: |
@@ -234,6 +235,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
234 | printk(KERN_INFO "btrfs: turning on flush-on-commit\n"); | 235 | printk(KERN_INFO "btrfs: turning on flush-on-commit\n"); |
235 | btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT); | 236 | btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT); |
236 | break; | 237 | break; |
238 | case Opt_ratio: | ||
239 | intarg = 0; | ||
240 | match_int(&args[0], &intarg); | ||
241 | if (intarg) { | ||
242 | info->metadata_ratio = intarg; | ||
243 | printk(KERN_INFO "btrfs: metadata ratio %d\n", | ||
244 | info->metadata_ratio); | ||
245 | } | ||
246 | break; | ||
237 | default: | 247 | default: |
238 | break; | 248 | break; |
239 | } | 249 | } |
@@ -410,11 +420,14 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
410 | if (btrfs_test_opt(root, NOBARRIER)) | 420 | if (btrfs_test_opt(root, NOBARRIER)) |
411 | seq_puts(seq, ",nobarrier"); | 421 | seq_puts(seq, ",nobarrier"); |
412 | if (info->max_extent != (u64)-1) | 422 | if (info->max_extent != (u64)-1) |
413 | seq_printf(seq, ",max_extent=%llu", info->max_extent); | 423 | seq_printf(seq, ",max_extent=%llu", |
424 | (unsigned long long)info->max_extent); | ||
414 | if (info->max_inline != 8192 * 1024) | 425 | if (info->max_inline != 8192 * 1024) |
415 | seq_printf(seq, ",max_inline=%llu", info->max_inline); | 426 | seq_printf(seq, ",max_inline=%llu", |
427 | (unsigned long long)info->max_inline); | ||
416 | if (info->alloc_start != 0) | 428 | if (info->alloc_start != 0) |
417 | seq_printf(seq, ",alloc_start=%llu", info->alloc_start); | 429 | seq_printf(seq, ",alloc_start=%llu", |
430 | (unsigned long long)info->alloc_start); | ||
418 | if (info->thread_pool_size != min_t(unsigned long, | 431 | if (info->thread_pool_size != min_t(unsigned long, |
419 | num_online_cpus() + 2, 8)) | 432 | num_online_cpus() + 2, 8)) |
420 | seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); | 433 | seq_printf(seq, ",thread_pool=%d", info->thread_pool_size); |
@@ -635,14 +648,9 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, | |||
635 | if (!capable(CAP_SYS_ADMIN)) | 648 | if (!capable(CAP_SYS_ADMIN)) |
636 | return -EPERM; | 649 | return -EPERM; |
637 | 650 | ||
638 | vol = kmalloc(sizeof(*vol), GFP_KERNEL); | 651 | vol = memdup_user((void __user *)arg, sizeof(*vol)); |
639 | if (!vol) | 652 | if (IS_ERR(vol)) |
640 | return -ENOMEM; | 653 | return PTR_ERR(vol); |
641 | |||
642 | if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) { | ||
643 | ret = -EFAULT; | ||
644 | goto out; | ||
645 | } | ||
646 | 654 | ||
647 | switch (cmd) { | 655 | switch (cmd) { |
648 | case BTRFS_IOC_SCAN_DEV: | 656 | case BTRFS_IOC_SCAN_DEV: |
@@ -650,7 +658,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, | |||
650 | &btrfs_fs_type, &fs_devices); | 658 | &btrfs_fs_type, &fs_devices); |
651 | break; | 659 | break; |
652 | } | 660 | } |
653 | out: | 661 | |
654 | kfree(vol); | 662 | kfree(vol); |
655 | return ret; | 663 | return ret; |
656 | } | 664 | } |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2869b3361eb6..01b143605ec1 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -687,7 +687,13 @@ static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info) | |||
687 | prepare_to_wait(&info->transaction_wait, &wait, | 687 | prepare_to_wait(&info->transaction_wait, &wait, |
688 | TASK_UNINTERRUPTIBLE); | 688 | TASK_UNINTERRUPTIBLE); |
689 | mutex_unlock(&info->trans_mutex); | 689 | mutex_unlock(&info->trans_mutex); |
690 | |||
691 | atomic_dec(&info->throttles); | ||
692 | wake_up(&info->transaction_throttle); | ||
693 | |||
690 | schedule(); | 694 | schedule(); |
695 | |||
696 | atomic_inc(&info->throttles); | ||
691 | mutex_lock(&info->trans_mutex); | 697 | mutex_lock(&info->trans_mutex); |
692 | finish_wait(&info->transaction_wait, &wait); | 698 | finish_wait(&info->transaction_wait, &wait); |
693 | } | 699 | } |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 25f20ea11f27..db5e212e8445 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -536,7 +536,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
536 | saved_nbytes = inode_get_bytes(inode); | 536 | saved_nbytes = inode_get_bytes(inode); |
537 | /* drop any overlapping extents */ | 537 | /* drop any overlapping extents */ |
538 | ret = btrfs_drop_extents(trans, root, inode, | 538 | ret = btrfs_drop_extents(trans, root, inode, |
539 | start, extent_end, start, &alloc_hint); | 539 | start, extent_end, extent_end, start, &alloc_hint); |
540 | BUG_ON(ret); | 540 | BUG_ON(ret); |
541 | 541 | ||
542 | if (found_type == BTRFS_FILE_EXTENT_REG || | 542 | if (found_type == BTRFS_FILE_EXTENT_REG || |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e0913e469728..5f01dad4b696 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -125,6 +125,20 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid) | |||
125 | return NULL; | 125 | return NULL; |
126 | } | 126 | } |
127 | 127 | ||
128 | static void requeue_list(struct btrfs_pending_bios *pending_bios, | ||
129 | struct bio *head, struct bio *tail) | ||
130 | { | ||
131 | |||
132 | struct bio *old_head; | ||
133 | |||
134 | old_head = pending_bios->head; | ||
135 | pending_bios->head = head; | ||
136 | if (pending_bios->tail) | ||
137 | tail->bi_next = old_head; | ||
138 | else | ||
139 | pending_bios->tail = tail; | ||
140 | } | ||
141 | |||
128 | /* | 142 | /* |
129 | * we try to collect pending bios for a device so we don't get a large | 143 | * we try to collect pending bios for a device so we don't get a large |
130 | * number of procs sending bios down to the same device. This greatly | 144 | * number of procs sending bios down to the same device. This greatly |
@@ -141,10 +155,12 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
141 | struct bio *pending; | 155 | struct bio *pending; |
142 | struct backing_dev_info *bdi; | 156 | struct backing_dev_info *bdi; |
143 | struct btrfs_fs_info *fs_info; | 157 | struct btrfs_fs_info *fs_info; |
158 | struct btrfs_pending_bios *pending_bios; | ||
144 | struct bio *tail; | 159 | struct bio *tail; |
145 | struct bio *cur; | 160 | struct bio *cur; |
146 | int again = 0; | 161 | int again = 0; |
147 | unsigned long num_run = 0; | 162 | unsigned long num_run; |
163 | unsigned long num_sync_run; | ||
148 | unsigned long limit; | 164 | unsigned long limit; |
149 | unsigned long last_waited = 0; | 165 | unsigned long last_waited = 0; |
150 | 166 | ||
@@ -153,20 +169,30 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) | |||
153 | limit = btrfs_async_submit_limit(fs_info); | 169 | limit = btrfs_async_submit_limit(fs_info); |
154 | limit = limit * 2 / 3; | 170 | limit = limit * 2 / 3; |
155 | 171 | ||
172 | /* we want to make sure that every time we switch from the sync | ||
173 | * list to the normal list, we unplug | ||
174 | */ | ||
175 | num_sync_run = 0; | ||
176 | |||
156 | loop: | 177 | loop: |
157 | spin_lock(&device->io_lock); | 178 | spin_lock(&device->io_lock); |
179 | num_run = 0; | ||
158 | 180 | ||
159 | loop_lock: | 181 | loop_lock: |
182 | |||
160 | /* take all the bios off the list at once and process them | 183 | /* take all the bios off the list at once and process them |
161 | * later on (without the lock held). But, remember the | 184 | * later on (without the lock held). But, remember the |
162 | * tail and other pointers so the bios can be properly reinserted | 185 | * tail and other pointers so the bios can be properly reinserted |
163 | * into the list if we hit congestion | 186 | * into the list if we hit congestion |
164 | */ | 187 | */ |
165 | pending = device->pending_bios; | 188 | if (device->pending_sync_bios.head) |
166 | tail = device->pending_bio_tail; | 189 | pending_bios = &device->pending_sync_bios; |
190 | else | ||
191 | pending_bios = &device->pending_bios; | ||
192 | |||
193 | pending = pending_bios->head; | ||
194 | tail = pending_bios->tail; | ||
167 | WARN_ON(pending && !tail); | 195 | WARN_ON(pending && !tail); |
168 | device->pending_bios = NULL; | ||
169 | device->pending_bio_tail = NULL; | ||
170 | 196 | ||
171 | /* | 197 | /* |
172 | * if pending was null this time around, no bios need processing | 198 | * if pending was null this time around, no bios need processing |
@@ -176,16 +202,41 @@ loop_lock: | |||
176 | * device->running_pending is used to synchronize with the | 202 | * device->running_pending is used to synchronize with the |
177 | * schedule_bio code. | 203 | * schedule_bio code. |
178 | */ | 204 | */ |
179 | if (pending) { | 205 | if (device->pending_sync_bios.head == NULL && |
180 | again = 1; | 206 | device->pending_bios.head == NULL) { |
181 | device->running_pending = 1; | ||
182 | } else { | ||
183 | again = 0; | 207 | again = 0; |
184 | device->running_pending = 0; | 208 | device->running_pending = 0; |
209 | } else { | ||
210 | again = 1; | ||
211 | device->running_pending = 1; | ||
185 | } | 212 | } |
213 | |||
214 | pending_bios->head = NULL; | ||
215 | pending_bios->tail = NULL; | ||
216 | |||
186 | spin_unlock(&device->io_lock); | 217 | spin_unlock(&device->io_lock); |
187 | 218 | ||
219 | /* | ||
220 | * if we're doing the regular priority list, make sure we unplug | ||
221 | * for any high prio bios we've sent down | ||
222 | */ | ||
223 | if (pending_bios == &device->pending_bios && num_sync_run > 0) { | ||
224 | num_sync_run = 0; | ||
225 | blk_run_backing_dev(bdi, NULL); | ||
226 | } | ||
227 | |||
188 | while (pending) { | 228 | while (pending) { |
229 | |||
230 | rmb(); | ||
231 | if (pending_bios != &device->pending_sync_bios && | ||
232 | device->pending_sync_bios.head && | ||
233 | num_run > 16) { | ||
234 | cond_resched(); | ||
235 | spin_lock(&device->io_lock); | ||
236 | requeue_list(pending_bios, pending, tail); | ||
237 | goto loop_lock; | ||
238 | } | ||
239 | |||
189 | cur = pending; | 240 | cur = pending; |
190 | pending = pending->bi_next; | 241 | pending = pending->bi_next; |
191 | cur->bi_next = NULL; | 242 | cur->bi_next = NULL; |
@@ -196,10 +247,18 @@ loop_lock: | |||
196 | wake_up(&fs_info->async_submit_wait); | 247 | wake_up(&fs_info->async_submit_wait); |
197 | 248 | ||
198 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); | 249 | BUG_ON(atomic_read(&cur->bi_cnt) == 0); |
199 | bio_get(cur); | ||
200 | submit_bio(cur->bi_rw, cur); | 250 | submit_bio(cur->bi_rw, cur); |
201 | bio_put(cur); | ||
202 | num_run++; | 251 | num_run++; |
252 | if (bio_sync(cur)) | ||
253 | num_sync_run++; | ||
254 | |||
255 | if (need_resched()) { | ||
256 | if (num_sync_run) { | ||
257 | blk_run_backing_dev(bdi, NULL); | ||
258 | num_sync_run = 0; | ||
259 | } | ||
260 | cond_resched(); | ||
261 | } | ||
203 | 262 | ||
204 | /* | 263 | /* |
205 | * we made progress, there is more work to do and the bdi | 264 | * we made progress, there is more work to do and the bdi |
@@ -208,7 +267,6 @@ loop_lock: | |||
208 | */ | 267 | */ |
209 | if (pending && bdi_write_congested(bdi) && num_run > 16 && | 268 | if (pending && bdi_write_congested(bdi) && num_run > 16 && |
210 | fs_info->fs_devices->open_devices > 1) { | 269 | fs_info->fs_devices->open_devices > 1) { |
211 | struct bio *old_head; | ||
212 | struct io_context *ioc; | 270 | struct io_context *ioc; |
213 | 271 | ||
214 | ioc = current->io_context; | 272 | ioc = current->io_context; |
@@ -233,17 +291,17 @@ loop_lock: | |||
233 | * against it before looping | 291 | * against it before looping |
234 | */ | 292 | */ |
235 | last_waited = ioc->last_waited; | 293 | last_waited = ioc->last_waited; |
294 | if (need_resched()) { | ||
295 | if (num_sync_run) { | ||
296 | blk_run_backing_dev(bdi, NULL); | ||
297 | num_sync_run = 0; | ||
298 | } | ||
299 | cond_resched(); | ||
300 | } | ||
236 | continue; | 301 | continue; |
237 | } | 302 | } |
238 | spin_lock(&device->io_lock); | 303 | spin_lock(&device->io_lock); |
239 | 304 | requeue_list(pending_bios, pending, tail); | |
240 | old_head = device->pending_bios; | ||
241 | device->pending_bios = pending; | ||
242 | if (device->pending_bio_tail) | ||
243 | tail->bi_next = old_head; | ||
244 | else | ||
245 | device->pending_bio_tail = tail; | ||
246 | |||
247 | device->running_pending = 1; | 305 | device->running_pending = 1; |
248 | 306 | ||
249 | spin_unlock(&device->io_lock); | 307 | spin_unlock(&device->io_lock); |
@@ -251,11 +309,18 @@ loop_lock: | |||
251 | goto done; | 309 | goto done; |
252 | } | 310 | } |
253 | } | 311 | } |
312 | |||
313 | if (num_sync_run) { | ||
314 | num_sync_run = 0; | ||
315 | blk_run_backing_dev(bdi, NULL); | ||
316 | } | ||
317 | |||
318 | cond_resched(); | ||
254 | if (again) | 319 | if (again) |
255 | goto loop; | 320 | goto loop; |
256 | 321 | ||
257 | spin_lock(&device->io_lock); | 322 | spin_lock(&device->io_lock); |
258 | if (device->pending_bios) | 323 | if (device->pending_bios.head || device->pending_sync_bios.head) |
259 | goto loop_lock; | 324 | goto loop_lock; |
260 | spin_unlock(&device->io_lock); | 325 | spin_unlock(&device->io_lock); |
261 | 326 | ||
@@ -1478,7 +1543,7 @@ static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, | |||
1478 | btrfs_set_device_io_align(leaf, dev_item, device->io_align); | 1543 | btrfs_set_device_io_align(leaf, dev_item, device->io_align); |
1479 | btrfs_set_device_io_width(leaf, dev_item, device->io_width); | 1544 | btrfs_set_device_io_width(leaf, dev_item, device->io_width); |
1480 | btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); | 1545 | btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); |
1481 | btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); | 1546 | btrfs_set_device_total_bytes(leaf, dev_item, device->disk_total_bytes); |
1482 | btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); | 1547 | btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); |
1483 | btrfs_mark_buffer_dirty(leaf); | 1548 | btrfs_mark_buffer_dirty(leaf); |
1484 | 1549 | ||
@@ -1875,14 +1940,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
1875 | device->total_bytes = new_size; | 1940 | device->total_bytes = new_size; |
1876 | if (device->writeable) | 1941 | if (device->writeable) |
1877 | device->fs_devices->total_rw_bytes -= diff; | 1942 | device->fs_devices->total_rw_bytes -= diff; |
1878 | ret = btrfs_update_device(trans, device); | ||
1879 | if (ret) { | ||
1880 | unlock_chunks(root); | ||
1881 | btrfs_end_transaction(trans, root); | ||
1882 | goto done; | ||
1883 | } | ||
1884 | WARN_ON(diff > old_total); | ||
1885 | btrfs_set_super_total_bytes(super_copy, old_total - diff); | ||
1886 | unlock_chunks(root); | 1943 | unlock_chunks(root); |
1887 | btrfs_end_transaction(trans, root); | 1944 | btrfs_end_transaction(trans, root); |
1888 | 1945 | ||
@@ -1914,7 +1971,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
1914 | length = btrfs_dev_extent_length(l, dev_extent); | 1971 | length = btrfs_dev_extent_length(l, dev_extent); |
1915 | 1972 | ||
1916 | if (key.offset + length <= new_size) | 1973 | if (key.offset + length <= new_size) |
1917 | goto done; | 1974 | break; |
1918 | 1975 | ||
1919 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); | 1976 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); |
1920 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); | 1977 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); |
@@ -1927,6 +1984,26 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
1927 | goto done; | 1984 | goto done; |
1928 | } | 1985 | } |
1929 | 1986 | ||
1987 | /* Shrinking succeeded, else we would be at "done". */ | ||
1988 | trans = btrfs_start_transaction(root, 1); | ||
1989 | if (!trans) { | ||
1990 | ret = -ENOMEM; | ||
1991 | goto done; | ||
1992 | } | ||
1993 | lock_chunks(root); | ||
1994 | |||
1995 | device->disk_total_bytes = new_size; | ||
1996 | /* Now btrfs_update_device() will change the on-disk size. */ | ||
1997 | ret = btrfs_update_device(trans, device); | ||
1998 | if (ret) { | ||
1999 | unlock_chunks(root); | ||
2000 | btrfs_end_transaction(trans, root); | ||
2001 | goto done; | ||
2002 | } | ||
2003 | WARN_ON(diff > old_total); | ||
2004 | btrfs_set_super_total_bytes(super_copy, old_total - diff); | ||
2005 | unlock_chunks(root); | ||
2006 | btrfs_end_transaction(trans, root); | ||
1930 | done: | 2007 | done: |
1931 | btrfs_free_path(path); | 2008 | btrfs_free_path(path); |
1932 | return ret; | 2009 | return ret; |
@@ -2497,7 +2574,7 @@ again: | |||
2497 | max_errors = 1; | 2574 | max_errors = 1; |
2498 | } | 2575 | } |
2499 | } | 2576 | } |
2500 | if (multi_ret && rw == WRITE && | 2577 | if (multi_ret && (rw & (1 << BIO_RW)) && |
2501 | stripes_allocated < stripes_required) { | 2578 | stripes_allocated < stripes_required) { |
2502 | stripes_allocated = map->num_stripes; | 2579 | stripes_allocated = map->num_stripes; |
2503 | free_extent_map(em); | 2580 | free_extent_map(em); |
@@ -2762,6 +2839,7 @@ static noinline int schedule_bio(struct btrfs_root *root, | |||
2762 | int rw, struct bio *bio) | 2839 | int rw, struct bio *bio) |
2763 | { | 2840 | { |
2764 | int should_queue = 1; | 2841 | int should_queue = 1; |
2842 | struct btrfs_pending_bios *pending_bios; | ||
2765 | 2843 | ||
2766 | /* don't bother with additional async steps for reads, right now */ | 2844 | /* don't bother with additional async steps for reads, right now */ |
2767 | if (!(rw & (1 << BIO_RW))) { | 2845 | if (!(rw & (1 << BIO_RW))) { |
@@ -2783,13 +2861,17 @@ static noinline int schedule_bio(struct btrfs_root *root, | |||
2783 | bio->bi_rw |= rw; | 2861 | bio->bi_rw |= rw; |
2784 | 2862 | ||
2785 | spin_lock(&device->io_lock); | 2863 | spin_lock(&device->io_lock); |
2864 | if (bio_sync(bio)) | ||
2865 | pending_bios = &device->pending_sync_bios; | ||
2866 | else | ||
2867 | pending_bios = &device->pending_bios; | ||
2786 | 2868 | ||
2787 | if (device->pending_bio_tail) | 2869 | if (pending_bios->tail) |
2788 | device->pending_bio_tail->bi_next = bio; | 2870 | pending_bios->tail->bi_next = bio; |
2789 | 2871 | ||
2790 | device->pending_bio_tail = bio; | 2872 | pending_bios->tail = bio; |
2791 | if (!device->pending_bios) | 2873 | if (!pending_bios->head) |
2792 | device->pending_bios = bio; | 2874 | pending_bios->head = bio; |
2793 | if (device->running_pending) | 2875 | if (device->running_pending) |
2794 | should_queue = 0; | 2876 | should_queue = 0; |
2795 | 2877 | ||
@@ -3006,7 +3088,8 @@ static int fill_device_from_item(struct extent_buffer *leaf, | |||
3006 | unsigned long ptr; | 3088 | unsigned long ptr; |
3007 | 3089 | ||
3008 | device->devid = btrfs_device_id(leaf, dev_item); | 3090 | device->devid = btrfs_device_id(leaf, dev_item); |
3009 | device->total_bytes = btrfs_device_total_bytes(leaf, dev_item); | 3091 | device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item); |
3092 | device->total_bytes = device->disk_total_bytes; | ||
3010 | device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); | 3093 | device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); |
3011 | device->type = btrfs_device_type(leaf, dev_item); | 3094 | device->type = btrfs_device_type(leaf, dev_item); |
3012 | device->io_align = btrfs_device_io_align(leaf, dev_item); | 3095 | device->io_align = btrfs_device_io_align(leaf, dev_item); |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 2185de72ff7d..5c3ff6d02fd7 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -23,13 +23,22 @@ | |||
23 | #include "async-thread.h" | 23 | #include "async-thread.h" |
24 | 24 | ||
25 | struct buffer_head; | 25 | struct buffer_head; |
26 | struct btrfs_pending_bios { | ||
27 | struct bio *head; | ||
28 | struct bio *tail; | ||
29 | }; | ||
30 | |||
26 | struct btrfs_device { | 31 | struct btrfs_device { |
27 | struct list_head dev_list; | 32 | struct list_head dev_list; |
28 | struct list_head dev_alloc_list; | 33 | struct list_head dev_alloc_list; |
29 | struct btrfs_fs_devices *fs_devices; | 34 | struct btrfs_fs_devices *fs_devices; |
30 | struct btrfs_root *dev_root; | 35 | struct btrfs_root *dev_root; |
31 | struct bio *pending_bios; | 36 | |
32 | struct bio *pending_bio_tail; | 37 | /* regular prio bios */ |
38 | struct btrfs_pending_bios pending_bios; | ||
39 | /* WRITE_SYNC bios */ | ||
40 | struct btrfs_pending_bios pending_sync_bios; | ||
41 | |||
33 | int running_pending; | 42 | int running_pending; |
34 | u64 generation; | 43 | u64 generation; |
35 | 44 | ||
@@ -52,6 +61,9 @@ struct btrfs_device { | |||
52 | /* size of the device */ | 61 | /* size of the device */ |
53 | u64 total_bytes; | 62 | u64 total_bytes; |
54 | 63 | ||
64 | /* size of the disk */ | ||
65 | u64 disk_total_bytes; | ||
66 | |||
55 | /* bytes used */ | 67 | /* bytes used */ |
56 | u64 bytes_used; | 68 | u64 bytes_used; |
57 | 69 | ||
diff --git a/fs/buffer.c b/fs/buffer.c index 13edf7ad3ff1..aed297739eb0 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -360,7 +360,7 @@ still_busy: | |||
360 | * Completion handler for block_write_full_page() - pages which are unlocked | 360 | * Completion handler for block_write_full_page() - pages which are unlocked |
361 | * during I/O, and which have PageWriteback cleared upon I/O completion. | 361 | * during I/O, and which have PageWriteback cleared upon I/O completion. |
362 | */ | 362 | */ |
363 | static void end_buffer_async_write(struct buffer_head *bh, int uptodate) | 363 | void end_buffer_async_write(struct buffer_head *bh, int uptodate) |
364 | { | 364 | { |
365 | char b[BDEVNAME_SIZE]; | 365 | char b[BDEVNAME_SIZE]; |
366 | unsigned long flags; | 366 | unsigned long flags; |
@@ -438,11 +438,17 @@ static void mark_buffer_async_read(struct buffer_head *bh) | |||
438 | set_buffer_async_read(bh); | 438 | set_buffer_async_read(bh); |
439 | } | 439 | } |
440 | 440 | ||
441 | void mark_buffer_async_write(struct buffer_head *bh) | 441 | void mark_buffer_async_write_endio(struct buffer_head *bh, |
442 | bh_end_io_t *handler) | ||
442 | { | 443 | { |
443 | bh->b_end_io = end_buffer_async_write; | 444 | bh->b_end_io = handler; |
444 | set_buffer_async_write(bh); | 445 | set_buffer_async_write(bh); |
445 | } | 446 | } |
447 | |||
448 | void mark_buffer_async_write(struct buffer_head *bh) | ||
449 | { | ||
450 | mark_buffer_async_write_endio(bh, end_buffer_async_write); | ||
451 | } | ||
446 | EXPORT_SYMBOL(mark_buffer_async_write); | 452 | EXPORT_SYMBOL(mark_buffer_async_write); |
447 | 453 | ||
448 | 454 | ||
@@ -547,7 +553,7 @@ repeat: | |||
547 | return err; | 553 | return err; |
548 | } | 554 | } |
549 | 555 | ||
550 | void do_thaw_all(unsigned long unused) | 556 | void do_thaw_all(struct work_struct *work) |
551 | { | 557 | { |
552 | struct super_block *sb; | 558 | struct super_block *sb; |
553 | char b[BDEVNAME_SIZE]; | 559 | char b[BDEVNAME_SIZE]; |
@@ -567,6 +573,7 @@ restart: | |||
567 | goto restart; | 573 | goto restart; |
568 | } | 574 | } |
569 | spin_unlock(&sb_lock); | 575 | spin_unlock(&sb_lock); |
576 | kfree(work); | ||
570 | printk(KERN_WARNING "Emergency Thaw complete\n"); | 577 | printk(KERN_WARNING "Emergency Thaw complete\n"); |
571 | } | 578 | } |
572 | 579 | ||
@@ -577,7 +584,13 @@ restart: | |||
577 | */ | 584 | */ |
578 | void emergency_thaw_all(void) | 585 | void emergency_thaw_all(void) |
579 | { | 586 | { |
580 | pdflush_operation(do_thaw_all, 0); | 587 | struct work_struct *work; |
588 | |||
589 | work = kmalloc(sizeof(*work), GFP_ATOMIC); | ||
590 | if (work) { | ||
591 | INIT_WORK(work, do_thaw_all); | ||
592 | schedule_work(work); | ||
593 | } | ||
581 | } | 594 | } |
582 | 595 | ||
583 | /** | 596 | /** |
@@ -1608,7 +1621,8 @@ EXPORT_SYMBOL(unmap_underlying_metadata); | |||
1608 | * unplugging the device queue. | 1621 | * unplugging the device queue. |
1609 | */ | 1622 | */ |
1610 | static int __block_write_full_page(struct inode *inode, struct page *page, | 1623 | static int __block_write_full_page(struct inode *inode, struct page *page, |
1611 | get_block_t *get_block, struct writeback_control *wbc) | 1624 | get_block_t *get_block, struct writeback_control *wbc, |
1625 | bh_end_io_t *handler) | ||
1612 | { | 1626 | { |
1613 | int err; | 1627 | int err; |
1614 | sector_t block; | 1628 | sector_t block; |
@@ -1693,7 +1707,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, | |||
1693 | continue; | 1707 | continue; |
1694 | } | 1708 | } |
1695 | if (test_clear_buffer_dirty(bh)) { | 1709 | if (test_clear_buffer_dirty(bh)) { |
1696 | mark_buffer_async_write(bh); | 1710 | mark_buffer_async_write_endio(bh, handler); |
1697 | } else { | 1711 | } else { |
1698 | unlock_buffer(bh); | 1712 | unlock_buffer(bh); |
1699 | } | 1713 | } |
@@ -1746,7 +1760,7 @@ recover: | |||
1746 | if (buffer_mapped(bh) && buffer_dirty(bh) && | 1760 | if (buffer_mapped(bh) && buffer_dirty(bh) && |
1747 | !buffer_delay(bh)) { | 1761 | !buffer_delay(bh)) { |
1748 | lock_buffer(bh); | 1762 | lock_buffer(bh); |
1749 | mark_buffer_async_write(bh); | 1763 | mark_buffer_async_write_endio(bh, handler); |
1750 | } else { | 1764 | } else { |
1751 | /* | 1765 | /* |
1752 | * The buffer may have been set dirty during | 1766 | * The buffer may have been set dirty during |
@@ -2383,7 +2397,8 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, | |||
2383 | if ((page->mapping != inode->i_mapping) || | 2397 | if ((page->mapping != inode->i_mapping) || |
2384 | (page_offset(page) > size)) { | 2398 | (page_offset(page) > size)) { |
2385 | /* page got truncated out from underneath us */ | 2399 | /* page got truncated out from underneath us */ |
2386 | goto out_unlock; | 2400 | unlock_page(page); |
2401 | goto out; | ||
2387 | } | 2402 | } |
2388 | 2403 | ||
2389 | /* page is wholly or partially inside EOF */ | 2404 | /* page is wholly or partially inside EOF */ |
@@ -2397,14 +2412,15 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, | |||
2397 | ret = block_commit_write(page, 0, end); | 2412 | ret = block_commit_write(page, 0, end); |
2398 | 2413 | ||
2399 | if (unlikely(ret)) { | 2414 | if (unlikely(ret)) { |
2415 | unlock_page(page); | ||
2400 | if (ret == -ENOMEM) | 2416 | if (ret == -ENOMEM) |
2401 | ret = VM_FAULT_OOM; | 2417 | ret = VM_FAULT_OOM; |
2402 | else /* -ENOSPC, -EIO, etc */ | 2418 | else /* -ENOSPC, -EIO, etc */ |
2403 | ret = VM_FAULT_SIGBUS; | 2419 | ret = VM_FAULT_SIGBUS; |
2404 | } | 2420 | } else |
2421 | ret = VM_FAULT_LOCKED; | ||
2405 | 2422 | ||
2406 | out_unlock: | 2423 | out: |
2407 | unlock_page(page); | ||
2408 | return ret; | 2424 | return ret; |
2409 | } | 2425 | } |
2410 | 2426 | ||
@@ -2672,7 +2688,8 @@ int nobh_writepage(struct page *page, get_block_t *get_block, | |||
2672 | out: | 2688 | out: |
2673 | ret = mpage_writepage(page, get_block, wbc); | 2689 | ret = mpage_writepage(page, get_block, wbc); |
2674 | if (ret == -EAGAIN) | 2690 | if (ret == -EAGAIN) |
2675 | ret = __block_write_full_page(inode, page, get_block, wbc); | 2691 | ret = __block_write_full_page(inode, page, get_block, wbc, |
2692 | end_buffer_async_write); | ||
2676 | return ret; | 2693 | return ret; |
2677 | } | 2694 | } |
2678 | EXPORT_SYMBOL(nobh_writepage); | 2695 | EXPORT_SYMBOL(nobh_writepage); |
@@ -2830,9 +2847,10 @@ out: | |||
2830 | 2847 | ||
2831 | /* | 2848 | /* |
2832 | * The generic ->writepage function for buffer-backed address_spaces | 2849 | * The generic ->writepage function for buffer-backed address_spaces |
2850 | * this form passes in the end_io handler used to finish the IO. | ||
2833 | */ | 2851 | */ |
2834 | int block_write_full_page(struct page *page, get_block_t *get_block, | 2852 | int block_write_full_page_endio(struct page *page, get_block_t *get_block, |
2835 | struct writeback_control *wbc) | 2853 | struct writeback_control *wbc, bh_end_io_t *handler) |
2836 | { | 2854 | { |
2837 | struct inode * const inode = page->mapping->host; | 2855 | struct inode * const inode = page->mapping->host; |
2838 | loff_t i_size = i_size_read(inode); | 2856 | loff_t i_size = i_size_read(inode); |
@@ -2841,7 +2859,8 @@ int block_write_full_page(struct page *page, get_block_t *get_block, | |||
2841 | 2859 | ||
2842 | /* Is the page fully inside i_size? */ | 2860 | /* Is the page fully inside i_size? */ |
2843 | if (page->index < end_index) | 2861 | if (page->index < end_index) |
2844 | return __block_write_full_page(inode, page, get_block, wbc); | 2862 | return __block_write_full_page(inode, page, get_block, wbc, |
2863 | handler); | ||
2845 | 2864 | ||
2846 | /* Is the page fully outside i_size? (truncate in progress) */ | 2865 | /* Is the page fully outside i_size? (truncate in progress) */ |
2847 | offset = i_size & (PAGE_CACHE_SIZE-1); | 2866 | offset = i_size & (PAGE_CACHE_SIZE-1); |
@@ -2864,9 +2883,20 @@ int block_write_full_page(struct page *page, get_block_t *get_block, | |||
2864 | * writes to that region are not written out to the file." | 2883 | * writes to that region are not written out to the file." |
2865 | */ | 2884 | */ |
2866 | zero_user_segment(page, offset, PAGE_CACHE_SIZE); | 2885 | zero_user_segment(page, offset, PAGE_CACHE_SIZE); |
2867 | return __block_write_full_page(inode, page, get_block, wbc); | 2886 | return __block_write_full_page(inode, page, get_block, wbc, handler); |
2887 | } | ||
2888 | |||
2889 | /* | ||
2890 | * The generic ->writepage function for buffer-backed address_spaces | ||
2891 | */ | ||
2892 | int block_write_full_page(struct page *page, get_block_t *get_block, | ||
2893 | struct writeback_control *wbc) | ||
2894 | { | ||
2895 | return block_write_full_page_endio(page, get_block, wbc, | ||
2896 | end_buffer_async_write); | ||
2868 | } | 2897 | } |
2869 | 2898 | ||
2899 | |||
2870 | sector_t generic_block_bmap(struct address_space *mapping, sector_t block, | 2900 | sector_t generic_block_bmap(struct address_space *mapping, sector_t block, |
2871 | get_block_t *get_block) | 2901 | get_block_t *get_block) |
2872 | { | 2902 | { |
@@ -3335,9 +3365,11 @@ EXPORT_SYMBOL(block_read_full_page); | |||
3335 | EXPORT_SYMBOL(block_sync_page); | 3365 | EXPORT_SYMBOL(block_sync_page); |
3336 | EXPORT_SYMBOL(block_truncate_page); | 3366 | EXPORT_SYMBOL(block_truncate_page); |
3337 | EXPORT_SYMBOL(block_write_full_page); | 3367 | EXPORT_SYMBOL(block_write_full_page); |
3368 | EXPORT_SYMBOL(block_write_full_page_endio); | ||
3338 | EXPORT_SYMBOL(cont_write_begin); | 3369 | EXPORT_SYMBOL(cont_write_begin); |
3339 | EXPORT_SYMBOL(end_buffer_read_sync); | 3370 | EXPORT_SYMBOL(end_buffer_read_sync); |
3340 | EXPORT_SYMBOL(end_buffer_write_sync); | 3371 | EXPORT_SYMBOL(end_buffer_write_sync); |
3372 | EXPORT_SYMBOL(end_buffer_async_write); | ||
3341 | EXPORT_SYMBOL(file_fsync); | 3373 | EXPORT_SYMBOL(file_fsync); |
3342 | EXPORT_SYMBOL(generic_block_bmap); | 3374 | EXPORT_SYMBOL(generic_block_bmap); |
3343 | EXPORT_SYMBOL(generic_cont_expand_simple); | 3375 | EXPORT_SYMBOL(generic_cont_expand_simple); |
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 65984006192c..9d1fb6ec8a5a 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES | |||
@@ -15,7 +15,8 @@ Posix file open support added (turned off after one attempt if server | |||
15 | fails to support it properly, as with Samba server versions prior to 3.3.2) | 15 | fails to support it properly, as with Samba server versions prior to 3.3.2) |
16 | Fix "redzone overwritten" bug in cifs_put_tcon (CIFSTcon may allocate too | 16 | Fix "redzone overwritten" bug in cifs_put_tcon (CIFSTcon may allocate too |
17 | little memory for the "nativeFileSystem" field returned by the server | 17 | little memory for the "nativeFileSystem" field returned by the server |
18 | during mount). | 18 | during mount). Endian convert inode numbers if necessary (makes it easier |
19 | to compare inode numbers on network files from big endian systems). | ||
19 | 20 | ||
20 | Version 1.56 | 21 | Version 1.56 |
21 | ------------ | 22 | ------------ |
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 3fd3a9df043a..67bf93a40d2e 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c | |||
@@ -41,7 +41,7 @@ cifs_spnego_key_instantiate(struct key *key, const void *data, size_t datalen) | |||
41 | 41 | ||
42 | /* attach the data */ | 42 | /* attach the data */ |
43 | memcpy(payload, data, datalen); | 43 | memcpy(payload, data, datalen); |
44 | rcu_assign_pointer(key->payload.data, payload); | 44 | key->payload.data = payload; |
45 | ret = 0; | 45 | ret = 0; |
46 | 46 | ||
47 | error: | 47 | error: |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 38491fd3871d..0d6d8b573652 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -66,9 +66,6 @@ unsigned int sign_CIFS_PDUs = 1; | |||
66 | extern struct task_struct *oplockThread; /* remove sparse warning */ | 66 | extern struct task_struct *oplockThread; /* remove sparse warning */ |
67 | struct task_struct *oplockThread = NULL; | 67 | struct task_struct *oplockThread = NULL; |
68 | /* extern struct task_struct * dnotifyThread; remove sparse warning */ | 68 | /* extern struct task_struct * dnotifyThread; remove sparse warning */ |
69 | #ifdef CONFIG_CIFS_EXPERIMENTAL | ||
70 | static struct task_struct *dnotifyThread = NULL; | ||
71 | #endif | ||
72 | static const struct super_operations cifs_super_ops; | 69 | static const struct super_operations cifs_super_ops; |
73 | unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE; | 70 | unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE; |
74 | module_param(CIFSMaxBufSize, int, 0); | 71 | module_param(CIFSMaxBufSize, int, 0); |
@@ -316,6 +313,7 @@ cifs_alloc_inode(struct super_block *sb) | |||
316 | cifs_inode->clientCanCacheAll = false; | 313 | cifs_inode->clientCanCacheAll = false; |
317 | cifs_inode->delete_pending = false; | 314 | cifs_inode->delete_pending = false; |
318 | cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ | 315 | cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ |
316 | cifs_inode->server_eof = 0; | ||
319 | 317 | ||
320 | /* Can not set i_flags here - they get immediately overwritten | 318 | /* Can not set i_flags here - they get immediately overwritten |
321 | to zero by the VFS */ | 319 | to zero by the VFS */ |
@@ -1040,34 +1038,6 @@ static int cifs_oplock_thread(void *dummyarg) | |||
1040 | return 0; | 1038 | return 0; |
1041 | } | 1039 | } |
1042 | 1040 | ||
1043 | #ifdef CONFIG_CIFS_EXPERIMENTAL | ||
1044 | static int cifs_dnotify_thread(void *dummyarg) | ||
1045 | { | ||
1046 | struct list_head *tmp; | ||
1047 | struct TCP_Server_Info *server; | ||
1048 | |||
1049 | do { | ||
1050 | if (try_to_freeze()) | ||
1051 | continue; | ||
1052 | set_current_state(TASK_INTERRUPTIBLE); | ||
1053 | schedule_timeout(15*HZ); | ||
1054 | /* check if any stuck requests that need | ||
1055 | to be woken up and wakeq so the | ||
1056 | thread can wake up and error out */ | ||
1057 | read_lock(&cifs_tcp_ses_lock); | ||
1058 | list_for_each(tmp, &cifs_tcp_ses_list) { | ||
1059 | server = list_entry(tmp, struct TCP_Server_Info, | ||
1060 | tcp_ses_list); | ||
1061 | if (atomic_read(&server->inFlight)) | ||
1062 | wake_up_all(&server->response_q); | ||
1063 | } | ||
1064 | read_unlock(&cifs_tcp_ses_lock); | ||
1065 | } while (!kthread_should_stop()); | ||
1066 | |||
1067 | return 0; | ||
1068 | } | ||
1069 | #endif | ||
1070 | |||
1071 | static int __init | 1041 | static int __init |
1072 | init_cifs(void) | 1042 | init_cifs(void) |
1073 | { | 1043 | { |
@@ -1144,21 +1114,8 @@ init_cifs(void) | |||
1144 | goto out_unregister_dfs_key_type; | 1114 | goto out_unregister_dfs_key_type; |
1145 | } | 1115 | } |
1146 | 1116 | ||
1147 | #ifdef CONFIG_CIFS_EXPERIMENTAL | ||
1148 | dnotifyThread = kthread_run(cifs_dnotify_thread, NULL, "cifsdnotifyd"); | ||
1149 | if (IS_ERR(dnotifyThread)) { | ||
1150 | rc = PTR_ERR(dnotifyThread); | ||
1151 | cERROR(1, ("error %d create dnotify thread", rc)); | ||
1152 | goto out_stop_oplock_thread; | ||
1153 | } | ||
1154 | #endif | ||
1155 | |||
1156 | return 0; | 1117 | return 0; |
1157 | 1118 | ||
1158 | #ifdef CONFIG_CIFS_EXPERIMENTAL | ||
1159 | out_stop_oplock_thread: | ||
1160 | #endif | ||
1161 | kthread_stop(oplockThread); | ||
1162 | out_unregister_dfs_key_type: | 1119 | out_unregister_dfs_key_type: |
1163 | #ifdef CONFIG_CIFS_DFS_UPCALL | 1120 | #ifdef CONFIG_CIFS_DFS_UPCALL |
1164 | unregister_key_type(&key_type_dns_resolver); | 1121 | unregister_key_type(&key_type_dns_resolver); |
@@ -1196,9 +1153,6 @@ exit_cifs(void) | |||
1196 | cifs_destroy_inodecache(); | 1153 | cifs_destroy_inodecache(); |
1197 | cifs_destroy_mids(); | 1154 | cifs_destroy_mids(); |
1198 | cifs_destroy_request_bufs(); | 1155 | cifs_destroy_request_bufs(); |
1199 | #ifdef CONFIG_CIFS_EXPERIMENTAL | ||
1200 | kthread_stop(dnotifyThread); | ||
1201 | #endif | ||
1202 | kthread_stop(oplockThread); | 1156 | kthread_stop(oplockThread); |
1203 | } | 1157 | } |
1204 | 1158 | ||
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 9fbf4dff5da6..df40ab64cd95 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -350,7 +350,7 @@ struct cifsFileInfo { | |||
350 | bool invalidHandle:1; /* file closed via session abend */ | 350 | bool invalidHandle:1; /* file closed via session abend */ |
351 | bool messageMode:1; /* for pipes: message vs byte mode */ | 351 | bool messageMode:1; /* for pipes: message vs byte mode */ |
352 | atomic_t wrtPending; /* handle in use - defer close */ | 352 | atomic_t wrtPending; /* handle in use - defer close */ |
353 | struct semaphore fh_sem; /* prevents reopen race after dead ses*/ | 353 | struct mutex fh_mutex; /* prevents reopen race after dead ses*/ |
354 | struct cifs_search_info srch_inf; | 354 | struct cifs_search_info srch_inf; |
355 | }; | 355 | }; |
356 | 356 | ||
@@ -370,6 +370,7 @@ struct cifsInodeInfo { | |||
370 | bool clientCanCacheAll:1; /* read and writebehind oplock */ | 370 | bool clientCanCacheAll:1; /* read and writebehind oplock */ |
371 | bool oplockPending:1; | 371 | bool oplockPending:1; |
372 | bool delete_pending:1; /* DELETE_ON_CLOSE is set */ | 372 | bool delete_pending:1; /* DELETE_ON_CLOSE is set */ |
373 | u64 server_eof; /* current file size on server */ | ||
373 | struct inode vfs_inode; | 374 | struct inode vfs_inode; |
374 | }; | 375 | }; |
375 | 376 | ||
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index b370489c8da5..a785f69dbc9f 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h | |||
@@ -2163,7 +2163,7 @@ typedef struct { | |||
2163 | __le32 Type; | 2163 | __le32 Type; |
2164 | __le64 DevMajor; | 2164 | __le64 DevMajor; |
2165 | __le64 DevMinor; | 2165 | __le64 DevMinor; |
2166 | __u64 UniqueId; | 2166 | __le64 UniqueId; |
2167 | __le64 Permissions; | 2167 | __le64 Permissions; |
2168 | __le64 Nlinks; | 2168 | __le64 Nlinks; |
2169 | } __attribute__((packed)) FILE_UNIX_BASIC_INFO; /* level 0x200 QPathInfo */ | 2169 | } __attribute__((packed)) FILE_UNIX_BASIC_INFO; /* level 0x200 QPathInfo */ |
@@ -2308,7 +2308,7 @@ struct unlink_psx_rq { /* level 0x20a SetPathInfo */ | |||
2308 | } __attribute__((packed)); | 2308 | } __attribute__((packed)); |
2309 | 2309 | ||
2310 | struct file_internal_info { | 2310 | struct file_internal_info { |
2311 | __u64 UniqueId; /* inode number */ | 2311 | __le64 UniqueId; /* inode number */ |
2312 | } __attribute__((packed)); /* level 0x3ee */ | 2312 | } __attribute__((packed)); /* level 0x3ee */ |
2313 | 2313 | ||
2314 | struct file_mode_info { | 2314 | struct file_mode_info { |
@@ -2338,7 +2338,7 @@ typedef struct { | |||
2338 | __le32 Type; | 2338 | __le32 Type; |
2339 | __le64 DevMajor; | 2339 | __le64 DevMajor; |
2340 | __le64 DevMinor; | 2340 | __le64 DevMinor; |
2341 | __u64 UniqueId; | 2341 | __le64 UniqueId; |
2342 | __le64 Permissions; | 2342 | __le64 Permissions; |
2343 | __le64 Nlinks; | 2343 | __le64 Nlinks; |
2344 | char FileName[1]; | 2344 | char FileName[1]; |
@@ -2386,7 +2386,7 @@ typedef struct { | |||
2386 | __le32 FileNameLength; | 2386 | __le32 FileNameLength; |
2387 | __le32 EaSize; /* EA size */ | 2387 | __le32 EaSize; /* EA size */ |
2388 | __le32 Reserved; | 2388 | __le32 Reserved; |
2389 | __u64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit*/ | 2389 | __le64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit*/ |
2390 | char FileName[1]; | 2390 | char FileName[1]; |
2391 | } __attribute__((packed)) SEARCH_ID_FULL_DIR_INFO; /* level 0x105 FF rsp data */ | 2391 | } __attribute__((packed)) SEARCH_ID_FULL_DIR_INFO; /* level 0x105 FF rsp data */ |
2392 | 2392 | ||
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index bc09c998631f..a0845dc7b8a9 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -1626,6 +1626,8 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon, | |||
1626 | int smb_hdr_len; | 1626 | int smb_hdr_len; |
1627 | int resp_buf_type = 0; | 1627 | int resp_buf_type = 0; |
1628 | 1628 | ||
1629 | *nbytes = 0; | ||
1630 | |||
1629 | cFYI(1, ("write2 at %lld %d bytes", (long long)offset, count)); | 1631 | cFYI(1, ("write2 at %lld %d bytes", (long long)offset, count)); |
1630 | 1632 | ||
1631 | if (tcon->ses->capabilities & CAP_LARGE_FILES) { | 1633 | if (tcon->ses->capabilities & CAP_LARGE_FILES) { |
@@ -1682,11 +1684,9 @@ CIFSSMBWrite2(const int xid, struct cifsTconInfo *tcon, | |||
1682 | cifs_stats_inc(&tcon->num_writes); | 1684 | cifs_stats_inc(&tcon->num_writes); |
1683 | if (rc) { | 1685 | if (rc) { |
1684 | cFYI(1, ("Send error Write2 = %d", rc)); | 1686 | cFYI(1, ("Send error Write2 = %d", rc)); |
1685 | *nbytes = 0; | ||
1686 | } else if (resp_buf_type == 0) { | 1687 | } else if (resp_buf_type == 0) { |
1687 | /* presumably this can not happen, but best to be safe */ | 1688 | /* presumably this can not happen, but best to be safe */ |
1688 | rc = -EIO; | 1689 | rc = -EIO; |
1689 | *nbytes = 0; | ||
1690 | } else { | 1690 | } else { |
1691 | WRITE_RSP *pSMBr = (WRITE_RSP *)iov[0].iov_base; | 1691 | WRITE_RSP *pSMBr = (WRITE_RSP *)iov[0].iov_base; |
1692 | *nbytes = le16_to_cpu(pSMBr->CountHigh); | 1692 | *nbytes = le16_to_cpu(pSMBr->CountHigh); |
@@ -3918,7 +3918,7 @@ GetInodeNumberRetry: | |||
3918 | } | 3918 | } |
3919 | pfinfo = (struct file_internal_info *) | 3919 | pfinfo = (struct file_internal_info *) |
3920 | (data_offset + (char *) &pSMBr->hdr.Protocol); | 3920 | (data_offset + (char *) &pSMBr->hdr.Protocol); |
3921 | *inode_number = pfinfo->UniqueId; | 3921 | *inode_number = le64_to_cpu(pfinfo->UniqueId); |
3922 | } | 3922 | } |
3923 | } | 3923 | } |
3924 | GetInodeNumOut: | 3924 | GetInodeNumOut: |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0de3b5615a22..bacdef1546b7 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -2214,9 +2214,58 @@ is_path_accessible(int xid, struct cifsTconInfo *tcon, | |||
2214 | return rc; | 2214 | return rc; |
2215 | } | 2215 | } |
2216 | 2216 | ||
2217 | static void | ||
2218 | cleanup_volume_info(struct smb_vol **pvolume_info) | ||
2219 | { | ||
2220 | struct smb_vol *volume_info; | ||
2221 | |||
2222 | if (!pvolume_info && !*pvolume_info) | ||
2223 | return; | ||
2224 | |||
2225 | volume_info = *pvolume_info; | ||
2226 | kzfree(volume_info->password); | ||
2227 | kfree(volume_info->UNC); | ||
2228 | kfree(volume_info->prepath); | ||
2229 | kfree(volume_info); | ||
2230 | *pvolume_info = NULL; | ||
2231 | return; | ||
2232 | } | ||
2233 | |||
2234 | #ifdef CONFIG_CIFS_DFS_UPCALL | ||
2235 | /* build_unc_path_to_root returns full path to root when | ||
2236 | * we do not have an existing connection (tcon) */ | ||
2237 | static char * | ||
2238 | build_unc_path_to_root(const struct smb_vol *volume_info, | ||
2239 | const struct cifs_sb_info *cifs_sb) | ||
2240 | { | ||
2241 | char *full_path; | ||
2242 | |||
2243 | int unc_len = strnlen(volume_info->UNC, MAX_TREE_SIZE + 1); | ||
2244 | full_path = kmalloc(unc_len + cifs_sb->prepathlen + 1, GFP_KERNEL); | ||
2245 | if (full_path == NULL) | ||
2246 | return ERR_PTR(-ENOMEM); | ||
2247 | |||
2248 | strncpy(full_path, volume_info->UNC, unc_len); | ||
2249 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) { | ||
2250 | int i; | ||
2251 | for (i = 0; i < unc_len; i++) { | ||
2252 | if (full_path[i] == '\\') | ||
2253 | full_path[i] = '/'; | ||
2254 | } | ||
2255 | } | ||
2256 | |||
2257 | if (cifs_sb->prepathlen) | ||
2258 | strncpy(full_path + unc_len, cifs_sb->prepath, | ||
2259 | cifs_sb->prepathlen); | ||
2260 | |||
2261 | full_path[unc_len + cifs_sb->prepathlen] = 0; /* add trailing null */ | ||
2262 | return full_path; | ||
2263 | } | ||
2264 | #endif | ||
2265 | |||
2217 | int | 2266 | int |
2218 | cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, | 2267 | cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, |
2219 | char *mount_data, const char *devname) | 2268 | char *mount_data_global, const char *devname) |
2220 | { | 2269 | { |
2221 | int rc = 0; | 2270 | int rc = 0; |
2222 | int xid; | 2271 | int xid; |
@@ -2225,6 +2274,13 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, | |||
2225 | struct cifsTconInfo *tcon = NULL; | 2274 | struct cifsTconInfo *tcon = NULL; |
2226 | struct TCP_Server_Info *srvTcp = NULL; | 2275 | struct TCP_Server_Info *srvTcp = NULL; |
2227 | char *full_path; | 2276 | char *full_path; |
2277 | char *mount_data = mount_data_global; | ||
2278 | #ifdef CONFIG_CIFS_DFS_UPCALL | ||
2279 | struct dfs_info3_param *referrals = NULL; | ||
2280 | unsigned int num_referrals = 0; | ||
2281 | try_mount_again: | ||
2282 | #endif | ||
2283 | full_path = NULL; | ||
2228 | 2284 | ||
2229 | xid = GetXid(); | 2285 | xid = GetXid(); |
2230 | 2286 | ||
@@ -2371,11 +2427,9 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, | |||
2371 | } | 2427 | } |
2372 | } | 2428 | } |
2373 | 2429 | ||
2374 | /* check for null share name ie connect to dfs root */ | ||
2375 | if ((strchr(volume_info->UNC + 3, '\\') == NULL) | 2430 | if ((strchr(volume_info->UNC + 3, '\\') == NULL) |
2376 | && (strchr(volume_info->UNC + 3, '/') == NULL)) { | 2431 | && (strchr(volume_info->UNC + 3, '/') == NULL)) { |
2377 | /* rc = connect_to_dfs_path(...) */ | 2432 | cERROR(1, ("Missing share name")); |
2378 | cFYI(1, ("DFS root not supported")); | ||
2379 | rc = -ENODEV; | 2433 | rc = -ENODEV; |
2380 | goto mount_fail_check; | 2434 | goto mount_fail_check; |
2381 | } else { | 2435 | } else { |
@@ -2392,7 +2446,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, | |||
2392 | } | 2446 | } |
2393 | } | 2447 | } |
2394 | if (rc) | 2448 | if (rc) |
2395 | goto mount_fail_check; | 2449 | goto remote_path_check; |
2396 | tcon->seal = volume_info->seal; | 2450 | tcon->seal = volume_info->seal; |
2397 | write_lock(&cifs_tcp_ses_lock); | 2451 | write_lock(&cifs_tcp_ses_lock); |
2398 | list_add(&tcon->tcon_list, &pSesInfo->tcon_list); | 2452 | list_add(&tcon->tcon_list, &pSesInfo->tcon_list); |
@@ -2417,19 +2471,9 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, | |||
2417 | /* BB FIXME fix time_gran to be larger for LANMAN sessions */ | 2471 | /* BB FIXME fix time_gran to be larger for LANMAN sessions */ |
2418 | sb->s_time_gran = 100; | 2472 | sb->s_time_gran = 100; |
2419 | 2473 | ||
2420 | mount_fail_check: | 2474 | if (rc) |
2421 | /* on error free sesinfo and tcon struct if needed */ | 2475 | goto remote_path_check; |
2422 | if (rc) { | 2476 | |
2423 | /* If find_unc succeeded then rc == 0 so we can not end */ | ||
2424 | /* up accidently freeing someone elses tcon struct */ | ||
2425 | if (tcon) | ||
2426 | cifs_put_tcon(tcon); | ||
2427 | else if (pSesInfo) | ||
2428 | cifs_put_smb_ses(pSesInfo); | ||
2429 | else | ||
2430 | cifs_put_tcp_session(srvTcp); | ||
2431 | goto out; | ||
2432 | } | ||
2433 | cifs_sb->tcon = tcon; | 2477 | cifs_sb->tcon = tcon; |
2434 | 2478 | ||
2435 | /* do not care if following two calls succeed - informational */ | 2479 | /* do not care if following two calls succeed - informational */ |
@@ -2461,7 +2505,9 @@ mount_fail_check: | |||
2461 | cifs_sb->rsize = min(cifs_sb->rsize, | 2505 | cifs_sb->rsize = min(cifs_sb->rsize, |
2462 | (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE)); | 2506 | (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE)); |
2463 | 2507 | ||
2464 | if (!rc && cifs_sb->prepathlen) { | 2508 | remote_path_check: |
2509 | /* check if a whole path (including prepath) is not remote */ | ||
2510 | if (!rc && cifs_sb->prepathlen && tcon) { | ||
2465 | /* build_path_to_root works only when we have a valid tcon */ | 2511 | /* build_path_to_root works only when we have a valid tcon */ |
2466 | full_path = cifs_build_path_to_root(cifs_sb); | 2512 | full_path = cifs_build_path_to_root(cifs_sb); |
2467 | if (full_path == NULL) { | 2513 | if (full_path == NULL) { |
@@ -2469,31 +2515,79 @@ mount_fail_check: | |||
2469 | goto mount_fail_check; | 2515 | goto mount_fail_check; |
2470 | } | 2516 | } |
2471 | rc = is_path_accessible(xid, tcon, cifs_sb, full_path); | 2517 | rc = is_path_accessible(xid, tcon, cifs_sb, full_path); |
2472 | if (rc) { | 2518 | if (rc != -EREMOTE) { |
2473 | cERROR(1, ("Path %s in not accessible: %d", | ||
2474 | full_path, rc)); | ||
2475 | kfree(full_path); | 2519 | kfree(full_path); |
2476 | goto mount_fail_check; | 2520 | goto mount_fail_check; |
2477 | } | 2521 | } |
2478 | kfree(full_path); | 2522 | kfree(full_path); |
2479 | } | 2523 | } |
2480 | 2524 | ||
2525 | /* get referral if needed */ | ||
2526 | if (rc == -EREMOTE) { | ||
2527 | #ifdef CONFIG_CIFS_DFS_UPCALL | ||
2528 | /* convert forward to back slashes in prepath here if needed */ | ||
2529 | if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) == 0) | ||
2530 | convert_delimiter(cifs_sb->prepath, | ||
2531 | CIFS_DIR_SEP(cifs_sb)); | ||
2532 | full_path = build_unc_path_to_root(volume_info, cifs_sb); | ||
2533 | if (IS_ERR(full_path)) { | ||
2534 | rc = PTR_ERR(full_path); | ||
2535 | goto mount_fail_check; | ||
2536 | } | ||
2537 | |||
2538 | cFYI(1, ("Getting referral for: %s", full_path)); | ||
2539 | rc = get_dfs_path(xid, pSesInfo , full_path + 1, | ||
2540 | cifs_sb->local_nls, &num_referrals, &referrals, | ||
2541 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
2542 | if (!rc && num_referrals > 0) { | ||
2543 | char *fake_devname = NULL; | ||
2544 | |||
2545 | if (mount_data != mount_data_global) | ||
2546 | kfree(mount_data); | ||
2547 | mount_data = cifs_compose_mount_options( | ||
2548 | cifs_sb->mountdata, full_path + 1, | ||
2549 | referrals, &fake_devname); | ||
2550 | kfree(fake_devname); | ||
2551 | free_dfs_info_array(referrals, num_referrals); | ||
2552 | |||
2553 | if (tcon) | ||
2554 | cifs_put_tcon(tcon); | ||
2555 | else if (pSesInfo) | ||
2556 | cifs_put_smb_ses(pSesInfo); | ||
2557 | |||
2558 | cleanup_volume_info(&volume_info); | ||
2559 | FreeXid(xid); | ||
2560 | kfree(full_path); | ||
2561 | goto try_mount_again; | ||
2562 | } | ||
2563 | #else /* No DFS support, return error on mount */ | ||
2564 | rc = -EOPNOTSUPP; | ||
2565 | #endif | ||
2566 | } | ||
2567 | |||
2568 | mount_fail_check: | ||
2569 | /* on error free sesinfo and tcon struct if needed */ | ||
2570 | if (rc) { | ||
2571 | if (mount_data != mount_data_global) | ||
2572 | kfree(mount_data); | ||
2573 | /* If find_unc succeeded then rc == 0 so we can not end */ | ||
2574 | /* up accidentally freeing someone else's tcon struct */ | ||
2575 | if (tcon) | ||
2576 | cifs_put_tcon(tcon); | ||
2577 | else if (pSesInfo) | ||
2578 | cifs_put_smb_ses(pSesInfo); | ||
2579 | else | ||
2580 | cifs_put_tcp_session(srvTcp); | ||
2581 | goto out; | ||
2582 | } | ||
2583 | |||
2481 | /* volume_info->password is freed above when existing session found | 2584 | /* volume_info->password is freed above when existing session found |
2482 | (in which case it is not needed anymore) but when new session is created | 2585 | (in which case it is not needed anymore) but when new session is created |
2483 | the password ptr is put in the new session structure (in which case the | 2586 | the password ptr is put in the new session structure (in which case the |
2484 | password will be freed at unmount time) */ | 2587 | password will be freed at unmount time) */ |
2485 | out: | 2588 | out: |
2486 | /* zero out password before freeing */ | 2589 | /* zero out password before freeing */ |
2487 | if (volume_info) { | 2590 | cleanup_volume_info(&volume_info); |
2488 | if (volume_info->password != NULL) { | ||
2489 | memset(volume_info->password, 0, | ||
2490 | strlen(volume_info->password)); | ||
2491 | kfree(volume_info->password); | ||
2492 | } | ||
2493 | kfree(volume_info->UNC); | ||
2494 | kfree(volume_info->prepath); | ||
2495 | kfree(volume_info); | ||
2496 | } | ||
2497 | FreeXid(xid); | 2591 | FreeXid(xid); |
2498 | return rc; | 2592 | return rc; |
2499 | } | 2593 | } |
@@ -2673,8 +2767,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
2673 | /* We look for obvious messed up bcc or strings in response so we do not go off | 2767 | /* We look for obvious messed up bcc or strings in response so we do not go off |
2674 | the end since (at least) WIN2K and Windows XP have a major bug in not null | 2768 | the end since (at least) WIN2K and Windows XP have a major bug in not null |
2675 | terminating last Unicode string in response */ | 2769 | terminating last Unicode string in response */ |
2676 | if (ses->serverOS) | 2770 | kfree(ses->serverOS); |
2677 | kfree(ses->serverOS); | ||
2678 | ses->serverOS = kzalloc(2 * (len + 1), | 2771 | ses->serverOS = kzalloc(2 * (len + 1), |
2679 | GFP_KERNEL); | 2772 | GFP_KERNEL); |
2680 | if (ses->serverOS == NULL) | 2773 | if (ses->serverOS == NULL) |
@@ -2710,8 +2803,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
2710 | len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); | 2803 | len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); |
2711 | /* last string is not always null terminated | 2804 | /* last string is not always null terminated |
2712 | (for e.g. for Windows XP & 2000) */ | 2805 | (for e.g. for Windows XP & 2000) */ |
2713 | if (ses->serverDomain) | 2806 | kfree(ses->serverDomain); |
2714 | kfree(ses->serverDomain); | ||
2715 | ses->serverDomain = | 2807 | ses->serverDomain = |
2716 | kzalloc(2*(len+1), | 2808 | kzalloc(2*(len+1), |
2717 | GFP_KERNEL); | 2809 | GFP_KERNEL); |
@@ -2725,8 +2817,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
2725 | ses->serverDomain[1+(2*len)] = 0; | 2817 | ses->serverDomain[1+(2*len)] = 0; |
2726 | } else { /* else no more room so create | 2818 | } else { /* else no more room so create |
2727 | dummy domain string */ | 2819 | dummy domain string */ |
2728 | if (ses->serverDomain) | 2820 | kfree(ses->serverDomain); |
2729 | kfree(ses->serverDomain); | ||
2730 | ses->serverDomain = | 2821 | ses->serverDomain = |
2731 | kzalloc(2, GFP_KERNEL); | 2822 | kzalloc(2, GFP_KERNEL); |
2732 | } | 2823 | } |
@@ -2772,8 +2863,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
2772 | bcc_ptr++; | 2863 | bcc_ptr++; |
2773 | 2864 | ||
2774 | len = strnlen(bcc_ptr, 1024); | 2865 | len = strnlen(bcc_ptr, 1024); |
2775 | if (ses->serverDomain) | 2866 | kfree(ses->serverDomain); |
2776 | kfree(ses->serverDomain); | ||
2777 | ses->serverDomain = kzalloc(len + 1, | 2867 | ses->serverDomain = kzalloc(len + 1, |
2778 | GFP_KERNEL); | 2868 | GFP_KERNEL); |
2779 | if (ses->serverDomain == NULL) | 2869 | if (ses->serverDomain == NULL) |
@@ -3013,8 +3103,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, | |||
3013 | /* We look for obvious messed up bcc or strings in response so we do not go off | 3103 | /* We look for obvious messed up bcc or strings in response so we do not go off |
3014 | the end since (at least) WIN2K and Windows XP have a major bug in not null | 3104 | the end since (at least) WIN2K and Windows XP have a major bug in not null |
3015 | terminating last Unicode string in response */ | 3105 | terminating last Unicode string in response */ |
3016 | if (ses->serverOS) | 3106 | kfree(ses->serverOS); |
3017 | kfree(ses->serverOS); | ||
3018 | ses->serverOS = | 3107 | ses->serverOS = |
3019 | kzalloc(2 * (len + 1), GFP_KERNEL); | 3108 | kzalloc(2 * (len + 1), GFP_KERNEL); |
3020 | cifs_strfromUCS_le(ses->serverOS, | 3109 | cifs_strfromUCS_le(ses->serverOS, |
@@ -3086,8 +3175,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid, | |||
3086 | if (((long) bcc_ptr + len) - (long) | 3175 | if (((long) bcc_ptr + len) - (long) |
3087 | pByteArea(smb_buffer_response) | 3176 | pByteArea(smb_buffer_response) |
3088 | <= BCC(smb_buffer_response)) { | 3177 | <= BCC(smb_buffer_response)) { |
3089 | if (ses->serverOS) | 3178 | kfree(ses->serverOS); |
3090 | kfree(ses->serverOS); | ||
3091 | ses->serverOS = | 3179 | ses->serverOS = |
3092 | kzalloc(len + 1, | 3180 | kzalloc(len + 1, |
3093 | GFP_KERNEL); | 3181 | GFP_KERNEL); |
@@ -3414,8 +3502,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
3414 | /* We look for obvious messed up bcc or strings in response so we do not go off | 3502 | /* We look for obvious messed up bcc or strings in response so we do not go off |
3415 | the end since (at least) WIN2K and Windows XP have a major bug in not null | 3503 | the end since (at least) WIN2K and Windows XP have a major bug in not null |
3416 | terminating last Unicode string in response */ | 3504 | terminating last Unicode string in response */ |
3417 | if (ses->serverOS) | 3505 | kfree(ses->serverOS); |
3418 | kfree(ses->serverOS); | ||
3419 | ses->serverOS = | 3506 | ses->serverOS = |
3420 | kzalloc(2 * (len + 1), GFP_KERNEL); | 3507 | kzalloc(2 * (len + 1), GFP_KERNEL); |
3421 | cifs_strfromUCS_le(ses->serverOS, | 3508 | cifs_strfromUCS_le(ses->serverOS, |
@@ -3448,8 +3535,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
3448 | if (remaining_words > 0) { | 3535 | if (remaining_words > 0) { |
3449 | len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); | 3536 | len = UniStrnlen((wchar_t *) bcc_ptr, remaining_words); |
3450 | /* last string not always null terminated (e.g. for Windows XP & 2000) */ | 3537 | /* last string not always null terminated (e.g. for Windows XP & 2000) */ |
3451 | if (ses->serverDomain) | 3538 | kfree(ses->serverDomain); |
3452 | kfree(ses->serverDomain); | ||
3453 | ses->serverDomain = | 3539 | ses->serverDomain = |
3454 | kzalloc(2 * | 3540 | kzalloc(2 * |
3455 | (len + | 3541 | (len + |
@@ -3476,13 +3562,11 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
3476 | = 0; | 3562 | = 0; |
3477 | } /* else no more room so create dummy domain string */ | 3563 | } /* else no more room so create dummy domain string */ |
3478 | else { | 3564 | else { |
3479 | if (ses->serverDomain) | 3565 | kfree(ses->serverDomain); |
3480 | kfree(ses->serverDomain); | ||
3481 | ses->serverDomain = kzalloc(2,GFP_KERNEL); | 3566 | ses->serverDomain = kzalloc(2,GFP_KERNEL); |
3482 | } | 3567 | } |
3483 | } else { /* no room so create dummy domain and NOS string */ | 3568 | } else { /* no room so create dummy domain and NOS string */ |
3484 | if (ses->serverDomain) | 3569 | kfree(ses->serverDomain); |
3485 | kfree(ses->serverDomain); | ||
3486 | ses->serverDomain = kzalloc(2, GFP_KERNEL); | 3570 | ses->serverDomain = kzalloc(2, GFP_KERNEL); |
3487 | kfree(ses->serverNOS); | 3571 | kfree(ses->serverNOS); |
3488 | ses->serverNOS = kzalloc(2, GFP_KERNEL); | 3572 | ses->serverNOS = kzalloc(2, GFP_KERNEL); |
@@ -3492,8 +3576,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
3492 | if (((long) bcc_ptr + len) - | 3576 | if (((long) bcc_ptr + len) - |
3493 | (long) pByteArea(smb_buffer_response) | 3577 | (long) pByteArea(smb_buffer_response) |
3494 | <= BCC(smb_buffer_response)) { | 3578 | <= BCC(smb_buffer_response)) { |
3495 | if (ses->serverOS) | 3579 | kfree(ses->serverOS); |
3496 | kfree(ses->serverOS); | ||
3497 | ses->serverOS = kzalloc(len + 1, GFP_KERNEL); | 3580 | ses->serverOS = kzalloc(len + 1, GFP_KERNEL); |
3498 | strncpy(ses->serverOS,bcc_ptr, len); | 3581 | strncpy(ses->serverOS,bcc_ptr, len); |
3499 | 3582 | ||
@@ -3512,8 +3595,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
3512 | bcc_ptr++; | 3595 | bcc_ptr++; |
3513 | 3596 | ||
3514 | len = strnlen(bcc_ptr, 1024); | 3597 | len = strnlen(bcc_ptr, 1024); |
3515 | if (ses->serverDomain) | 3598 | kfree(ses->serverDomain); |
3516 | kfree(ses->serverDomain); | ||
3517 | ses->serverDomain = | 3599 | ses->serverDomain = |
3518 | kzalloc(len+1, | 3600 | kzalloc(len+1, |
3519 | GFP_KERNEL); | 3601 | GFP_KERNEL); |
@@ -3674,16 +3756,15 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, | |||
3674 | BCC(smb_buffer_response)) { | 3756 | BCC(smb_buffer_response)) { |
3675 | kfree(tcon->nativeFileSystem); | 3757 | kfree(tcon->nativeFileSystem); |
3676 | tcon->nativeFileSystem = | 3758 | tcon->nativeFileSystem = |
3677 | kzalloc(2*(length + 1), GFP_KERNEL); | 3759 | kzalloc((4 * length) + 2, GFP_KERNEL); |
3678 | if (tcon->nativeFileSystem) | 3760 | if (tcon->nativeFileSystem) { |
3679 | cifs_strfromUCS_le( | 3761 | cifs_strfromUCS_le( |
3680 | tcon->nativeFileSystem, | 3762 | tcon->nativeFileSystem, |
3681 | (__le16 *) bcc_ptr, | 3763 | (__le16 *) bcc_ptr, |
3682 | length, nls_codepage); | 3764 | length, nls_codepage); |
3683 | bcc_ptr += 2 * length; | 3765 | cFYI(1, ("nativeFileSystem=%s", |
3684 | bcc_ptr[0] = 0; /* null terminate the string */ | 3766 | tcon->nativeFileSystem)); |
3685 | bcc_ptr[1] = 0; | 3767 | } |
3686 | bcc_ptr += 2; | ||
3687 | } | 3768 | } |
3688 | /* else do not bother copying these information fields*/ | 3769 | /* else do not bother copying these information fields*/ |
3689 | } else { | 3770 | } else { |
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 54dce78fbb73..461750e01364 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
@@ -129,12 +129,62 @@ cifs_bp_rename_retry: | |||
129 | return full_path; | 129 | return full_path; |
130 | } | 130 | } |
131 | 131 | ||
132 | static void | ||
133 | cifs_fill_fileinfo(struct inode *newinode, __u16 fileHandle, | ||
134 | struct cifsTconInfo *tcon, bool write_only) | ||
135 | { | ||
136 | int oplock = 0; | ||
137 | struct cifsFileInfo *pCifsFile; | ||
138 | struct cifsInodeInfo *pCifsInode; | ||
139 | |||
140 | pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); | ||
141 | |||
142 | if (pCifsFile == NULL) | ||
143 | return; | ||
144 | |||
145 | if (oplockEnabled) | ||
146 | oplock = REQ_OPLOCK; | ||
147 | |||
148 | pCifsFile->netfid = fileHandle; | ||
149 | pCifsFile->pid = current->tgid; | ||
150 | pCifsFile->pInode = newinode; | ||
151 | pCifsFile->invalidHandle = false; | ||
152 | pCifsFile->closePend = false; | ||
153 | mutex_init(&pCifsFile->fh_mutex); | ||
154 | mutex_init(&pCifsFile->lock_mutex); | ||
155 | INIT_LIST_HEAD(&pCifsFile->llist); | ||
156 | atomic_set(&pCifsFile->wrtPending, 0); | ||
157 | |||
158 | /* set the following in open now | ||
159 | pCifsFile->pfile = file; */ | ||
160 | write_lock(&GlobalSMBSeslock); | ||
161 | list_add(&pCifsFile->tlist, &tcon->openFileList); | ||
162 | pCifsInode = CIFS_I(newinode); | ||
163 | if (pCifsInode) { | ||
164 | /* if readable file instance put first in list*/ | ||
165 | if (write_only) | ||
166 | list_add_tail(&pCifsFile->flist, | ||
167 | &pCifsInode->openFileList); | ||
168 | else | ||
169 | list_add(&pCifsFile->flist, &pCifsInode->openFileList); | ||
170 | |||
171 | if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { | ||
172 | pCifsInode->clientCanCacheAll = true; | ||
173 | pCifsInode->clientCanCacheRead = true; | ||
174 | cFYI(1, ("Exclusive Oplock inode %p", newinode)); | ||
175 | } else if ((oplock & 0xF) == OPLOCK_READ) | ||
176 | pCifsInode->clientCanCacheRead = true; | ||
177 | } | ||
178 | write_unlock(&GlobalSMBSeslock); | ||
179 | } | ||
180 | |||
132 | int cifs_posix_open(char *full_path, struct inode **pinode, | 181 | int cifs_posix_open(char *full_path, struct inode **pinode, |
133 | struct super_block *sb, int mode, int oflags, | 182 | struct super_block *sb, int mode, int oflags, |
134 | int *poplock, __u16 *pnetfid, int xid) | 183 | int *poplock, __u16 *pnetfid, int xid) |
135 | { | 184 | { |
136 | int rc; | 185 | int rc; |
137 | __u32 oplock; | 186 | __u32 oplock; |
187 | bool write_only = false; | ||
138 | FILE_UNIX_BASIC_INFO *presp_data; | 188 | FILE_UNIX_BASIC_INFO *presp_data; |
139 | __u32 posix_flags = 0; | 189 | __u32 posix_flags = 0; |
140 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | 190 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); |
@@ -172,6 +222,8 @@ int cifs_posix_open(char *full_path, struct inode **pinode, | |||
172 | if (oflags & O_DIRECT) | 222 | if (oflags & O_DIRECT) |
173 | posix_flags |= SMB_O_DIRECT; | 223 | posix_flags |= SMB_O_DIRECT; |
174 | 224 | ||
225 | if (!(oflags & FMODE_READ)) | ||
226 | write_only = true; | ||
175 | 227 | ||
176 | rc = CIFSPOSIXCreate(xid, cifs_sb->tcon, posix_flags, mode, | 228 | rc = CIFSPOSIXCreate(xid, cifs_sb->tcon, posix_flags, mode, |
177 | pnetfid, presp_data, &oplock, full_path, | 229 | pnetfid, presp_data, &oplock, full_path, |
@@ -187,8 +239,10 @@ int cifs_posix_open(char *full_path, struct inode **pinode, | |||
187 | if (!pinode) | 239 | if (!pinode) |
188 | goto posix_open_ret; /* caller does not need info */ | 240 | goto posix_open_ret; /* caller does not need info */ |
189 | 241 | ||
190 | if (*pinode == NULL) | 242 | if (*pinode == NULL) { |
191 | *pinode = cifs_new_inode(sb, &presp_data->UniqueId); | 243 | __u64 unique_id = le64_to_cpu(presp_data->UniqueId); |
244 | *pinode = cifs_new_inode(sb, &unique_id); | ||
245 | } | ||
192 | /* else an inode was passed in. Update its info, don't create one */ | 246 | /* else an inode was passed in. Update its info, don't create one */ |
193 | 247 | ||
194 | /* We do not need to close the file if new_inode fails since | 248 | /* We do not need to close the file if new_inode fails since |
@@ -198,6 +252,8 @@ int cifs_posix_open(char *full_path, struct inode **pinode, | |||
198 | 252 | ||
199 | posix_fill_in_inode(*pinode, presp_data, 1); | 253 | posix_fill_in_inode(*pinode, presp_data, 1); |
200 | 254 | ||
255 | cifs_fill_fileinfo(*pinode, *pnetfid, cifs_sb->tcon, write_only); | ||
256 | |||
201 | posix_open_ret: | 257 | posix_open_ret: |
202 | kfree(presp_data); | 258 | kfree(presp_data); |
203 | return rc; | 259 | return rc; |
@@ -239,7 +295,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, | |||
239 | char *full_path = NULL; | 295 | char *full_path = NULL; |
240 | FILE_ALL_INFO *buf = NULL; | 296 | FILE_ALL_INFO *buf = NULL; |
241 | struct inode *newinode = NULL; | 297 | struct inode *newinode = NULL; |
242 | struct cifsInodeInfo *pCifsInode; | ||
243 | int disposition = FILE_OVERWRITE_IF; | 298 | int disposition = FILE_OVERWRITE_IF; |
244 | bool write_only = false; | 299 | bool write_only = false; |
245 | 300 | ||
@@ -410,44 +465,8 @@ cifs_create_set_dentry: | |||
410 | /* mknod case - do not leave file open */ | 465 | /* mknod case - do not leave file open */ |
411 | CIFSSMBClose(xid, tcon, fileHandle); | 466 | CIFSSMBClose(xid, tcon, fileHandle); |
412 | } else if (newinode) { | 467 | } else if (newinode) { |
413 | struct cifsFileInfo *pCifsFile = | 468 | cifs_fill_fileinfo(newinode, fileHandle, |
414 | kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); | 469 | cifs_sb->tcon, write_only); |
415 | |||
416 | if (pCifsFile == NULL) | ||
417 | goto cifs_create_out; | ||
418 | pCifsFile->netfid = fileHandle; | ||
419 | pCifsFile->pid = current->tgid; | ||
420 | pCifsFile->pInode = newinode; | ||
421 | pCifsFile->invalidHandle = false; | ||
422 | pCifsFile->closePend = false; | ||
423 | init_MUTEX(&pCifsFile->fh_sem); | ||
424 | mutex_init(&pCifsFile->lock_mutex); | ||
425 | INIT_LIST_HEAD(&pCifsFile->llist); | ||
426 | atomic_set(&pCifsFile->wrtPending, 0); | ||
427 | |||
428 | /* set the following in open now | ||
429 | pCifsFile->pfile = file; */ | ||
430 | write_lock(&GlobalSMBSeslock); | ||
431 | list_add(&pCifsFile->tlist, &tcon->openFileList); | ||
432 | pCifsInode = CIFS_I(newinode); | ||
433 | if (pCifsInode) { | ||
434 | /* if readable file instance put first in list*/ | ||
435 | if (write_only) { | ||
436 | list_add_tail(&pCifsFile->flist, | ||
437 | &pCifsInode->openFileList); | ||
438 | } else { | ||
439 | list_add(&pCifsFile->flist, | ||
440 | &pCifsInode->openFileList); | ||
441 | } | ||
442 | if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { | ||
443 | pCifsInode->clientCanCacheAll = true; | ||
444 | pCifsInode->clientCanCacheRead = true; | ||
445 | cFYI(1, ("Exclusive Oplock inode %p", | ||
446 | newinode)); | ||
447 | } else if ((oplock & 0xF) == OPLOCK_READ) | ||
448 | pCifsInode->clientCanCacheRead = true; | ||
449 | } | ||
450 | write_unlock(&GlobalSMBSeslock); | ||
451 | } | 470 | } |
452 | cifs_create_out: | 471 | cifs_create_out: |
453 | kfree(buf); | 472 | kfree(buf); |
@@ -580,17 +599,21 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, | |||
580 | return rc; | 599 | return rc; |
581 | } | 600 | } |
582 | 601 | ||
583 | |||
584 | struct dentry * | 602 | struct dentry * |
585 | cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, | 603 | cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, |
586 | struct nameidata *nd) | 604 | struct nameidata *nd) |
587 | { | 605 | { |
588 | int xid; | 606 | int xid; |
589 | int rc = 0; /* to get around spurious gcc warning, set to zero here */ | 607 | int rc = 0; /* to get around spurious gcc warning, set to zero here */ |
608 | int oplock = 0; | ||
609 | int mode; | ||
610 | __u16 fileHandle = 0; | ||
611 | bool posix_open = false; | ||
590 | struct cifs_sb_info *cifs_sb; | 612 | struct cifs_sb_info *cifs_sb; |
591 | struct cifsTconInfo *pTcon; | 613 | struct cifsTconInfo *pTcon; |
592 | struct inode *newInode = NULL; | 614 | struct inode *newInode = NULL; |
593 | char *full_path = NULL; | 615 | char *full_path = NULL; |
616 | struct file *filp; | ||
594 | 617 | ||
595 | xid = GetXid(); | 618 | xid = GetXid(); |
596 | 619 | ||
@@ -632,12 +655,37 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, | |||
632 | } | 655 | } |
633 | cFYI(1, ("Full path: %s inode = 0x%p", full_path, direntry->d_inode)); | 656 | cFYI(1, ("Full path: %s inode = 0x%p", full_path, direntry->d_inode)); |
634 | 657 | ||
635 | if (pTcon->unix_ext) | 658 | if (pTcon->unix_ext) { |
636 | rc = cifs_get_inode_info_unix(&newInode, full_path, | 659 | if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) && |
637 | parent_dir_inode->i_sb, xid); | 660 | (nd->flags & LOOKUP_OPEN)) { |
638 | else | 661 | if (!((nd->intent.open.flags & O_CREAT) && |
662 | (nd->intent.open.flags & O_EXCL))) { | ||
663 | mode = nd->intent.open.create_mode & | ||
664 | ~current_umask(); | ||
665 | rc = cifs_posix_open(full_path, &newInode, | ||
666 | parent_dir_inode->i_sb, mode, | ||
667 | nd->intent.open.flags, &oplock, | ||
668 | &fileHandle, xid); | ||
669 | /* | ||
670 | * This code works around a bug in | ||
671 | * samba posix open in samba versions 3.3.1 | ||
672 | * and earlier where create works | ||
673 | * but open fails with invalid parameter. | ||
675 | * If either of these error codes is | ||
675 | * returned, follow the normal lookup. | ||
676 | * Otherwise, the error during posix open | ||
677 | * is handled. | ||
678 | */ | ||
679 | if ((rc != -EINVAL) && (rc != -EOPNOTSUPP)) | ||
680 | posix_open = true; | ||
681 | } | ||
682 | } | ||
683 | if (!posix_open) | ||
684 | rc = cifs_get_inode_info_unix(&newInode, full_path, | ||
685 | parent_dir_inode->i_sb, xid); | ||
686 | } else | ||
639 | rc = cifs_get_inode_info(&newInode, full_path, NULL, | 687 | rc = cifs_get_inode_info(&newInode, full_path, NULL, |
640 | parent_dir_inode->i_sb, xid, NULL); | 688 | parent_dir_inode->i_sb, xid, NULL); |
641 | 689 | ||
642 | if ((rc == 0) && (newInode != NULL)) { | 690 | if ((rc == 0) && (newInode != NULL)) { |
643 | if (pTcon->nocase) | 691 | if (pTcon->nocase) |
@@ -645,7 +693,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, | |||
645 | else | 693 | else |
646 | direntry->d_op = &cifs_dentry_ops; | 694 | direntry->d_op = &cifs_dentry_ops; |
647 | d_add(direntry, newInode); | 695 | d_add(direntry, newInode); |
648 | 696 | if (posix_open) | |
697 | filp = lookup_instantiate_filp(nd, direntry, NULL); | ||
649 | /* since paths are not looked up by component - the parent | 698 | /* since paths are not looked up by component - the parent |
650 | directories are presumed to be good here */ | 699 | directories are presumed to be good here */ |
651 | renew_parental_timestamps(direntry); | 700 | renew_parental_timestamps(direntry); |
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index 1e0c1bd8f2e4..df4a306f697e 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c | |||
@@ -78,7 +78,7 @@ dns_resolver_instantiate(struct key *key, const void *data, | |||
78 | } | 78 | } |
79 | 79 | ||
80 | key->type_data.x[0] = datalen; | 80 | key->type_data.x[0] = datalen; |
81 | rcu_assign_pointer(key->payload.data, ip); | 81 | key->payload.data = ip; |
82 | 82 | ||
83 | return rc; | 83 | return rc; |
84 | } | 84 | } |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 81747acca4c4..50ca088d8860 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -46,7 +46,7 @@ static inline struct cifsFileInfo *cifs_init_private( | |||
46 | memset(private_data, 0, sizeof(struct cifsFileInfo)); | 46 | memset(private_data, 0, sizeof(struct cifsFileInfo)); |
47 | private_data->netfid = netfid; | 47 | private_data->netfid = netfid; |
48 | private_data->pid = current->tgid; | 48 | private_data->pid = current->tgid; |
49 | init_MUTEX(&private_data->fh_sem); | 49 | mutex_init(&private_data->fh_mutex); |
50 | mutex_init(&private_data->lock_mutex); | 50 | mutex_init(&private_data->lock_mutex); |
51 | INIT_LIST_HEAD(&private_data->llist); | 51 | INIT_LIST_HEAD(&private_data->llist); |
52 | private_data->pfile = file; /* needed for writepage */ | 52 | private_data->pfile = file; /* needed for writepage */ |
@@ -284,35 +284,32 @@ int cifs_open(struct inode *inode, struct file *file) | |||
284 | cifs_sb = CIFS_SB(inode->i_sb); | 284 | cifs_sb = CIFS_SB(inode->i_sb); |
285 | tcon = cifs_sb->tcon; | 285 | tcon = cifs_sb->tcon; |
286 | 286 | ||
287 | if (file->f_flags & O_CREAT) { | 287 | /* search inode for this file and fill in file->private_data */ |
288 | /* search inode for this file and fill in file->private_data */ | 288 | pCifsInode = CIFS_I(file->f_path.dentry->d_inode); |
289 | pCifsInode = CIFS_I(file->f_path.dentry->d_inode); | 289 | read_lock(&GlobalSMBSeslock); |
290 | read_lock(&GlobalSMBSeslock); | 290 | list_for_each(tmp, &pCifsInode->openFileList) { |
291 | list_for_each(tmp, &pCifsInode->openFileList) { | 291 | pCifsFile = list_entry(tmp, struct cifsFileInfo, |
292 | pCifsFile = list_entry(tmp, struct cifsFileInfo, | 292 | flist); |
293 | flist); | 293 | if ((pCifsFile->pfile == NULL) && |
294 | if ((pCifsFile->pfile == NULL) && | 294 | (pCifsFile->pid == current->tgid)) { |
295 | (pCifsFile->pid == current->tgid)) { | 295 | /* mode set in cifs_create */ |
296 | /* mode set in cifs_create */ | 296 | |
297 | 297 | /* needed for writepage */ | |
298 | /* needed for writepage */ | 298 | pCifsFile->pfile = file; |
299 | pCifsFile->pfile = file; | 299 | |
300 | 300 | file->private_data = pCifsFile; | |
301 | file->private_data = pCifsFile; | 301 | break; |
302 | break; | ||
303 | } | ||
304 | } | ||
305 | read_unlock(&GlobalSMBSeslock); | ||
306 | if (file->private_data != NULL) { | ||
307 | rc = 0; | ||
308 | FreeXid(xid); | ||
309 | return rc; | ||
310 | } else { | ||
311 | if (file->f_flags & O_EXCL) | ||
312 | cERROR(1, ("could not find file instance for " | ||
313 | "new file %p", file)); | ||
314 | } | 302 | } |
315 | } | 303 | } |
304 | read_unlock(&GlobalSMBSeslock); | ||
305 | |||
306 | if (file->private_data != NULL) { | ||
307 | rc = 0; | ||
308 | FreeXid(xid); | ||
309 | return rc; | ||
310 | } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL)) | ||
311 | cERROR(1, ("could not find file instance for " | ||
312 | "new file %p", file)); | ||
316 | 313 | ||
317 | full_path = build_path_from_dentry(file->f_path.dentry); | 314 | full_path = build_path_from_dentry(file->f_path.dentry); |
318 | if (full_path == NULL) { | 315 | if (full_path == NULL) { |
@@ -500,9 +497,9 @@ static int cifs_reopen_file(struct file *file, bool can_flush) | |||
500 | return -EBADF; | 497 | return -EBADF; |
501 | 498 | ||
502 | xid = GetXid(); | 499 | xid = GetXid(); |
503 | down(&pCifsFile->fh_sem); | 500 | mutex_unlock(&pCifsFile->fh_mutex); |
504 | if (!pCifsFile->invalidHandle) { | 501 | if (!pCifsFile->invalidHandle) { |
505 | up(&pCifsFile->fh_sem); | 502 | mutex_lock(&pCifsFile->fh_mutex); |
506 | FreeXid(xid); | 503 | FreeXid(xid); |
507 | return 0; | 504 | return 0; |
508 | } | 505 | } |
@@ -533,7 +530,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush) | |||
533 | if (full_path == NULL) { | 530 | if (full_path == NULL) { |
534 | rc = -ENOMEM; | 531 | rc = -ENOMEM; |
535 | reopen_error_exit: | 532 | reopen_error_exit: |
536 | up(&pCifsFile->fh_sem); | 533 | mutex_lock(&pCifsFile->fh_mutex); |
537 | FreeXid(xid); | 534 | FreeXid(xid); |
538 | return rc; | 535 | return rc; |
539 | } | 536 | } |
@@ -575,14 +572,14 @@ reopen_error_exit: | |||
575 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | 572 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & |
576 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 573 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
577 | if (rc) { | 574 | if (rc) { |
578 | up(&pCifsFile->fh_sem); | 575 | mutex_lock(&pCifsFile->fh_mutex); |
579 | cFYI(1, ("cifs_open returned 0x%x", rc)); | 576 | cFYI(1, ("cifs_open returned 0x%x", rc)); |
580 | cFYI(1, ("oplock: %d", oplock)); | 577 | cFYI(1, ("oplock: %d", oplock)); |
581 | } else { | 578 | } else { |
582 | reopen_success: | 579 | reopen_success: |
583 | pCifsFile->netfid = netfid; | 580 | pCifsFile->netfid = netfid; |
584 | pCifsFile->invalidHandle = false; | 581 | pCifsFile->invalidHandle = false; |
585 | up(&pCifsFile->fh_sem); | 582 | mutex_lock(&pCifsFile->fh_mutex); |
586 | pCifsInode = CIFS_I(inode); | 583 | pCifsInode = CIFS_I(inode); |
587 | if (pCifsInode) { | 584 | if (pCifsInode) { |
588 | if (can_flush) { | 585 | if (can_flush) { |
@@ -971,6 +968,40 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) | |||
971 | return rc; | 968 | return rc; |
972 | } | 969 | } |
973 | 970 | ||
971 | /* | ||
972 | * Set the timeout on write requests past EOF. For some servers (Windows) | ||
973 | * these calls can be very long. | ||
974 | * | ||
975 | * If we're writing >10M past the EOF we give a 180s timeout. Anything less | ||
976 | * than that gets a 45s timeout. Writes not past EOF get 15s timeouts. | ||
977 | * The 10M cutoff is totally arbitrary. A better scheme for this would be | ||
978 | * welcome if someone wants to suggest one. | ||
979 | * | ||
980 | * We may be able to do a better job with this if there were some way to | ||
981 | * declare that a file should be sparse. | ||
982 | */ | ||
983 | static int | ||
984 | cifs_write_timeout(struct cifsInodeInfo *cifsi, loff_t offset) | ||
985 | { | ||
986 | if (offset <= cifsi->server_eof) | ||
987 | return CIFS_STD_OP; | ||
988 | else if (offset > (cifsi->server_eof + (10 * 1024 * 1024))) | ||
989 | return CIFS_VLONG_OP; | ||
990 | else | ||
991 | return CIFS_LONG_OP; | ||
992 | } | ||
993 | |||
994 | /* update the file size (if needed) after a write */ | ||
995 | static void | ||
996 | cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset, | ||
997 | unsigned int bytes_written) | ||
998 | { | ||
999 | loff_t end_of_write = offset + bytes_written; | ||
1000 | |||
1001 | if (end_of_write > cifsi->server_eof) | ||
1002 | cifsi->server_eof = end_of_write; | ||
1003 | } | ||
1004 | |||
974 | ssize_t cifs_user_write(struct file *file, const char __user *write_data, | 1005 | ssize_t cifs_user_write(struct file *file, const char __user *write_data, |
975 | size_t write_size, loff_t *poffset) | 1006 | size_t write_size, loff_t *poffset) |
976 | { | 1007 | { |
@@ -981,6 +1012,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, | |||
981 | struct cifsTconInfo *pTcon; | 1012 | struct cifsTconInfo *pTcon; |
982 | int xid, long_op; | 1013 | int xid, long_op; |
983 | struct cifsFileInfo *open_file; | 1014 | struct cifsFileInfo *open_file; |
1015 | struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode); | ||
984 | 1016 | ||
985 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 1017 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
986 | 1018 | ||
@@ -1000,11 +1032,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, | |||
1000 | 1032 | ||
1001 | xid = GetXid(); | 1033 | xid = GetXid(); |
1002 | 1034 | ||
1003 | if (*poffset > file->f_path.dentry->d_inode->i_size) | 1035 | long_op = cifs_write_timeout(cifsi, *poffset); |
1004 | long_op = CIFS_VLONG_OP; /* writes past EOF take long time */ | ||
1005 | else | ||
1006 | long_op = CIFS_LONG_OP; | ||
1007 | |||
1008 | for (total_written = 0; write_size > total_written; | 1036 | for (total_written = 0; write_size > total_written; |
1009 | total_written += bytes_written) { | 1037 | total_written += bytes_written) { |
1010 | rc = -EAGAIN; | 1038 | rc = -EAGAIN; |
@@ -1048,8 +1076,10 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, | |||
1048 | FreeXid(xid); | 1076 | FreeXid(xid); |
1049 | return rc; | 1077 | return rc; |
1050 | } | 1078 | } |
1051 | } else | 1079 | } else { |
1080 | cifs_update_eof(cifsi, *poffset, bytes_written); | ||
1052 | *poffset += bytes_written; | 1081 | *poffset += bytes_written; |
1082 | } | ||
1053 | long_op = CIFS_STD_OP; /* subsequent writes fast - | 1083 | long_op = CIFS_STD_OP; /* subsequent writes fast - |
1054 | 15 seconds is plenty */ | 1084 | 15 seconds is plenty */ |
1055 | } | 1085 | } |
@@ -1085,6 +1115,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data, | |||
1085 | struct cifsTconInfo *pTcon; | 1115 | struct cifsTconInfo *pTcon; |
1086 | int xid, long_op; | 1116 | int xid, long_op; |
1087 | struct cifsFileInfo *open_file; | 1117 | struct cifsFileInfo *open_file; |
1118 | struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode); | ||
1088 | 1119 | ||
1089 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 1120 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
1090 | 1121 | ||
@@ -1099,11 +1130,7 @@ static ssize_t cifs_write(struct file *file, const char *write_data, | |||
1099 | 1130 | ||
1100 | xid = GetXid(); | 1131 | xid = GetXid(); |
1101 | 1132 | ||
1102 | if (*poffset > file->f_path.dentry->d_inode->i_size) | 1133 | long_op = cifs_write_timeout(cifsi, *poffset); |
1103 | long_op = CIFS_VLONG_OP; /* writes past EOF can be slow */ | ||
1104 | else | ||
1105 | long_op = CIFS_LONG_OP; | ||
1106 | |||
1107 | for (total_written = 0; write_size > total_written; | 1134 | for (total_written = 0; write_size > total_written; |
1108 | total_written += bytes_written) { | 1135 | total_written += bytes_written) { |
1109 | rc = -EAGAIN; | 1136 | rc = -EAGAIN; |
@@ -1166,8 +1193,10 @@ static ssize_t cifs_write(struct file *file, const char *write_data, | |||
1166 | FreeXid(xid); | 1193 | FreeXid(xid); |
1167 | return rc; | 1194 | return rc; |
1168 | } | 1195 | } |
1169 | } else | 1196 | } else { |
1197 | cifs_update_eof(cifsi, *poffset, bytes_written); | ||
1170 | *poffset += bytes_written; | 1198 | *poffset += bytes_written; |
1199 | } | ||
1171 | long_op = CIFS_STD_OP; /* subsequent writes fast - | 1200 | long_op = CIFS_STD_OP; /* subsequent writes fast - |
1172 | 15 seconds is plenty */ | 1201 | 15 seconds is plenty */ |
1173 | } | 1202 | } |
@@ -1380,11 +1409,12 @@ static int cifs_writepages(struct address_space *mapping, | |||
1380 | int nr_pages; | 1409 | int nr_pages; |
1381 | __u64 offset = 0; | 1410 | __u64 offset = 0; |
1382 | struct cifsFileInfo *open_file; | 1411 | struct cifsFileInfo *open_file; |
1412 | struct cifsInodeInfo *cifsi = CIFS_I(mapping->host); | ||
1383 | struct page *page; | 1413 | struct page *page; |
1384 | struct pagevec pvec; | 1414 | struct pagevec pvec; |
1385 | int rc = 0; | 1415 | int rc = 0; |
1386 | int scanned = 0; | 1416 | int scanned = 0; |
1387 | int xid; | 1417 | int xid, long_op; |
1388 | 1418 | ||
1389 | cifs_sb = CIFS_SB(mapping->host->i_sb); | 1419 | cifs_sb = CIFS_SB(mapping->host->i_sb); |
1390 | 1420 | ||
@@ -1528,12 +1558,15 @@ retry: | |||
1528 | cERROR(1, ("No writable handles for inode")); | 1558 | cERROR(1, ("No writable handles for inode")); |
1529 | rc = -EBADF; | 1559 | rc = -EBADF; |
1530 | } else { | 1560 | } else { |
1561 | long_op = cifs_write_timeout(cifsi, offset); | ||
1531 | rc = CIFSSMBWrite2(xid, cifs_sb->tcon, | 1562 | rc = CIFSSMBWrite2(xid, cifs_sb->tcon, |
1532 | open_file->netfid, | 1563 | open_file->netfid, |
1533 | bytes_to_write, offset, | 1564 | bytes_to_write, offset, |
1534 | &bytes_written, iov, n_iov, | 1565 | &bytes_written, iov, n_iov, |
1535 | CIFS_LONG_OP); | 1566 | long_op); |
1536 | atomic_dec(&open_file->wrtPending); | 1567 | atomic_dec(&open_file->wrtPending); |
1568 | cifs_update_eof(cifsi, offset, bytes_written); | ||
1569 | |||
1537 | if (rc || bytes_written < bytes_to_write) { | 1570 | if (rc || bytes_written < bytes_to_write) { |
1538 | cERROR(1, ("Write2 ret %d, wrote %d", | 1571 | cERROR(1, ("Write2 ret %d, wrote %d", |
1539 | rc, bytes_written)); | 1572 | rc, bytes_written)); |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index f121a80fdd6f..f36b4e40e443 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -143,6 +143,7 @@ static void cifs_unix_info_to_inode(struct inode *inode, | |||
143 | 143 | ||
144 | inode->i_nlink = le64_to_cpu(info->Nlinks); | 144 | inode->i_nlink = le64_to_cpu(info->Nlinks); |
145 | 145 | ||
146 | cifsInfo->server_eof = end_of_file; | ||
146 | spin_lock(&inode->i_lock); | 147 | spin_lock(&inode->i_lock); |
147 | if (is_size_safe_to_change(cifsInfo, end_of_file)) { | 148 | if (is_size_safe_to_change(cifsInfo, end_of_file)) { |
148 | /* | 149 | /* |
@@ -276,7 +277,8 @@ int cifs_get_inode_info_unix(struct inode **pinode, | |||
276 | 277 | ||
277 | /* get new inode */ | 278 | /* get new inode */ |
278 | if (*pinode == NULL) { | 279 | if (*pinode == NULL) { |
279 | *pinode = cifs_new_inode(sb, &find_data.UniqueId); | 280 | __u64 unique_id = le64_to_cpu(find_data.UniqueId); |
281 | *pinode = cifs_new_inode(sb, &unique_id); | ||
280 | if (*pinode == NULL) { | 282 | if (*pinode == NULL) { |
281 | rc = -ENOMEM; | 283 | rc = -ENOMEM; |
282 | goto cgiiu_exit; | 284 | goto cgiiu_exit; |
@@ -605,12 +607,12 @@ int cifs_get_inode_info(struct inode **pinode, | |||
605 | inode->i_mode |= S_IFREG; | 607 | inode->i_mode |= S_IFREG; |
606 | } | 608 | } |
607 | 609 | ||
610 | cifsInfo->server_eof = le64_to_cpu(pfindData->EndOfFile); | ||
608 | spin_lock(&inode->i_lock); | 611 | spin_lock(&inode->i_lock); |
609 | if (is_size_safe_to_change(cifsInfo, | 612 | if (is_size_safe_to_change(cifsInfo, cifsInfo->server_eof)) { |
610 | le64_to_cpu(pfindData->EndOfFile))) { | ||
611 | /* can not safely shrink the file size here if the | 613 | /* can not safely shrink the file size here if the |
612 | client is writing to it due to potential races */ | 614 | client is writing to it due to potential races */ |
613 | i_size_write(inode, le64_to_cpu(pfindData->EndOfFile)); | 615 | i_size_write(inode, cifsInfo->server_eof); |
614 | 616 | ||
615 | /* 512 bytes (2**9) is the fake blocksize that must be | 617 | /* 512 bytes (2**9) is the fake blocksize that must be |
616 | used for this calculation */ | 618 | used for this calculation */ |
@@ -1138,6 +1140,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) | |||
1138 | cFYI(1, ("posix mkdir returned 0x%x", rc)); | 1140 | cFYI(1, ("posix mkdir returned 0x%x", rc)); |
1139 | d_drop(direntry); | 1141 | d_drop(direntry); |
1140 | } else { | 1142 | } else { |
1143 | __u64 unique_id; | ||
1141 | if (pInfo->Type == cpu_to_le32(-1)) { | 1144 | if (pInfo->Type == cpu_to_le32(-1)) { |
1142 | /* no return info, go query for it */ | 1145 | /* no return info, go query for it */ |
1143 | kfree(pInfo); | 1146 | kfree(pInfo); |
@@ -1151,8 +1154,8 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) | |||
1151 | else | 1154 | else |
1152 | direntry->d_op = &cifs_dentry_ops; | 1155 | direntry->d_op = &cifs_dentry_ops; |
1153 | 1156 | ||
1154 | newinode = cifs_new_inode(inode->i_sb, | 1157 | unique_id = le64_to_cpu(pInfo->UniqueId); |
1155 | &pInfo->UniqueId); | 1158 | newinode = cifs_new_inode(inode->i_sb, &unique_id); |
1156 | if (newinode == NULL) { | 1159 | if (newinode == NULL) { |
1157 | kfree(pInfo); | 1160 | kfree(pInfo); |
1158 | goto mkdir_get_info; | 1161 | goto mkdir_get_info; |
@@ -1450,7 +1453,8 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, | |||
1450 | checking the UniqueId via FILE_INTERNAL_INFO */ | 1453 | checking the UniqueId via FILE_INTERNAL_INFO */ |
1451 | 1454 | ||
1452 | unlink_target: | 1455 | unlink_target: |
1453 | if ((rc == -EACCES) || (rc == -EEXIST)) { | 1456 | /* Try unlinking the target dentry if it's not negative */ |
1457 | if (target_dentry->d_inode && (rc == -EACCES || rc == -EEXIST)) { | ||
1454 | tmprc = cifs_unlink(target_dir, target_dentry); | 1458 | tmprc = cifs_unlink(target_dir, target_dentry); |
1455 | if (tmprc) | 1459 | if (tmprc) |
1456 | goto cifs_rename_exit; | 1460 | goto cifs_rename_exit; |
@@ -1753,6 +1757,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, | |||
1753 | } | 1757 | } |
1754 | 1758 | ||
1755 | if (rc == 0) { | 1759 | if (rc == 0) { |
1760 | cifsInode->server_eof = attrs->ia_size; | ||
1756 | rc = cifs_vmtruncate(inode, attrs->ia_size); | 1761 | rc = cifs_vmtruncate(inode, attrs->ia_size); |
1757 | cifs_truncate_page(inode->i_mapping, inode->i_size); | 1762 | cifs_truncate_page(inode->i_mapping, inode->i_size); |
1758 | } | 1763 | } |
@@ -1792,20 +1797,21 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) | |||
1792 | goto out; | 1797 | goto out; |
1793 | } | 1798 | } |
1794 | 1799 | ||
1795 | if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) { | 1800 | /* |
1796 | /* | 1801 | * Attempt to flush data before changing attributes. We need to do |
1797 | Flush data before changing file size or changing the last | 1802 | * this for ATTR_SIZE and ATTR_MTIME for sure, and if we change the |
1798 | write time of the file on the server. If the | 1803 | * ownership or mode then we may also need to do this. Here, we take |
1799 | flush returns error, store it to report later and continue. | 1804 | * the safe way out and just do the flush on all setattr requests. If |
1800 | BB: This should be smarter. Why bother flushing pages that | 1805 | * the flush returns error, store it to report later and continue. |
1801 | will be truncated anyway? Also, should we error out here if | 1806 | * |
1802 | the flush returns error? | 1807 | * BB: This should be smarter. Why bother flushing pages that |
1803 | */ | 1808 | * will be truncated anyway? Also, should we error out here if |
1804 | rc = filemap_write_and_wait(inode->i_mapping); | 1809 | * the flush returns error? |
1805 | if (rc != 0) { | 1810 | */ |
1806 | cifsInode->write_behind_rc = rc; | 1811 | rc = filemap_write_and_wait(inode->i_mapping); |
1807 | rc = 0; | 1812 | if (rc != 0) { |
1808 | } | 1813 | cifsInode->write_behind_rc = rc; |
1814 | rc = 0; | ||
1809 | } | 1815 | } |
1810 | 1816 | ||
1811 | if (attrs->ia_valid & ATTR_SIZE) { | 1817 | if (attrs->ia_valid & ATTR_SIZE) { |
@@ -1903,20 +1909,21 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) | |||
1903 | return -ENOMEM; | 1909 | return -ENOMEM; |
1904 | } | 1910 | } |
1905 | 1911 | ||
1906 | if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) { | 1912 | /* |
1907 | /* | 1913 | * Attempt to flush data before changing attributes. We need to do |
1908 | Flush data before changing file size or changing the last | 1914 | * this for ATTR_SIZE and ATTR_MTIME for sure, and if we change the |
1909 | write time of the file on the server. If the | 1915 | * ownership or mode then we may also need to do this. Here, we take |
1910 | flush returns error, store it to report later and continue. | 1916 | * the safe way out and just do the flush on all setattr requests. If |
1911 | BB: This should be smarter. Why bother flushing pages that | 1917 | * the flush returns error, store it to report later and continue. |
1912 | will be truncated anyway? Also, should we error out here if | 1918 | * |
1913 | the flush returns error? | 1919 | * BB: This should be smarter. Why bother flushing pages that |
1914 | */ | 1920 | * will be truncated anyway? Also, should we error out here if |
1915 | rc = filemap_write_and_wait(inode->i_mapping); | 1921 | * the flush returns error? |
1916 | if (rc != 0) { | 1922 | */ |
1917 | cifsInode->write_behind_rc = rc; | 1923 | rc = filemap_write_and_wait(inode->i_mapping); |
1918 | rc = 0; | 1924 | if (rc != 0) { |
1919 | } | 1925 | cifsInode->write_behind_rc = rc; |
1926 | rc = 0; | ||
1920 | } | 1927 | } |
1921 | 1928 | ||
1922 | if (attrs->ia_valid & ATTR_SIZE) { | 1929 | if (attrs->ia_valid & ATTR_SIZE) { |
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index c2c01ff4c32c..1a8be6228333 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c | |||
@@ -239,6 +239,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, | |||
239 | if (atomic_read(&cifsInfo->inUse) == 0) | 239 | if (atomic_read(&cifsInfo->inUse) == 0) |
240 | atomic_set(&cifsInfo->inUse, 1); | 240 | atomic_set(&cifsInfo->inUse, 1); |
241 | 241 | ||
242 | cifsInfo->server_eof = end_of_file; | ||
242 | spin_lock(&tmp_inode->i_lock); | 243 | spin_lock(&tmp_inode->i_lock); |
243 | if (is_size_safe_to_change(cifsInfo, end_of_file)) { | 244 | if (is_size_safe_to_change(cifsInfo, end_of_file)) { |
244 | /* can not safely change the file size here if the | 245 | /* can not safely change the file size here if the |
@@ -375,6 +376,7 @@ static void unix_fill_in_inode(struct inode *tmp_inode, | |||
375 | tmp_inode->i_gid = le64_to_cpu(pfindData->Gid); | 376 | tmp_inode->i_gid = le64_to_cpu(pfindData->Gid); |
376 | tmp_inode->i_nlink = le64_to_cpu(pfindData->Nlinks); | 377 | tmp_inode->i_nlink = le64_to_cpu(pfindData->Nlinks); |
377 | 378 | ||
379 | cifsInfo->server_eof = end_of_file; | ||
378 | spin_lock(&tmp_inode->i_lock); | 380 | spin_lock(&tmp_inode->i_lock); |
379 | if (is_size_safe_to_change(cifsInfo, end_of_file)) { | 381 | if (is_size_safe_to_change(cifsInfo, end_of_file)) { |
380 | /* can not safely change the file size here if the | 382 | /* can not safely change the file size here if the |
@@ -840,7 +842,7 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst, | |||
840 | len = strnlen(filename, PATH_MAX); | 842 | len = strnlen(filename, PATH_MAX); |
841 | } | 843 | } |
842 | 844 | ||
843 | *pinum = pFindData->UniqueId; | 845 | *pinum = le64_to_cpu(pFindData->UniqueId); |
844 | } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { | 846 | } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { |
845 | FILE_DIRECTORY_INFO *pFindData = | 847 | FILE_DIRECTORY_INFO *pFindData = |
846 | (FILE_DIRECTORY_INFO *)current_entry; | 848 | (FILE_DIRECTORY_INFO *)current_entry; |
@@ -856,7 +858,7 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst, | |||
856 | (SEARCH_ID_FULL_DIR_INFO *)current_entry; | 858 | (SEARCH_ID_FULL_DIR_INFO *)current_entry; |
857 | filename = &pFindData->FileName[0]; | 859 | filename = &pFindData->FileName[0]; |
858 | len = le32_to_cpu(pFindData->FileNameLength); | 860 | len = le32_to_cpu(pFindData->FileNameLength); |
859 | *pinum = pFindData->UniqueId; | 861 | *pinum = le64_to_cpu(pFindData->UniqueId); |
860 | } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { | 862 | } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { |
861 | FILE_BOTH_DIRECTORY_INFO *pFindData = | 863 | FILE_BOTH_DIRECTORY_INFO *pFindData = |
862 | (FILE_BOTH_DIRECTORY_INFO *)current_entry; | 864 | (FILE_BOTH_DIRECTORY_INFO *)current_entry; |
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 5c68b4282be9..c652c73760dd 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c | |||
@@ -285,35 +285,36 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft, | |||
285 | int words_left, len; | 285 | int words_left, len; |
286 | char *data = *pbcc_area; | 286 | char *data = *pbcc_area; |
287 | 287 | ||
288 | |||
289 | |||
290 | cFYI(1, ("bleft %d", bleft)); | 288 | cFYI(1, ("bleft %d", bleft)); |
291 | 289 | ||
292 | 290 | /* | |
293 | /* SMB header is unaligned, so cifs servers word align start of | 291 | * Windows servers do not always double null terminate their final |
294 | Unicode strings */ | 292 | * Unicode string. Check to see if there are an uneven number of bytes |
295 | data++; | 293 | * left. If so, then add an extra NULL pad byte to the end of the |
296 | bleft--; /* Windows servers do not always double null terminate | 294 | * response. |
297 | their final Unicode string - in which case we | 295 | * |
298 | now will not attempt to decode the byte of junk | 296 | * See section 2.7.2 in "Implementing CIFS" for details |
299 | which follows it */ | 297 | */ |
298 | if (bleft % 2) { | ||
299 | data[bleft] = 0; | ||
300 | ++bleft; | ||
301 | } | ||
300 | 302 | ||
301 | words_left = bleft / 2; | 303 | words_left = bleft / 2; |
302 | 304 | ||
303 | /* save off server operating system */ | 305 | /* save off server operating system */ |
304 | len = UniStrnlen((wchar_t *) data, words_left); | 306 | len = UniStrnlen((wchar_t *) data, words_left); |
305 | 307 | ||
306 | /* We look for obvious messed up bcc or strings in response so we do not go off | ||
307 | the end since (at least) WIN2K and Windows XP have a major bug in not null | ||
308 | terminating last Unicode string in response */ | ||
309 | if (len >= words_left) | 308 | if (len >= words_left) |
310 | return rc; | 309 | return rc; |
311 | 310 | ||
312 | kfree(ses->serverOS); | 311 | kfree(ses->serverOS); |
313 | /* UTF-8 string will not grow more than four times as big as UCS-16 */ | 312 | /* UTF-8 string will not grow more than four times as big as UCS-16 */ |
314 | ses->serverOS = kzalloc((4 * len) + 2 /* trailing null */, GFP_KERNEL); | 313 | ses->serverOS = kzalloc((4 * len) + 2 /* trailing null */, GFP_KERNEL); |
315 | if (ses->serverOS != NULL) | 314 | if (ses->serverOS != NULL) { |
316 | cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len, nls_cp); | 315 | cifs_strfromUCS_le(ses->serverOS, (__le16 *)data, len, nls_cp); |
316 | cFYI(1, ("serverOS=%s", ses->serverOS)); | ||
317 | } | ||
317 | data += 2 * (len + 1); | 318 | data += 2 * (len + 1); |
318 | words_left -= len + 1; | 319 | words_left -= len + 1; |
319 | 320 | ||
@@ -328,6 +329,7 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft, | |||
328 | if (ses->serverNOS != NULL) { | 329 | if (ses->serverNOS != NULL) { |
329 | cifs_strfromUCS_le(ses->serverNOS, (__le16 *)data, len, | 330 | cifs_strfromUCS_le(ses->serverNOS, (__le16 *)data, len, |
330 | nls_cp); | 331 | nls_cp); |
332 | cFYI(1, ("serverNOS=%s", ses->serverNOS)); | ||
331 | if (strncmp(ses->serverNOS, "NT LAN Manager 4", 16) == 0) { | 333 | if (strncmp(ses->serverNOS, "NT LAN Manager 4", 16) == 0) { |
332 | cFYI(1, ("NT4 server")); | 334 | cFYI(1, ("NT4 server")); |
333 | ses->flags |= CIFS_SES_NT4; | 335 | ses->flags |= CIFS_SES_NT4; |
@@ -343,12 +345,11 @@ static int decode_unicode_ssetup(char **pbcc_area, int bleft, | |||
343 | return rc; | 345 | return rc; |
344 | 346 | ||
345 | kfree(ses->serverDomain); | 347 | kfree(ses->serverDomain); |
346 | ses->serverDomain = kzalloc(2 * (len + 1), GFP_KERNEL); /* BB FIXME wrong length */ | 348 | ses->serverDomain = kzalloc((4 * len) + 2, GFP_KERNEL); |
347 | if (ses->serverDomain != NULL) { | 349 | if (ses->serverDomain != NULL) { |
348 | cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len, | 350 | cifs_strfromUCS_le(ses->serverDomain, (__le16 *)data, len, |
349 | nls_cp); | 351 | nls_cp); |
350 | ses->serverDomain[2*len] = 0; | 352 | cFYI(1, ("serverDomain=%s", ses->serverDomain)); |
351 | ses->serverDomain[(2*len) + 1] = 0; | ||
352 | } | 353 | } |
353 | data += 2 * (len + 1); | 354 | data += 2 * (len + 1); |
354 | words_left -= len + 1; | 355 | words_left -= len + 1; |
@@ -702,12 +703,18 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |||
702 | } | 703 | } |
703 | 704 | ||
704 | /* BB check if Unicode and decode strings */ | 705 | /* BB check if Unicode and decode strings */ |
705 | if (smb_buf->Flags2 & SMBFLG2_UNICODE) | 706 | if (smb_buf->Flags2 & SMBFLG2_UNICODE) { |
707 | /* unicode string area must be word-aligned */ | ||
708 | if (((unsigned long) bcc_ptr - (unsigned long) smb_buf) % 2) { | ||
709 | ++bcc_ptr; | ||
710 | --bytes_remaining; | ||
711 | } | ||
706 | rc = decode_unicode_ssetup(&bcc_ptr, bytes_remaining, | 712 | rc = decode_unicode_ssetup(&bcc_ptr, bytes_remaining, |
707 | ses, nls_cp); | 713 | ses, nls_cp); |
708 | else | 714 | } else { |
709 | rc = decode_ascii_ssetup(&bcc_ptr, bytes_remaining, | 715 | rc = decode_ascii_ssetup(&bcc_ptr, bytes_remaining, |
710 | ses, nls_cp); | 716 | ses, nls_cp); |
717 | } | ||
711 | 718 | ||
712 | ssetup_exit: | 719 | ssetup_exit: |
713 | if (spnego_key) { | 720 | if (spnego_key) { |
diff --git a/fs/compat.c b/fs/compat.c index 3f84d5f15889..681ed81e6be0 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -181,22 +181,24 @@ asmlinkage long compat_sys_newstat(char __user * filename, | |||
181 | struct compat_stat __user *statbuf) | 181 | struct compat_stat __user *statbuf) |
182 | { | 182 | { |
183 | struct kstat stat; | 183 | struct kstat stat; |
184 | int error = vfs_stat_fd(AT_FDCWD, filename, &stat); | 184 | int error; |
185 | 185 | ||
186 | if (!error) | 186 | error = vfs_stat(filename, &stat); |
187 | error = cp_compat_stat(&stat, statbuf); | 187 | if (error) |
188 | return error; | 188 | return error; |
189 | return cp_compat_stat(&stat, statbuf); | ||
189 | } | 190 | } |
190 | 191 | ||
191 | asmlinkage long compat_sys_newlstat(char __user * filename, | 192 | asmlinkage long compat_sys_newlstat(char __user * filename, |
192 | struct compat_stat __user *statbuf) | 193 | struct compat_stat __user *statbuf) |
193 | { | 194 | { |
194 | struct kstat stat; | 195 | struct kstat stat; |
195 | int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); | 196 | int error; |
196 | 197 | ||
197 | if (!error) | 198 | error = vfs_lstat(filename, &stat); |
198 | error = cp_compat_stat(&stat, statbuf); | 199 | if (error) |
199 | return error; | 200 | return error; |
201 | return cp_compat_stat(&stat, statbuf); | ||
200 | } | 202 | } |
201 | 203 | ||
202 | #ifndef __ARCH_WANT_STAT64 | 204 | #ifndef __ARCH_WANT_STAT64 |
@@ -204,21 +206,12 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user *filename, | |||
204 | struct compat_stat __user *statbuf, int flag) | 206 | struct compat_stat __user *statbuf, int flag) |
205 | { | 207 | { |
206 | struct kstat stat; | 208 | struct kstat stat; |
207 | int error = -EINVAL; | 209 | int error; |
208 | |||
209 | if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) | ||
210 | goto out; | ||
211 | |||
212 | if (flag & AT_SYMLINK_NOFOLLOW) | ||
213 | error = vfs_lstat_fd(dfd, filename, &stat); | ||
214 | else | ||
215 | error = vfs_stat_fd(dfd, filename, &stat); | ||
216 | |||
217 | if (!error) | ||
218 | error = cp_compat_stat(&stat, statbuf); | ||
219 | 210 | ||
220 | out: | 211 | error = vfs_fstatat(dfd, filename, &stat, flag); |
221 | return error; | 212 | if (error) |
213 | return error; | ||
214 | return cp_compat_stat(&stat, statbuf); | ||
222 | } | 215 | } |
223 | #endif | 216 | #endif |
224 | 217 | ||
@@ -1483,6 +1476,7 @@ int compat_do_execve(char * filename, | |||
1483 | struct linux_binprm *bprm; | 1476 | struct linux_binprm *bprm; |
1484 | struct file *file; | 1477 | struct file *file; |
1485 | struct files_struct *displaced; | 1478 | struct files_struct *displaced; |
1479 | bool clear_in_exec; | ||
1486 | int retval; | 1480 | int retval; |
1487 | 1481 | ||
1488 | retval = unshare_files(&displaced); | 1482 | retval = unshare_files(&displaced); |
@@ -1505,8 +1499,9 @@ int compat_do_execve(char * filename, | |||
1505 | goto out_unlock; | 1499 | goto out_unlock; |
1506 | 1500 | ||
1507 | retval = check_unsafe_exec(bprm); | 1501 | retval = check_unsafe_exec(bprm); |
1508 | if (retval) | 1502 | if (retval < 0) |
1509 | goto out_unlock; | 1503 | goto out_unlock; |
1504 | clear_in_exec = retval; | ||
1510 | 1505 | ||
1511 | file = open_exec(filename); | 1506 | file = open_exec(filename); |
1512 | retval = PTR_ERR(file); | 1507 | retval = PTR_ERR(file); |
@@ -1553,9 +1548,7 @@ int compat_do_execve(char * filename, | |||
1553 | goto out; | 1548 | goto out; |
1554 | 1549 | ||
1555 | /* execve succeeded */ | 1550 | /* execve succeeded */ |
1556 | write_lock(&current->fs->lock); | ||
1557 | current->fs->in_exec = 0; | 1551 | current->fs->in_exec = 0; |
1558 | write_unlock(&current->fs->lock); | ||
1559 | current->in_execve = 0; | 1552 | current->in_execve = 0; |
1560 | mutex_unlock(&current->cred_exec_mutex); | 1553 | mutex_unlock(&current->cred_exec_mutex); |
1561 | acct_update_integrals(current); | 1554 | acct_update_integrals(current); |
@@ -1575,9 +1568,8 @@ out_file: | |||
1575 | } | 1568 | } |
1576 | 1569 | ||
1577 | out_unmark: | 1570 | out_unmark: |
1578 | write_lock(&current->fs->lock); | 1571 | if (clear_in_exec) |
1579 | current->fs->in_exec = 0; | 1572 | current->fs->in_exec = 0; |
1580 | write_unlock(&current->fs->lock); | ||
1581 | 1573 | ||
1582 | out_unlock: | 1574 | out_unlock: |
1583 | current->in_execve = 0; | 1575 | current->in_execve = 0; |
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 3e87ce443ea2..b83f6bcfa51a 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -58,7 +58,6 @@ | |||
58 | #include <linux/i2c.h> | 58 | #include <linux/i2c.h> |
59 | #include <linux/i2c-dev.h> | 59 | #include <linux/i2c-dev.h> |
60 | #include <linux/atalk.h> | 60 | #include <linux/atalk.h> |
61 | #include <linux/loop.h> | ||
62 | 61 | ||
63 | #include <net/bluetooth/bluetooth.h> | 62 | #include <net/bluetooth/bluetooth.h> |
64 | #include <net/bluetooth/hci.h> | 63 | #include <net/bluetooth/hci.h> |
@@ -68,6 +67,7 @@ | |||
68 | #include <linux/gigaset_dev.h> | 67 | #include <linux/gigaset_dev.h> |
69 | 68 | ||
70 | #ifdef CONFIG_BLOCK | 69 | #ifdef CONFIG_BLOCK |
70 | #include <linux/loop.h> | ||
71 | #include <scsi/scsi.h> | 71 | #include <scsi/scsi.h> |
72 | #include <scsi/scsi_ioctl.h> | 72 | #include <scsi/scsi_ioctl.h> |
73 | #include <scsi/sg.h> | 73 | #include <scsi/sg.h> |
@@ -2660,6 +2660,8 @@ HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl) | |||
2660 | HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl) | 2660 | HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl) |
2661 | /* block stuff */ | 2661 | /* block stuff */ |
2662 | #ifdef CONFIG_BLOCK | 2662 | #ifdef CONFIG_BLOCK |
2663 | /* loop */ | ||
2664 | IGNORE_IOCTL(LOOP_CLR_FD) | ||
2663 | /* Raw devices */ | 2665 | /* Raw devices */ |
2664 | HANDLE_IOCTL(RAW_SETBIND, raw_ioctl) | 2666 | HANDLE_IOCTL(RAW_SETBIND, raw_ioctl) |
2665 | HANDLE_IOCTL(RAW_GETBIND, raw_ioctl) | 2667 | HANDLE_IOCTL(RAW_GETBIND, raw_ioctl) |
@@ -2728,9 +2730,6 @@ HANDLE_IOCTL(LPSETTIMEOUT, lp_timeout_trans) | |||
2728 | IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32) | 2730 | IGNORE_IOCTL(VFAT_IOCTL_READDIR_BOTH32) |
2729 | IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32) | 2731 | IGNORE_IOCTL(VFAT_IOCTL_READDIR_SHORT32) |
2730 | 2732 | ||
2731 | /* loop */ | ||
2732 | IGNORE_IOCTL(LOOP_CLR_FD) | ||
2733 | |||
2734 | #ifdef CONFIG_SPARC | 2733 | #ifdef CONFIG_SPARC |
2735 | /* Sparc framebuffers, handled in sbusfb_compat_ioctl() */ | 2734 | /* Sparc framebuffers, handled in sbusfb_compat_ioctl() */ |
2736 | IGNORE_IOCTL(FBIOGTYPE) | 2735 | IGNORE_IOCTL(FBIOGTYPE) |
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index 932a92b31483..c8afa6b1d91d 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c | |||
@@ -135,7 +135,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna | |||
135 | struct path path; | 135 | struct path path; |
136 | struct configfs_dirent *sd; | 136 | struct configfs_dirent *sd; |
137 | struct config_item *parent_item; | 137 | struct config_item *parent_item; |
138 | struct config_item *target_item; | 138 | struct config_item *target_item = NULL; |
139 | struct config_item_type *type; | 139 | struct config_item_type *type; |
140 | 140 | ||
141 | ret = -EPERM; /* What lack-of-symlink returns */ | 141 | ret = -EPERM; /* What lack-of-symlink returns */ |
diff --git a/fs/dcache.c b/fs/dcache.c index 761d30be2683..1fcffebfb44f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -2149,7 +2149,6 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) | |||
2149 | int result; | 2149 | int result; |
2150 | unsigned long seq; | 2150 | unsigned long seq; |
2151 | 2151 | ||
2152 | /* FIXME: This is old behavior, needed? Please check callers. */ | ||
2153 | if (new_dentry == old_dentry) | 2152 | if (new_dentry == old_dentry) |
2154 | return 1; | 2153 | return 1; |
2155 | 2154 | ||
diff --git a/fs/direct-io.c b/fs/direct-io.c index da258e7249cc..05763bbc2050 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -307,8 +307,6 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev, | |||
307 | struct bio *bio; | 307 | struct bio *bio; |
308 | 308 | ||
309 | bio = bio_alloc(GFP_KERNEL, nr_vecs); | 309 | bio = bio_alloc(GFP_KERNEL, nr_vecs); |
310 | if (bio == NULL) | ||
311 | return -ENOMEM; | ||
312 | 310 | ||
313 | bio->bi_bdev = bdev; | 311 | bio->bi_bdev = bdev; |
314 | bio->bi_sector = first_sector; | 312 | bio->bi_sector = first_sector; |
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 8b65f289ee00..b91851f1cda3 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c | |||
@@ -483,15 +483,7 @@ int ecryptfs_encrypt_page(struct page *page) | |||
483 | ecryptfs_inode = page->mapping->host; | 483 | ecryptfs_inode = page->mapping->host; |
484 | crypt_stat = | 484 | crypt_stat = |
485 | &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); | 485 | &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); |
486 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { | 486 | BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)); |
487 | rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page, | ||
488 | 0, PAGE_CACHE_SIZE); | ||
489 | if (rc) | ||
490 | printk(KERN_ERR "%s: Error attempting to copy " | ||
491 | "page at index [%ld]\n", __func__, | ||
492 | page->index); | ||
493 | goto out; | ||
494 | } | ||
495 | enc_extent_page = alloc_page(GFP_USER); | 487 | enc_extent_page = alloc_page(GFP_USER); |
496 | if (!enc_extent_page) { | 488 | if (!enc_extent_page) { |
497 | rc = -ENOMEM; | 489 | rc = -ENOMEM; |
@@ -620,16 +612,7 @@ int ecryptfs_decrypt_page(struct page *page) | |||
620 | ecryptfs_inode = page->mapping->host; | 612 | ecryptfs_inode = page->mapping->host; |
621 | crypt_stat = | 613 | crypt_stat = |
622 | &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); | 614 | &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); |
623 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { | 615 | BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)); |
624 | rc = ecryptfs_read_lower_page_segment(page, page->index, 0, | ||
625 | PAGE_CACHE_SIZE, | ||
626 | ecryptfs_inode); | ||
627 | if (rc) | ||
628 | printk(KERN_ERR "%s: Error attempting to copy " | ||
629 | "page at index [%ld]\n", __func__, | ||
630 | page->index); | ||
631 | goto out; | ||
632 | } | ||
633 | enc_extent_page = alloc_page(GFP_USER); | 616 | enc_extent_page = alloc_page(GFP_USER); |
634 | if (!enc_extent_page) { | 617 | if (!enc_extent_page) { |
635 | rc = -ENOMEM; | 618 | rc = -ENOMEM; |
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 064c5820e4e5..00b30a2d5466 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h | |||
@@ -269,6 +269,7 @@ struct ecryptfs_crypt_stat { | |||
269 | #define ECRYPTFS_ENCRYPT_FILENAMES 0x00000800 | 269 | #define ECRYPTFS_ENCRYPT_FILENAMES 0x00000800 |
270 | #define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00001000 | 270 | #define ECRYPTFS_ENCFN_USE_MOUNT_FNEK 0x00001000 |
271 | #define ECRYPTFS_ENCFN_USE_FEK 0x00002000 | 271 | #define ECRYPTFS_ENCFN_USE_FEK 0x00002000 |
272 | #define ECRYPTFS_UNLINK_SIGS 0x00004000 | ||
272 | u32 flags; | 273 | u32 flags; |
273 | unsigned int file_version; | 274 | unsigned int file_version; |
274 | size_t iv_bytes; | 275 | size_t iv_bytes; |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 55b3145b8072..2f0945d63297 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -379,9 +379,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
379 | goto out_d_drop; | 379 | goto out_d_drop; |
380 | } | 380 | } |
381 | lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); | 381 | lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); |
382 | mutex_lock(&lower_dir_dentry->d_inode->i_mutex); | ||
382 | lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name, | 383 | lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name, |
383 | lower_dir_dentry, | 384 | lower_dir_dentry, |
384 | ecryptfs_dentry->d_name.len); | 385 | ecryptfs_dentry->d_name.len); |
386 | mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); | ||
385 | if (IS_ERR(lower_dentry)) { | 387 | if (IS_ERR(lower_dentry)) { |
386 | rc = PTR_ERR(lower_dentry); | 388 | rc = PTR_ERR(lower_dentry); |
387 | printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " | 389 | printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " |
@@ -406,9 +408,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
406 | "filename; rc = [%d]\n", __func__, rc); | 408 | "filename; rc = [%d]\n", __func__, rc); |
407 | goto out_d_drop; | 409 | goto out_d_drop; |
408 | } | 410 | } |
411 | mutex_lock(&lower_dir_dentry->d_inode->i_mutex); | ||
409 | lower_dentry = lookup_one_len(encrypted_and_encoded_name, | 412 | lower_dentry = lookup_one_len(encrypted_and_encoded_name, |
410 | lower_dir_dentry, | 413 | lower_dir_dentry, |
411 | encrypted_and_encoded_name_size - 1); | 414 | encrypted_and_encoded_name_size - 1); |
415 | mutex_unlock(&lower_dir_dentry->d_inode->i_mutex); | ||
412 | if (IS_ERR(lower_dentry)) { | 416 | if (IS_ERR(lower_dentry)) { |
413 | rc = PTR_ERR(lower_dentry); | 417 | rc = PTR_ERR(lower_dentry); |
414 | printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " | 418 | printk(KERN_ERR "%s: lookup_one_len() returned [%d] on " |
@@ -636,8 +640,9 @@ static int | |||
636 | ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) | 640 | ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) |
637 | { | 641 | { |
638 | char *lower_buf; | 642 | char *lower_buf; |
643 | size_t lower_bufsiz; | ||
639 | struct dentry *lower_dentry; | 644 | struct dentry *lower_dentry; |
640 | struct ecryptfs_crypt_stat *crypt_stat; | 645 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat; |
641 | char *plaintext_name; | 646 | char *plaintext_name; |
642 | size_t plaintext_name_size; | 647 | size_t plaintext_name_size; |
643 | mm_segment_t old_fs; | 648 | mm_segment_t old_fs; |
@@ -648,12 +653,21 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) | |||
648 | rc = -EINVAL; | 653 | rc = -EINVAL; |
649 | goto out; | 654 | goto out; |
650 | } | 655 | } |
651 | crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; | 656 | mount_crypt_stat = &ecryptfs_superblock_to_private( |
657 | dentry->d_sb)->mount_crypt_stat; | ||
658 | /* | ||
659 | * If the lower filename is encrypted, it will result in a significantly | ||
660 | * longer name. If needed, truncate the name after decode and decrypt. | ||
661 | */ | ||
662 | if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) | ||
663 | lower_bufsiz = PATH_MAX; | ||
664 | else | ||
665 | lower_bufsiz = bufsiz; | ||
652 | /* Released in this function */ | 666 | /* Released in this function */ |
653 | lower_buf = kmalloc(bufsiz, GFP_KERNEL); | 667 | lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL); |
654 | if (lower_buf == NULL) { | 668 | if (lower_buf == NULL) { |
655 | printk(KERN_ERR "%s: Out of memory whilst attempting to " | 669 | printk(KERN_ERR "%s: Out of memory whilst attempting to " |
656 | "kmalloc [%d] bytes\n", __func__, bufsiz); | 670 | "kmalloc [%zd] bytes\n", __func__, lower_bufsiz); |
657 | rc = -ENOMEM; | 671 | rc = -ENOMEM; |
658 | goto out; | 672 | goto out; |
659 | } | 673 | } |
@@ -661,7 +675,7 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) | |||
661 | set_fs(get_ds()); | 675 | set_fs(get_ds()); |
662 | rc = lower_dentry->d_inode->i_op->readlink(lower_dentry, | 676 | rc = lower_dentry->d_inode->i_op->readlink(lower_dentry, |
663 | (char __user *)lower_buf, | 677 | (char __user *)lower_buf, |
664 | bufsiz); | 678 | lower_bufsiz); |
665 | set_fs(old_fs); | 679 | set_fs(old_fs); |
666 | if (rc >= 0) { | 680 | if (rc >= 0) { |
667 | rc = ecryptfs_decode_and_decrypt_filename(&plaintext_name, | 681 | rc = ecryptfs_decode_and_decrypt_filename(&plaintext_name, |
@@ -674,7 +688,9 @@ ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) | |||
674 | rc); | 688 | rc); |
675 | goto out_free_lower_buf; | 689 | goto out_free_lower_buf; |
676 | } | 690 | } |
677 | rc = copy_to_user(buf, plaintext_name, plaintext_name_size); | 691 | /* Check for bufsiz <= 0 done in sys_readlinkat() */ |
692 | rc = copy_to_user(buf, plaintext_name, | ||
693 | min((size_t) bufsiz, plaintext_name_size)); | ||
678 | if (rc) | 694 | if (rc) |
679 | rc = -EFAULT; | 695 | rc = -EFAULT; |
680 | else | 696 | else |
@@ -814,6 +830,13 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) | |||
814 | size_t num_zeros = (PAGE_CACHE_SIZE | 830 | size_t num_zeros = (PAGE_CACHE_SIZE |
815 | - (new_length & ~PAGE_CACHE_MASK)); | 831 | - (new_length & ~PAGE_CACHE_MASK)); |
816 | 832 | ||
833 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { | ||
834 | rc = vmtruncate(inode, new_length); | ||
835 | if (rc) | ||
836 | goto out_free; | ||
837 | rc = vmtruncate(lower_dentry->d_inode, new_length); | ||
838 | goto out_free; | ||
839 | } | ||
817 | if (num_zeros) { | 840 | if (num_zeros) { |
818 | char *zeros_virt; | 841 | char *zeros_virt; |
819 | 842 | ||
@@ -915,8 +938,6 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) | |||
915 | } | 938 | } |
916 | rc = 0; | 939 | rc = 0; |
917 | crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); | 940 | crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED); |
918 | mutex_unlock(&crypt_stat->cs_mutex); | ||
919 | goto out; | ||
920 | } | 941 | } |
921 | } | 942 | } |
922 | mutex_unlock(&crypt_stat->cs_mutex); | 943 | mutex_unlock(&crypt_stat->cs_mutex); |
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index aed56c25539b..ccabd5faa04d 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
@@ -190,14 +190,14 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, | |||
190 | init_special_inode(inode, lower_inode->i_mode, | 190 | init_special_inode(inode, lower_inode->i_mode, |
191 | lower_inode->i_rdev); | 191 | lower_inode->i_rdev); |
192 | dentry->d_op = &ecryptfs_dops; | 192 | dentry->d_op = &ecryptfs_dops; |
193 | if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD) | ||
194 | d_add(dentry, inode); | ||
195 | else | ||
196 | d_instantiate(dentry, inode); | ||
197 | fsstack_copy_attr_all(inode, lower_inode, NULL); | 193 | fsstack_copy_attr_all(inode, lower_inode, NULL); |
198 | /* This size will be overwritten for real files w/ headers and | 194 | /* This size will be overwritten for real files w/ headers and |
199 | * other metadata */ | 195 | * other metadata */ |
200 | fsstack_copy_inode_size(inode, lower_inode); | 196 | fsstack_copy_inode_size(inode, lower_inode); |
197 | if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD) | ||
198 | d_add(dentry, inode); | ||
199 | else | ||
200 | d_instantiate(dentry, inode); | ||
201 | out: | 201 | out: |
202 | return rc; | 202 | return rc; |
203 | } | 203 | } |
@@ -208,7 +208,7 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, | |||
208 | ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, | 208 | ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, |
209 | ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig, | 209 | ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig, |
210 | ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes, | 210 | ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes, |
211 | ecryptfs_opt_err }; | 211 | ecryptfs_opt_unlink_sigs, ecryptfs_opt_err }; |
212 | 212 | ||
213 | static const match_table_t tokens = { | 213 | static const match_table_t tokens = { |
214 | {ecryptfs_opt_sig, "sig=%s"}, | 214 | {ecryptfs_opt_sig, "sig=%s"}, |
@@ -222,6 +222,7 @@ static const match_table_t tokens = { | |||
222 | {ecryptfs_opt_fnek_sig, "ecryptfs_fnek_sig=%s"}, | 222 | {ecryptfs_opt_fnek_sig, "ecryptfs_fnek_sig=%s"}, |
223 | {ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"}, | 223 | {ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"}, |
224 | {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"}, | 224 | {ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"}, |
225 | {ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"}, | ||
225 | {ecryptfs_opt_err, NULL} | 226 | {ecryptfs_opt_err, NULL} |
226 | }; | 227 | }; |
227 | 228 | ||
@@ -402,6 +403,9 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options) | |||
402 | fn_cipher_key_bytes; | 403 | fn_cipher_key_bytes; |
403 | fn_cipher_key_bytes_set = 1; | 404 | fn_cipher_key_bytes_set = 1; |
404 | break; | 405 | break; |
406 | case ecryptfs_opt_unlink_sigs: | ||
407 | mount_crypt_stat->flags |= ECRYPTFS_UNLINK_SIGS; | ||
408 | break; | ||
405 | case ecryptfs_opt_err: | 409 | case ecryptfs_opt_err: |
406 | default: | 410 | default: |
407 | printk(KERN_WARNING | 411 | printk(KERN_WARNING |
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 295e7fa56755..f1c17e87c5fb 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c | |||
@@ -133,45 +133,6 @@ out: | |||
133 | return rc; | 133 | return rc; |
134 | } | 134 | } |
135 | 135 | ||
136 | static int | ||
137 | ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type, | ||
138 | struct ecryptfs_msg_ctx **msg_ctx); | ||
139 | |||
140 | /** | ||
141 | * ecryptfs_send_raw_message | ||
142 | * @msg_type: Message type | ||
143 | * @daemon: Daemon struct for recipient of message | ||
144 | * | ||
145 | * A raw message is one that does not include an ecryptfs_message | ||
146 | * struct. It simply has a type. | ||
147 | * | ||
148 | * Must be called with ecryptfs_daemon_hash_mux held. | ||
149 | * | ||
150 | * Returns zero on success; non-zero otherwise | ||
151 | */ | ||
152 | static int ecryptfs_send_raw_message(u8 msg_type, | ||
153 | struct ecryptfs_daemon *daemon) | ||
154 | { | ||
155 | struct ecryptfs_msg_ctx *msg_ctx; | ||
156 | int rc; | ||
157 | |||
158 | rc = ecryptfs_send_message_locked(NULL, 0, msg_type, &msg_ctx); | ||
159 | if (rc) { | ||
160 | printk(KERN_ERR "%s: Error whilst attempting to send " | ||
161 | "message to ecryptfsd; rc = [%d]\n", __func__, rc); | ||
162 | goto out; | ||
163 | } | ||
164 | /* Raw messages are logically context-free (e.g., no | ||
165 | * reply is expected), so we set the state of the | ||
166 | * ecryptfs_msg_ctx object to indicate that it should | ||
167 | * be freed as soon as the message is sent. */ | ||
168 | mutex_lock(&msg_ctx->mux); | ||
169 | msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY; | ||
170 | mutex_unlock(&msg_ctx->mux); | ||
171 | out: | ||
172 | return rc; | ||
173 | } | ||
174 | |||
175 | /** | 136 | /** |
176 | * ecryptfs_spawn_daemon - Create and initialize a new daemon struct | 137 | * ecryptfs_spawn_daemon - Create and initialize a new daemon struct |
177 | * @daemon: Pointer to set to newly allocated daemon struct | 138 | * @daemon: Pointer to set to newly allocated daemon struct |
@@ -212,49 +173,6 @@ out: | |||
212 | } | 173 | } |
213 | 174 | ||
214 | /** | 175 | /** |
215 | * ecryptfs_process_helo | ||
216 | * @euid: The user ID owner of the message | ||
217 | * @user_ns: The namespace in which @euid applies | ||
218 | * @pid: The process ID for the userspace program that sent the | ||
219 | * message | ||
220 | * | ||
221 | * Adds the euid and pid values to the daemon euid hash. If an euid | ||
222 | * already has a daemon pid registered, the daemon will be | ||
223 | * unregistered before the new daemon is put into the hash list. | ||
224 | * Returns zero after adding a new daemon to the hash list; | ||
225 | * non-zero otherwise. | ||
226 | */ | ||
227 | int ecryptfs_process_helo(uid_t euid, struct user_namespace *user_ns, | ||
228 | struct pid *pid) | ||
229 | { | ||
230 | struct ecryptfs_daemon *new_daemon; | ||
231 | struct ecryptfs_daemon *old_daemon; | ||
232 | int rc; | ||
233 | |||
234 | mutex_lock(&ecryptfs_daemon_hash_mux); | ||
235 | rc = ecryptfs_find_daemon_by_euid(&old_daemon, euid, user_ns); | ||
236 | if (rc != 0) { | ||
237 | printk(KERN_WARNING "Received request from user [%d] " | ||
238 | "to register daemon [0x%p]; unregistering daemon " | ||
239 | "[0x%p]\n", euid, pid, old_daemon->pid); | ||
240 | rc = ecryptfs_send_raw_message(ECRYPTFS_MSG_QUIT, old_daemon); | ||
241 | if (rc) | ||
242 | printk(KERN_WARNING "Failed to send QUIT " | ||
243 | "message to daemon [0x%p]; rc = [%d]\n", | ||
244 | old_daemon->pid, rc); | ||
245 | hlist_del(&old_daemon->euid_chain); | ||
246 | kfree(old_daemon); | ||
247 | } | ||
248 | rc = ecryptfs_spawn_daemon(&new_daemon, euid, user_ns, pid); | ||
249 | if (rc) | ||
250 | printk(KERN_ERR "%s: The gods are displeased with this attempt " | ||
251 | "to create a new daemon object for euid [%d]; pid " | ||
252 | "[0x%p]; rc = [%d]\n", __func__, euid, pid, rc); | ||
253 | mutex_unlock(&ecryptfs_daemon_hash_mux); | ||
254 | return rc; | ||
255 | } | ||
256 | |||
257 | /** | ||
258 | * ecryptfs_exorcise_daemon - Destroy the daemon struct | 176 | * ecryptfs_exorcise_daemon - Destroy the daemon struct |
259 | * | 177 | * |
260 | * Must be called ceremoniously while in possession of | 178 | * Must be called ceremoniously while in possession of |
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index a67fea655f49..4ec8f61ccf5a 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c | |||
@@ -193,26 +193,20 @@ int ecryptfs_send_miscdev(char *data, size_t data_size, | |||
193 | int rc = 0; | 193 | int rc = 0; |
194 | 194 | ||
195 | mutex_lock(&msg_ctx->mux); | 195 | mutex_lock(&msg_ctx->mux); |
196 | if (data) { | 196 | msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size), |
197 | msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size), | 197 | GFP_KERNEL); |
198 | GFP_KERNEL); | 198 | if (!msg_ctx->msg) { |
199 | if (!msg_ctx->msg) { | 199 | rc = -ENOMEM; |
200 | rc = -ENOMEM; | 200 | printk(KERN_ERR "%s: Out of memory whilst attempting " |
201 | printk(KERN_ERR "%s: Out of memory whilst attempting " | 201 | "to kmalloc(%zd, GFP_KERNEL)\n", __func__, |
202 | "to kmalloc(%zd, GFP_KERNEL)\n", __func__, | 202 | (sizeof(*msg_ctx->msg) + data_size)); |
203 | (sizeof(*msg_ctx->msg) + data_size)); | 203 | goto out_unlock; |
204 | goto out_unlock; | 204 | } |
205 | } | ||
206 | } else | ||
207 | msg_ctx->msg = NULL; | ||
208 | msg_ctx->msg->index = msg_ctx->index; | 205 | msg_ctx->msg->index = msg_ctx->index; |
209 | msg_ctx->msg->data_len = data_size; | 206 | msg_ctx->msg->data_len = data_size; |
210 | msg_ctx->type = msg_type; | 207 | msg_ctx->type = msg_type; |
211 | if (data) { | 208 | memcpy(msg_ctx->msg->data, data, data_size); |
212 | memcpy(msg_ctx->msg->data, data, data_size); | 209 | msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size); |
213 | msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size); | ||
214 | } else | ||
215 | msg_ctx->msg_size = 0; | ||
216 | mutex_lock(&daemon->mux); | 210 | mutex_lock(&daemon->mux); |
217 | list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue); | 211 | list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue); |
218 | daemon->num_queued_msg_ctx++; | 212 | daemon->num_queued_msg_ctx++; |
@@ -418,18 +412,13 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, | |||
418 | 412 | ||
419 | if (count == 0) | 413 | if (count == 0) |
420 | goto out; | 414 | goto out; |
421 | data = kmalloc(count, GFP_KERNEL); | 415 | |
422 | if (!data) { | 416 | data = memdup_user(buf, count); |
423 | printk(KERN_ERR "%s: Out of memory whilst attempting to " | 417 | if (IS_ERR(data)) { |
424 | "kmalloc([%zd], GFP_KERNEL)\n", __func__, count); | 418 | printk(KERN_ERR "%s: memdup_user returned error [%ld]\n", |
419 | __func__, PTR_ERR(data)); | ||
425 | goto out; | 420 | goto out; |
426 | } | 421 | } |
427 | rc = copy_from_user(data, buf, count); | ||
428 | if (rc) { | ||
429 | printk(KERN_ERR "%s: copy_from_user returned error [%d]\n", | ||
430 | __func__, rc); | ||
431 | goto out_free; | ||
432 | } | ||
433 | sz = count; | 422 | sz = count; |
434 | i = 0; | 423 | i = 0; |
435 | switch (data[i++]) { | 424 | switch (data[i++]) { |
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 46cec2b69796..5c6bab9786e3 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c | |||
@@ -449,6 +449,7 @@ int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode) | |||
449 | struct ecryptfs_crypt_stat *crypt_stat; | 449 | struct ecryptfs_crypt_stat *crypt_stat; |
450 | 450 | ||
451 | crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; | 451 | crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; |
452 | BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)); | ||
452 | if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) | 453 | if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) |
453 | return ecryptfs_write_inode_size_to_xattr(ecryptfs_inode); | 454 | return ecryptfs_write_inode_size_to_xattr(ecryptfs_inode); |
454 | else | 455 | else |
@@ -490,6 +491,16 @@ static int ecryptfs_write_end(struct file *file, | |||
490 | ecryptfs_printk(KERN_DEBUG, "Not a new file\n"); | 491 | ecryptfs_printk(KERN_DEBUG, "Not a new file\n"); |
491 | ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" | 492 | ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" |
492 | "(page w/ index = [0x%.16x], to = [%d])\n", index, to); | 493 | "(page w/ index = [0x%.16x], to = [%d])\n", index, to); |
494 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { | ||
495 | rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, page, 0, | ||
496 | to); | ||
497 | if (!rc) { | ||
498 | rc = copied; | ||
499 | fsstack_copy_inode_size(ecryptfs_inode, | ||
500 | ecryptfs_inode_to_lower(ecryptfs_inode)); | ||
501 | } | ||
502 | goto out; | ||
503 | } | ||
493 | /* Fills in zeros if 'to' goes beyond inode size */ | 504 | /* Fills in zeros if 'to' goes beyond inode size */ |
494 | rc = fill_zeros_to_end_of_page(page, to); | 505 | rc = fill_zeros_to_end_of_page(page, to); |
495 | if (rc) { | 506 | if (rc) { |
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 75c2ea9fee35..a137c6ea2fee 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c | |||
@@ -117,13 +117,15 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, | |||
117 | size_t size) | 117 | size_t size) |
118 | { | 118 | { |
119 | struct page *ecryptfs_page; | 119 | struct page *ecryptfs_page; |
120 | struct ecryptfs_crypt_stat *crypt_stat; | ||
121 | struct inode *ecryptfs_inode = ecryptfs_file->f_dentry->d_inode; | ||
120 | char *ecryptfs_page_virt; | 122 | char *ecryptfs_page_virt; |
121 | loff_t ecryptfs_file_size = | 123 | loff_t ecryptfs_file_size = i_size_read(ecryptfs_inode); |
122 | i_size_read(ecryptfs_file->f_dentry->d_inode); | ||
123 | loff_t data_offset = 0; | 124 | loff_t data_offset = 0; |
124 | loff_t pos; | 125 | loff_t pos; |
125 | int rc = 0; | 126 | int rc = 0; |
126 | 127 | ||
128 | crypt_stat = &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat; | ||
127 | /* | 129 | /* |
128 | * if we are writing beyond current size, then start pos | 130 | * if we are writing beyond current size, then start pos |
129 | * at the current size - we'll fill in zeros from there. | 131 | * at the current size - we'll fill in zeros from there. |
@@ -184,7 +186,13 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, | |||
184 | flush_dcache_page(ecryptfs_page); | 186 | flush_dcache_page(ecryptfs_page); |
185 | SetPageUptodate(ecryptfs_page); | 187 | SetPageUptodate(ecryptfs_page); |
186 | unlock_page(ecryptfs_page); | 188 | unlock_page(ecryptfs_page); |
187 | rc = ecryptfs_encrypt_page(ecryptfs_page); | 189 | if (crypt_stat->flags & ECRYPTFS_ENCRYPTED) |
190 | rc = ecryptfs_encrypt_page(ecryptfs_page); | ||
191 | else | ||
192 | rc = ecryptfs_write_lower_page_segment(ecryptfs_inode, | ||
193 | ecryptfs_page, | ||
194 | start_offset_in_page, | ||
195 | data_offset); | ||
188 | page_cache_release(ecryptfs_page); | 196 | page_cache_release(ecryptfs_page); |
189 | if (rc) { | 197 | if (rc) { |
190 | printk(KERN_ERR "%s: Error encrypting " | 198 | printk(KERN_ERR "%s: Error encrypting " |
@@ -194,14 +202,16 @@ int ecryptfs_write(struct file *ecryptfs_file, char *data, loff_t offset, | |||
194 | pos += num_bytes; | 202 | pos += num_bytes; |
195 | } | 203 | } |
196 | if ((offset + size) > ecryptfs_file_size) { | 204 | if ((offset + size) > ecryptfs_file_size) { |
197 | i_size_write(ecryptfs_file->f_dentry->d_inode, (offset + size)); | 205 | i_size_write(ecryptfs_inode, (offset + size)); |
198 | rc = ecryptfs_write_inode_size_to_metadata( | 206 | if (crypt_stat->flags & ECRYPTFS_ENCRYPTED) { |
199 | ecryptfs_file->f_dentry->d_inode); | 207 | rc = ecryptfs_write_inode_size_to_metadata( |
200 | if (rc) { | 208 | ecryptfs_inode); |
201 | printk(KERN_ERR "Problem with " | 209 | if (rc) { |
202 | "ecryptfs_write_inode_size_to_metadata; " | 210 | printk(KERN_ERR "Problem with " |
203 | "rc = [%d]\n", rc); | 211 | "ecryptfs_write_inode_size_to_metadata; " |
204 | goto out; | 212 | "rc = [%d]\n", rc); |
213 | goto out; | ||
214 | } | ||
205 | } | 215 | } |
206 | } | 216 | } |
207 | out: | 217 | out: |
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index c27ac2b358a1..fa4c7e7d15d9 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c | |||
@@ -170,7 +170,10 @@ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
170 | list_for_each_entry(walker, | 170 | list_for_each_entry(walker, |
171 | &mount_crypt_stat->global_auth_tok_list, | 171 | &mount_crypt_stat->global_auth_tok_list, |
172 | mount_crypt_stat_list) { | 172 | mount_crypt_stat_list) { |
173 | seq_printf(m, ",ecryptfs_sig=%s", walker->sig); | 173 | if (walker->flags & ECRYPTFS_AUTH_TOK_FNEK) |
174 | seq_printf(m, ",ecryptfs_fnek_sig=%s", walker->sig); | ||
175 | else | ||
176 | seq_printf(m, ",ecryptfs_sig=%s", walker->sig); | ||
174 | } | 177 | } |
175 | mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); | 178 | mutex_unlock(&mount_crypt_stat->global_auth_tok_list_mutex); |
176 | 179 | ||
@@ -186,6 +189,8 @@ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt) | |||
186 | seq_printf(m, ",ecryptfs_xattr_metadata"); | 189 | seq_printf(m, ",ecryptfs_xattr_metadata"); |
187 | if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) | 190 | if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) |
188 | seq_printf(m, ",ecryptfs_encrypted_view"); | 191 | seq_printf(m, ",ecryptfs_encrypted_view"); |
192 | if (mount_crypt_stat->flags & ECRYPTFS_UNLINK_SIGS) | ||
193 | seq_printf(m, ",ecryptfs_unlink_sigs"); | ||
189 | 194 | ||
190 | return 0; | 195 | return 0; |
191 | } | 196 | } |
@@ -69,17 +69,18 @@ int suid_dumpable = 0; | |||
69 | static LIST_HEAD(formats); | 69 | static LIST_HEAD(formats); |
70 | static DEFINE_RWLOCK(binfmt_lock); | 70 | static DEFINE_RWLOCK(binfmt_lock); |
71 | 71 | ||
72 | int register_binfmt(struct linux_binfmt * fmt) | 72 | int __register_binfmt(struct linux_binfmt * fmt, int insert) |
73 | { | 73 | { |
74 | if (!fmt) | 74 | if (!fmt) |
75 | return -EINVAL; | 75 | return -EINVAL; |
76 | write_lock(&binfmt_lock); | 76 | write_lock(&binfmt_lock); |
77 | list_add(&fmt->lh, &formats); | 77 | insert ? list_add(&fmt->lh, &formats) : |
78 | list_add_tail(&fmt->lh, &formats); | ||
78 | write_unlock(&binfmt_lock); | 79 | write_unlock(&binfmt_lock); |
79 | return 0; | 80 | return 0; |
80 | } | 81 | } |
81 | 82 | ||
82 | EXPORT_SYMBOL(register_binfmt); | 83 | EXPORT_SYMBOL(__register_binfmt); |
83 | 84 | ||
84 | void unregister_binfmt(struct linux_binfmt * fmt) | 85 | void unregister_binfmt(struct linux_binfmt * fmt) |
85 | { | 86 | { |
@@ -1060,7 +1061,6 @@ EXPORT_SYMBOL(install_exec_creds); | |||
1060 | int check_unsafe_exec(struct linux_binprm *bprm) | 1061 | int check_unsafe_exec(struct linux_binprm *bprm) |
1061 | { | 1062 | { |
1062 | struct task_struct *p = current, *t; | 1063 | struct task_struct *p = current, *t; |
1063 | unsigned long flags; | ||
1064 | unsigned n_fs; | 1064 | unsigned n_fs; |
1065 | int res = 0; | 1065 | int res = 0; |
1066 | 1066 | ||
@@ -1068,21 +1068,22 @@ int check_unsafe_exec(struct linux_binprm *bprm) | |||
1068 | 1068 | ||
1069 | n_fs = 1; | 1069 | n_fs = 1; |
1070 | write_lock(&p->fs->lock); | 1070 | write_lock(&p->fs->lock); |
1071 | lock_task_sighand(p, &flags); | 1071 | rcu_read_lock(); |
1072 | for (t = next_thread(p); t != p; t = next_thread(t)) { | 1072 | for (t = next_thread(p); t != p; t = next_thread(t)) { |
1073 | if (t->fs == p->fs) | 1073 | if (t->fs == p->fs) |
1074 | n_fs++; | 1074 | n_fs++; |
1075 | } | 1075 | } |
1076 | rcu_read_unlock(); | ||
1076 | 1077 | ||
1077 | if (p->fs->users > n_fs) { | 1078 | if (p->fs->users > n_fs) { |
1078 | bprm->unsafe |= LSM_UNSAFE_SHARE; | 1079 | bprm->unsafe |= LSM_UNSAFE_SHARE; |
1079 | } else { | 1080 | } else { |
1080 | if (p->fs->in_exec) | 1081 | res = -EAGAIN; |
1081 | res = -EAGAIN; | 1082 | if (!p->fs->in_exec) { |
1082 | p->fs->in_exec = 1; | 1083 | p->fs->in_exec = 1; |
1084 | res = 1; | ||
1085 | } | ||
1083 | } | 1086 | } |
1084 | |||
1085 | unlock_task_sighand(p, &flags); | ||
1086 | write_unlock(&p->fs->lock); | 1087 | write_unlock(&p->fs->lock); |
1087 | 1088 | ||
1088 | return res; | 1089 | return res; |
@@ -1284,6 +1285,7 @@ int do_execve(char * filename, | |||
1284 | struct linux_binprm *bprm; | 1285 | struct linux_binprm *bprm; |
1285 | struct file *file; | 1286 | struct file *file; |
1286 | struct files_struct *displaced; | 1287 | struct files_struct *displaced; |
1288 | bool clear_in_exec; | ||
1287 | int retval; | 1289 | int retval; |
1288 | 1290 | ||
1289 | retval = unshare_files(&displaced); | 1291 | retval = unshare_files(&displaced); |
@@ -1306,8 +1308,9 @@ int do_execve(char * filename, | |||
1306 | goto out_unlock; | 1308 | goto out_unlock; |
1307 | 1309 | ||
1308 | retval = check_unsafe_exec(bprm); | 1310 | retval = check_unsafe_exec(bprm); |
1309 | if (retval) | 1311 | if (retval < 0) |
1310 | goto out_unlock; | 1312 | goto out_unlock; |
1313 | clear_in_exec = retval; | ||
1311 | 1314 | ||
1312 | file = open_exec(filename); | 1315 | file = open_exec(filename); |
1313 | retval = PTR_ERR(file); | 1316 | retval = PTR_ERR(file); |
@@ -1355,9 +1358,7 @@ int do_execve(char * filename, | |||
1355 | goto out; | 1358 | goto out; |
1356 | 1359 | ||
1357 | /* execve succeeded */ | 1360 | /* execve succeeded */ |
1358 | write_lock(&current->fs->lock); | ||
1359 | current->fs->in_exec = 0; | 1361 | current->fs->in_exec = 0; |
1360 | write_unlock(&current->fs->lock); | ||
1361 | current->in_execve = 0; | 1362 | current->in_execve = 0; |
1362 | mutex_unlock(&current->cred_exec_mutex); | 1363 | mutex_unlock(&current->cred_exec_mutex); |
1363 | acct_update_integrals(current); | 1364 | acct_update_integrals(current); |
@@ -1377,9 +1378,8 @@ out_file: | |||
1377 | } | 1378 | } |
1378 | 1379 | ||
1379 | out_unmark: | 1380 | out_unmark: |
1380 | write_lock(&current->fs->lock); | 1381 | if (clear_in_exec) |
1381 | current->fs->in_exec = 0; | 1382 | current->fs->in_exec = 0; |
1382 | write_unlock(&current->fs->lock); | ||
1383 | 1383 | ||
1384 | out_unlock: | 1384 | out_unlock: |
1385 | current->in_execve = 0; | 1385 | current->in_execve = 0; |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index b43b95563663..acf678831103 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -590,9 +590,8 @@ static int ext2_get_blocks(struct inode *inode, | |||
590 | 590 | ||
591 | if (depth == 0) | 591 | if (depth == 0) |
592 | return (err); | 592 | return (err); |
593 | reread: | ||
594 | partial = ext2_get_branch(inode, depth, offsets, chain, &err); | ||
595 | 593 | ||
594 | partial = ext2_get_branch(inode, depth, offsets, chain, &err); | ||
596 | /* Simplest case - block found, no allocation needed */ | 595 | /* Simplest case - block found, no allocation needed */ |
597 | if (!partial) { | 596 | if (!partial) { |
598 | first_block = le32_to_cpu(chain[depth - 1].key); | 597 | first_block = le32_to_cpu(chain[depth - 1].key); |
@@ -602,15 +601,16 @@ reread: | |||
602 | while (count < maxblocks && count <= blocks_to_boundary) { | 601 | while (count < maxblocks && count <= blocks_to_boundary) { |
603 | ext2_fsblk_t blk; | 602 | ext2_fsblk_t blk; |
604 | 603 | ||
605 | if (!verify_chain(chain, partial)) { | 604 | if (!verify_chain(chain, chain + depth - 1)) { |
606 | /* | 605 | /* |
607 | * Indirect block might be removed by | 606 | * Indirect block might be removed by |
608 | * truncate while we were reading it. | 607 | * truncate while we were reading it. |
609 | * Handling of that case: forget what we've | 608 | * Handling of that case: forget what we've |
610 | * got now, go to reread. | 609 | * got now, go to reread. |
611 | */ | 610 | */ |
611 | err = -EAGAIN; | ||
612 | count = 0; | 612 | count = 0; |
613 | goto changed; | 613 | break; |
614 | } | 614 | } |
615 | blk = le32_to_cpu(*(chain[depth-1].p + count)); | 615 | blk = le32_to_cpu(*(chain[depth-1].p + count)); |
616 | if (blk == first_block + count) | 616 | if (blk == first_block + count) |
@@ -618,7 +618,8 @@ reread: | |||
618 | else | 618 | else |
619 | break; | 619 | break; |
620 | } | 620 | } |
621 | goto got_it; | 621 | if (err != -EAGAIN) |
622 | goto got_it; | ||
622 | } | 623 | } |
623 | 624 | ||
624 | /* Next simple case - plain lookup or failed read of indirect block */ | 625 | /* Next simple case - plain lookup or failed read of indirect block */ |
@@ -626,6 +627,33 @@ reread: | |||
626 | goto cleanup; | 627 | goto cleanup; |
627 | 628 | ||
628 | mutex_lock(&ei->truncate_mutex); | 629 | mutex_lock(&ei->truncate_mutex); |
630 | /* | ||
631 | * If the indirect block is missing while we are reading | ||
632 | * the chain(ext3_get_branch() returns -EAGAIN err), or | ||
633 | * if the chain has been changed after we grab the semaphore, | ||
634 | * (either because another process truncated this branch, or | ||
635 | * another get_block allocated this branch) re-grab the chain to see if | ||
636 | * the request block has been allocated or not. | ||
637 | * | ||
638 | * Since we already block the truncate/other get_block | ||
639 | * at this point, we will have the current copy of the chain when we | ||
640 | * splice the branch into the tree. | ||
641 | */ | ||
642 | if (err == -EAGAIN || !verify_chain(chain, partial)) { | ||
643 | while (partial > chain) { | ||
644 | brelse(partial->bh); | ||
645 | partial--; | ||
646 | } | ||
647 | partial = ext2_get_branch(inode, depth, offsets, chain, &err); | ||
648 | if (!partial) { | ||
649 | count++; | ||
650 | mutex_unlock(&ei->truncate_mutex); | ||
651 | if (err) | ||
652 | goto cleanup; | ||
653 | clear_buffer_new(bh_result); | ||
654 | goto got_it; | ||
655 | } | ||
656 | } | ||
629 | 657 | ||
630 | /* | 658 | /* |
631 | * Okay, we need to do block allocation. Lazily initialize the block | 659 | * Okay, we need to do block allocation. Lazily initialize the block |
@@ -683,12 +711,6 @@ cleanup: | |||
683 | partial--; | 711 | partial--; |
684 | } | 712 | } |
685 | return err; | 713 | return err; |
686 | changed: | ||
687 | while (partial > chain) { | ||
688 | brelse(partial->bh); | ||
689 | partial--; | ||
690 | } | ||
691 | goto reread; | ||
692 | } | 714 | } |
693 | 715 | ||
694 | int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) | 716 | int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index f983225266dc..5c4afe652245 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -1395,8 +1395,10 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type, | |||
1395 | blk++; | 1395 | blk++; |
1396 | } | 1396 | } |
1397 | out: | 1397 | out: |
1398 | if (len == towrite) | 1398 | if (len == towrite) { |
1399 | mutex_unlock(&inode->i_mutex); | ||
1399 | return err; | 1400 | return err; |
1401 | } | ||
1400 | if (inode->i_size < off+len-towrite) | 1402 | if (inode->i_size < off+len-towrite) |
1401 | i_size_write(inode, off+len-towrite); | 1403 | i_size_write(inode, off+len-towrite); |
1402 | inode->i_version++; | 1404 | inode->i_version++; |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 6132353dcf62..e40332158340 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -326,11 +326,14 @@ ext4_ext_max_entries(struct inode *inode, int depth) | |||
326 | 326 | ||
327 | static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) | 327 | static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) |
328 | { | 328 | { |
329 | ext4_fsblk_t block = ext_pblock(ext); | 329 | ext4_fsblk_t block = ext_pblock(ext), valid_block; |
330 | int len = ext4_ext_get_actual_len(ext); | 330 | int len = ext4_ext_get_actual_len(ext); |
331 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | 331 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; |
332 | if (unlikely(block < le32_to_cpu(es->s_first_data_block) || | 332 | |
333 | ((block + len) > ext4_blocks_count(es)))) | 333 | valid_block = le32_to_cpu(es->s_first_data_block) + |
334 | EXT4_SB(inode->i_sb)->s_gdb_count; | ||
335 | if (unlikely(block <= valid_block || | ||
336 | ((block + len) > ext4_blocks_count(es)))) | ||
334 | return 0; | 337 | return 0; |
335 | else | 338 | else |
336 | return 1; | 339 | return 1; |
@@ -339,10 +342,13 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) | |||
339 | static int ext4_valid_extent_idx(struct inode *inode, | 342 | static int ext4_valid_extent_idx(struct inode *inode, |
340 | struct ext4_extent_idx *ext_idx) | 343 | struct ext4_extent_idx *ext_idx) |
341 | { | 344 | { |
342 | ext4_fsblk_t block = idx_pblock(ext_idx); | 345 | ext4_fsblk_t block = idx_pblock(ext_idx), valid_block; |
343 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; | 346 | struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; |
344 | if (unlikely(block < le32_to_cpu(es->s_first_data_block) || | 347 | |
345 | (block >= ext4_blocks_count(es)))) | 348 | valid_block = le32_to_cpu(es->s_first_data_block) + |
349 | EXT4_SB(inode->i_sb)->s_gdb_count; | ||
350 | if (unlikely(block <= valid_block || | ||
351 | (block >= ext4_blocks_count(es)))) | ||
346 | return 0; | 352 | return 0; |
347 | else | 353 | else |
348 | return 1; | 354 | return 1; |
@@ -2416,8 +2422,6 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) | |||
2416 | len = ee_len; | 2422 | len = ee_len; |
2417 | 2423 | ||
2418 | bio = bio_alloc(GFP_NOIO, len); | 2424 | bio = bio_alloc(GFP_NOIO, len); |
2419 | if (!bio) | ||
2420 | return -ENOMEM; | ||
2421 | bio->bi_sector = ee_pblock; | 2425 | bio->bi_sector = ee_pblock; |
2422 | bio->bi_bdev = inode->i_sb->s_bdev; | 2426 | bio->bi_bdev = inode->i_sb->s_bdev; |
2423 | 2427 | ||
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 47b84e8df568..f18e0a08a6b5 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -585,6 +585,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
585 | fallback: | 585 | fallback: |
586 | ngroups = sbi->s_groups_count; | 586 | ngroups = sbi->s_groups_count; |
587 | avefreei = freei / ngroups; | 587 | avefreei = freei / ngroups; |
588 | fallback_retry: | ||
588 | parent_group = EXT4_I(parent)->i_block_group; | 589 | parent_group = EXT4_I(parent)->i_block_group; |
589 | for (i = 0; i < ngroups; i++) { | 590 | for (i = 0; i < ngroups; i++) { |
590 | grp = (parent_group + i) % ngroups; | 591 | grp = (parent_group + i) % ngroups; |
@@ -602,7 +603,7 @@ fallback: | |||
602 | * filesystems the above test can fail to find any blockgroups | 603 | * filesystems the above test can fail to find any blockgroups |
603 | */ | 604 | */ |
604 | avefreei = 0; | 605 | avefreei = 0; |
605 | goto fallback; | 606 | goto fallback_retry; |
606 | } | 607 | } |
607 | 608 | ||
608 | return -1; | 609 | return -1; |
@@ -831,11 +832,12 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) | |||
831 | ret2 = find_group_flex(sb, dir, &group); | 832 | ret2 = find_group_flex(sb, dir, &group); |
832 | if (ret2 == -1) { | 833 | if (ret2 == -1) { |
833 | ret2 = find_group_other(sb, dir, &group, mode); | 834 | ret2 = find_group_other(sb, dir, &group, mode); |
834 | if (ret2 == 0 && once) | 835 | if (ret2 == 0 && once) { |
835 | once = 0; | 836 | once = 0; |
836 | printk(KERN_NOTICE "ext4: find_group_flex " | 837 | printk(KERN_NOTICE "ext4: find_group_flex " |
837 | "failed, fallback succeeded dir %lu\n", | 838 | "failed, fallback succeeded dir %lu\n", |
838 | dir->i_ino); | 839 | dir->i_ino); |
840 | } | ||
839 | } | 841 | } |
840 | goto got_group; | 842 | goto got_group; |
841 | } | 843 | } |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c6bd6ced3bb7..e91f978c7f12 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -4357,11 +4357,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4357 | ei->i_flags = le32_to_cpu(raw_inode->i_flags); | 4357 | ei->i_flags = le32_to_cpu(raw_inode->i_flags); |
4358 | inode->i_blocks = ext4_inode_blocks(raw_inode, ei); | 4358 | inode->i_blocks = ext4_inode_blocks(raw_inode, ei); |
4359 | ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); | 4359 | ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); |
4360 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | 4360 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) |
4361 | cpu_to_le32(EXT4_OS_HURD)) { | ||
4362 | ei->i_file_acl |= | 4361 | ei->i_file_acl |= |
4363 | ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; | 4362 | ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; |
4364 | } | ||
4365 | inode->i_size = ext4_isize(raw_inode); | 4363 | inode->i_size = ext4_isize(raw_inode); |
4366 | ei->i_disksize = inode->i_size; | 4364 | ei->i_disksize = inode->i_size; |
4367 | inode->i_generation = le32_to_cpu(raw_inode->i_generation); | 4365 | inode->i_generation = le32_to_cpu(raw_inode->i_generation); |
@@ -4409,9 +4407,23 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4409 | (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; | 4407 | (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; |
4410 | } | 4408 | } |
4411 | 4409 | ||
4412 | if (ei->i_flags & EXT4_EXTENTS_FL) { | 4410 | ret = 0; |
4413 | /* Validate extent which is part of inode */ | 4411 | if (ei->i_file_acl && |
4414 | ret = ext4_ext_check_inode(inode); | 4412 | ((ei->i_file_acl < |
4413 | (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + | ||
4414 | EXT4_SB(sb)->s_gdb_count)) || | ||
4415 | (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) { | ||
4416 | ext4_error(sb, __func__, | ||
4417 | "bad extended attribute block %llu in inode #%lu", | ||
4418 | ei->i_file_acl, inode->i_ino); | ||
4419 | ret = -EIO; | ||
4420 | goto bad_inode; | ||
4421 | } else if (ei->i_flags & EXT4_EXTENTS_FL) { | ||
4422 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | ||
4423 | (S_ISLNK(inode->i_mode) && | ||
4424 | !ext4_inode_is_fast_symlink(inode))) | ||
4425 | /* Validate extent which is part of inode */ | ||
4426 | ret = ext4_ext_check_inode(inode); | ||
4415 | } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 4427 | } else if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
4416 | (S_ISLNK(inode->i_mode) && | 4428 | (S_ISLNK(inode->i_mode) && |
4417 | !ext4_inode_is_fast_symlink(inode))) { | 4429 | !ext4_inode_is_fast_symlink(inode))) { |
diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig index d0a69ff25375..182f9ffe2b51 100644 --- a/fs/fat/Kconfig +++ b/fs/fat/Kconfig | |||
@@ -95,3 +95,6 @@ config FAT_DEFAULT_IOCHARSET | |||
95 | Note that "utf8" is not recommended for FAT filesystems. | 95 | Note that "utf8" is not recommended for FAT filesystems. |
96 | If unsure, you shouldn't set "utf8" here. | 96 | If unsure, you shouldn't set "utf8" here. |
97 | See <file:Documentation/filesystems/vfat.txt> for more information. | 97 | See <file:Documentation/filesystems/vfat.txt> for more information. |
98 | |||
99 | Enable any character sets you need in File Systems/Native Language | ||
100 | Support. | ||
diff --git a/fs/filesystems.c b/fs/filesystems.c index 1aa70260e6d1..a24c58e181db 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c | |||
@@ -199,7 +199,7 @@ SYSCALL_DEFINE3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2) | |||
199 | return retval; | 199 | return retval; |
200 | } | 200 | } |
201 | 201 | ||
202 | int get_filesystem_list(char * buf) | 202 | int __init get_filesystem_list(char *buf) |
203 | { | 203 | { |
204 | int len = 0; | 204 | int len = 0; |
205 | struct file_system_type * tmp; | 205 | struct file_system_type * tmp; |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2b25133524a3..06f30e965676 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -938,9 +938,9 @@ static void fuse_release_user_pages(struct fuse_req *req, int write) | |||
938 | } | 938 | } |
939 | 939 | ||
940 | static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, | 940 | static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, |
941 | unsigned *nbytesp, int write) | 941 | size_t *nbytesp, int write) |
942 | { | 942 | { |
943 | unsigned nbytes = *nbytesp; | 943 | size_t nbytes = *nbytesp; |
944 | unsigned long user_addr = (unsigned long) buf; | 944 | unsigned long user_addr = (unsigned long) buf; |
945 | unsigned offset = user_addr & ~PAGE_MASK; | 945 | unsigned offset = user_addr & ~PAGE_MASK; |
946 | int npages; | 946 | int npages; |
@@ -955,7 +955,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, | |||
955 | return 0; | 955 | return 0; |
956 | } | 956 | } |
957 | 957 | ||
958 | nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); | 958 | nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); |
959 | npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; | 959 | npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; |
960 | npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); | 960 | npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); |
961 | down_read(&current->mm->mmap_sem); | 961 | down_read(&current->mm->mmap_sem); |
@@ -1298,6 +1298,8 @@ static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma) | |||
1298 | if (vma->vm_flags & VM_MAYSHARE) | 1298 | if (vma->vm_flags & VM_MAYSHARE) |
1299 | return -ENODEV; | 1299 | return -ENODEV; |
1300 | 1300 | ||
1301 | invalidate_inode_pages2(file->f_mapping); | ||
1302 | |||
1301 | return generic_file_mmap(file, vma); | 1303 | return generic_file_mmap(file, vma); |
1302 | } | 1304 | } |
1303 | 1305 | ||
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 3984e47d1d33..1afd9f26bcb1 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -597,7 +597,6 @@ __acquires(&gl->gl_spin) | |||
597 | 597 | ||
598 | GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)); | 598 | GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)); |
599 | 599 | ||
600 | down_read(&gfs2_umount_flush_sem); | ||
601 | if (test_bit(GLF_DEMOTE, &gl->gl_flags) && | 600 | if (test_bit(GLF_DEMOTE, &gl->gl_flags) && |
602 | gl->gl_demote_state != gl->gl_state) { | 601 | gl->gl_demote_state != gl->gl_state) { |
603 | if (find_first_holder(gl)) | 602 | if (find_first_holder(gl)) |
@@ -614,15 +613,14 @@ __acquires(&gl->gl_spin) | |||
614 | if (ret == 0) | 613 | if (ret == 0) |
615 | goto out_unlock; | 614 | goto out_unlock; |
616 | if (ret == 2) | 615 | if (ret == 2) |
617 | goto out_sem; | 616 | goto out; |
618 | gh = find_first_waiter(gl); | 617 | gh = find_first_waiter(gl); |
619 | gl->gl_target = gh->gh_state; | 618 | gl->gl_target = gh->gh_state; |
620 | if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) | 619 | if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) |
621 | do_error(gl, 0); /* Fail queued try locks */ | 620 | do_error(gl, 0); /* Fail queued try locks */ |
622 | } | 621 | } |
623 | do_xmote(gl, gh, gl->gl_target); | 622 | do_xmote(gl, gh, gl->gl_target); |
624 | out_sem: | 623 | out: |
625 | up_read(&gfs2_umount_flush_sem); | ||
626 | return; | 624 | return; |
627 | 625 | ||
628 | out_sched: | 626 | out_sched: |
@@ -631,7 +629,7 @@ out_sched: | |||
631 | gfs2_glock_put(gl); | 629 | gfs2_glock_put(gl); |
632 | out_unlock: | 630 | out_unlock: |
633 | clear_bit(GLF_LOCK, &gl->gl_flags); | 631 | clear_bit(GLF_LOCK, &gl->gl_flags); |
634 | goto out_sem; | 632 | goto out; |
635 | } | 633 | } |
636 | 634 | ||
637 | static void glock_work_func(struct work_struct *work) | 635 | static void glock_work_func(struct work_struct *work) |
@@ -641,6 +639,7 @@ static void glock_work_func(struct work_struct *work) | |||
641 | 639 | ||
642 | if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) | 640 | if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) |
643 | finish_xmote(gl, gl->gl_reply); | 641 | finish_xmote(gl, gl->gl_reply); |
642 | down_read(&gfs2_umount_flush_sem); | ||
644 | spin_lock(&gl->gl_spin); | 643 | spin_lock(&gl->gl_spin); |
645 | if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && | 644 | if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && |
646 | gl->gl_state != LM_ST_UNLOCKED && | 645 | gl->gl_state != LM_ST_UNLOCKED && |
@@ -653,6 +652,7 @@ static void glock_work_func(struct work_struct *work) | |||
653 | } | 652 | } |
654 | run_queue(gl, 0); | 653 | run_queue(gl, 0); |
655 | spin_unlock(&gl->gl_spin); | 654 | spin_unlock(&gl->gl_spin); |
655 | up_read(&gfs2_umount_flush_sem); | ||
656 | if (!delay || | 656 | if (!delay || |
657 | queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) | 657 | queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) |
658 | gfs2_glock_put(gl); | 658 | gfs2_glock_put(gl); |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index bf23a62aa925..70f87f43afa2 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -156,6 +156,12 @@ static void inode_go_sync(struct gfs2_glock *gl) | |||
156 | error = filemap_fdatawait(metamapping); | 156 | error = filemap_fdatawait(metamapping); |
157 | mapping_set_error(metamapping, error); | 157 | mapping_set_error(metamapping, error); |
158 | gfs2_ail_empty_gl(gl); | 158 | gfs2_ail_empty_gl(gl); |
159 | /* | ||
160 | * Writeback of the data mapping may cause the dirty flag to be set | ||
161 | * so we have to clear it again here. | ||
162 | */ | ||
163 | smp_mb__before_clear_bit(); | ||
164 | clear_bit(GLF_DIRTY, &gl->gl_flags); | ||
159 | } | 165 | } |
160 | 166 | ||
161 | /** | 167 | /** |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 7b277d449155..5a31d426116f 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -137,15 +137,15 @@ void gfs2_set_iop(struct inode *inode) | |||
137 | if (S_ISREG(mode)) { | 137 | if (S_ISREG(mode)) { |
138 | inode->i_op = &gfs2_file_iops; | 138 | inode->i_op = &gfs2_file_iops; |
139 | if (gfs2_localflocks(sdp)) | 139 | if (gfs2_localflocks(sdp)) |
140 | inode->i_fop = gfs2_file_fops_nolock; | 140 | inode->i_fop = &gfs2_file_fops_nolock; |
141 | else | 141 | else |
142 | inode->i_fop = gfs2_file_fops; | 142 | inode->i_fop = &gfs2_file_fops; |
143 | } else if (S_ISDIR(mode)) { | 143 | } else if (S_ISDIR(mode)) { |
144 | inode->i_op = &gfs2_dir_iops; | 144 | inode->i_op = &gfs2_dir_iops; |
145 | if (gfs2_localflocks(sdp)) | 145 | if (gfs2_localflocks(sdp)) |
146 | inode->i_fop = gfs2_dir_fops_nolock; | 146 | inode->i_fop = &gfs2_dir_fops_nolock; |
147 | else | 147 | else |
148 | inode->i_fop = gfs2_dir_fops; | 148 | inode->i_fop = &gfs2_dir_fops; |
149 | } else if (S_ISLNK(mode)) { | 149 | } else if (S_ISLNK(mode)) { |
150 | inode->i_op = &gfs2_symlink_iops; | 150 | inode->i_op = &gfs2_symlink_iops; |
151 | } else { | 151 | } else { |
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index dca4fee3078b..c30be2b66580 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
@@ -101,21 +101,23 @@ void gfs2_dinode_print(const struct gfs2_inode *ip); | |||
101 | extern const struct inode_operations gfs2_file_iops; | 101 | extern const struct inode_operations gfs2_file_iops; |
102 | extern const struct inode_operations gfs2_dir_iops; | 102 | extern const struct inode_operations gfs2_dir_iops; |
103 | extern const struct inode_operations gfs2_symlink_iops; | 103 | extern const struct inode_operations gfs2_symlink_iops; |
104 | extern const struct file_operations *gfs2_file_fops_nolock; | 104 | extern const struct file_operations gfs2_file_fops_nolock; |
105 | extern const struct file_operations *gfs2_dir_fops_nolock; | 105 | extern const struct file_operations gfs2_dir_fops_nolock; |
106 | 106 | ||
107 | extern void gfs2_set_inode_flags(struct inode *inode); | 107 | extern void gfs2_set_inode_flags(struct inode *inode); |
108 | 108 | ||
109 | #ifdef CONFIG_GFS2_FS_LOCKING_DLM | 109 | #ifdef CONFIG_GFS2_FS_LOCKING_DLM |
110 | extern const struct file_operations *gfs2_file_fops; | 110 | extern const struct file_operations gfs2_file_fops; |
111 | extern const struct file_operations *gfs2_dir_fops; | 111 | extern const struct file_operations gfs2_dir_fops; |
112 | |||
112 | static inline int gfs2_localflocks(const struct gfs2_sbd *sdp) | 113 | static inline int gfs2_localflocks(const struct gfs2_sbd *sdp) |
113 | { | 114 | { |
114 | return sdp->sd_args.ar_localflocks; | 115 | return sdp->sd_args.ar_localflocks; |
115 | } | 116 | } |
116 | #else /* Single node only */ | 117 | #else /* Single node only */ |
117 | #define gfs2_file_fops NULL | 118 | #define gfs2_file_fops gfs2_file_fops_nolock |
118 | #define gfs2_dir_fops NULL | 119 | #define gfs2_dir_fops gfs2_dir_fops_nolock |
120 | |||
119 | static inline int gfs2_localflocks(const struct gfs2_sbd *sdp) | 121 | static inline int gfs2_localflocks(const struct gfs2_sbd *sdp) |
120 | { | 122 | { |
121 | return 1; | 123 | return 1; |
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 70b9b8548945..5d82e91887e3 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c | |||
@@ -413,7 +413,9 @@ out_unlock: | |||
413 | gfs2_glock_dq(&gh); | 413 | gfs2_glock_dq(&gh); |
414 | out: | 414 | out: |
415 | gfs2_holder_uninit(&gh); | 415 | gfs2_holder_uninit(&gh); |
416 | if (ret) | 416 | if (ret == -ENOMEM) |
417 | ret = VM_FAULT_OOM; | ||
418 | else if (ret) | ||
417 | ret = VM_FAULT_SIGBUS; | 419 | ret = VM_FAULT_SIGBUS; |
418 | return ret; | 420 | return ret; |
419 | } | 421 | } |
@@ -705,7 +707,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) | |||
705 | } | 707 | } |
706 | } | 708 | } |
707 | 709 | ||
708 | const struct file_operations *gfs2_file_fops = &(const struct file_operations){ | 710 | const struct file_operations gfs2_file_fops = { |
709 | .llseek = gfs2_llseek, | 711 | .llseek = gfs2_llseek, |
710 | .read = do_sync_read, | 712 | .read = do_sync_read, |
711 | .aio_read = generic_file_aio_read, | 713 | .aio_read = generic_file_aio_read, |
@@ -723,7 +725,7 @@ const struct file_operations *gfs2_file_fops = &(const struct file_operations){ | |||
723 | .setlease = gfs2_setlease, | 725 | .setlease = gfs2_setlease, |
724 | }; | 726 | }; |
725 | 727 | ||
726 | const struct file_operations *gfs2_dir_fops = &(const struct file_operations){ | 728 | const struct file_operations gfs2_dir_fops = { |
727 | .readdir = gfs2_readdir, | 729 | .readdir = gfs2_readdir, |
728 | .unlocked_ioctl = gfs2_ioctl, | 730 | .unlocked_ioctl = gfs2_ioctl, |
729 | .open = gfs2_open, | 731 | .open = gfs2_open, |
@@ -735,7 +737,7 @@ const struct file_operations *gfs2_dir_fops = &(const struct file_operations){ | |||
735 | 737 | ||
736 | #endif /* CONFIG_GFS2_FS_LOCKING_DLM */ | 738 | #endif /* CONFIG_GFS2_FS_LOCKING_DLM */ |
737 | 739 | ||
738 | const struct file_operations *gfs2_file_fops_nolock = &(const struct file_operations){ | 740 | const struct file_operations gfs2_file_fops_nolock = { |
739 | .llseek = gfs2_llseek, | 741 | .llseek = gfs2_llseek, |
740 | .read = do_sync_read, | 742 | .read = do_sync_read, |
741 | .aio_read = generic_file_aio_read, | 743 | .aio_read = generic_file_aio_read, |
@@ -751,7 +753,7 @@ const struct file_operations *gfs2_file_fops_nolock = &(const struct file_operat | |||
751 | .setlease = generic_setlease, | 753 | .setlease = generic_setlease, |
752 | }; | 754 | }; |
753 | 755 | ||
754 | const struct file_operations *gfs2_dir_fops_nolock = &(const struct file_operations){ | 756 | const struct file_operations gfs2_dir_fops_nolock = { |
755 | .readdir = gfs2_readdir, | 757 | .readdir = gfs2_readdir, |
756 | .unlocked_ioctl = gfs2_ioctl, | 758 | .unlocked_ioctl = gfs2_ioctl, |
757 | .open = gfs2_open, | 759 | .open = gfs2_open, |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 51883b3ad89c..650a730707b7 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -272,11 +272,6 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) | |||
272 | lock_page(page); | 272 | lock_page(page); |
273 | 273 | ||
274 | bio = bio_alloc(GFP_NOFS, 1); | 274 | bio = bio_alloc(GFP_NOFS, 1); |
275 | if (unlikely(!bio)) { | ||
276 | __free_page(page); | ||
277 | return -ENOBUFS; | ||
278 | } | ||
279 | |||
280 | bio->bi_sector = sector * (sb->s_blocksize >> 9); | 275 | bio->bi_sector = sector * (sb->s_blocksize >> 9); |
281 | bio->bi_bdev = sb->s_bdev; | 276 | bio->bi_bdev = sb->s_bdev; |
282 | bio_add_page(bio, page, PAGE_SIZE, 0); | 277 | bio_add_page(bio, page, PAGE_SIZE, 0); |
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index abd5429ae285..1c70fa5168d6 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -371,6 +371,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, | |||
371 | ip = ghs[1].gh_gl->gl_object; | 371 | ip = ghs[1].gh_gl->gl_object; |
372 | 372 | ||
373 | ip->i_disksize = size; | 373 | ip->i_disksize = size; |
374 | i_size_write(inode, size); | ||
374 | 375 | ||
375 | error = gfs2_meta_inode_buffer(ip, &dibh); | 376 | error = gfs2_meta_inode_buffer(ip, &dibh); |
376 | 377 | ||
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 8d53f66b5bcc..152e6c4a0dca 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -81,7 +81,7 @@ struct gfs2_quota_change_host { | |||
81 | 81 | ||
82 | static LIST_HEAD(qd_lru_list); | 82 | static LIST_HEAD(qd_lru_list); |
83 | static atomic_t qd_lru_count = ATOMIC_INIT(0); | 83 | static atomic_t qd_lru_count = ATOMIC_INIT(0); |
84 | static spinlock_t qd_lru_lock = SPIN_LOCK_UNLOCKED; | 84 | static DEFINE_SPINLOCK(qd_lru_lock); |
85 | 85 | ||
86 | int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask) | 86 | int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask) |
87 | { | 87 | { |
@@ -1364,7 +1364,7 @@ int gfs2_quotad(void *data) | |||
1364 | refrigerator(); | 1364 | refrigerator(); |
1365 | t = min(quotad_timeo, statfs_timeo); | 1365 | t = min(quotad_timeo, statfs_timeo); |
1366 | 1366 | ||
1367 | prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_UNINTERRUPTIBLE); | 1367 | prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE); |
1368 | spin_lock(&sdp->sd_trunc_lock); | 1368 | spin_lock(&sdp->sd_trunc_lock); |
1369 | empty = list_empty(&sdp->sd_trunc_list); | 1369 | empty = list_empty(&sdp->sd_trunc_list); |
1370 | spin_unlock(&sdp->sd_trunc_lock); | 1370 | spin_unlock(&sdp->sd_trunc_lock); |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index f03d024038ea..565038243fa2 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -212,8 +212,7 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, | |||
212 | if (tmp == 0) | 212 | if (tmp == 0) |
213 | return BFITNOENT; | 213 | return BFITNOENT; |
214 | ptr--; | 214 | ptr--; |
215 | bit = fls64(tmp); | 215 | bit = __ffs64(tmp); |
216 | bit--; /* fls64 always adds one to the bit count */ | ||
217 | bit /= 2; /* two bits per entry in the bitmap */ | 216 | bit /= 2; /* two bits per entry in the bitmap */ |
218 | return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit; | 217 | return (((const unsigned char *)ptr - buf) * GFS2_NBBY) + bit; |
219 | } | 218 | } |
@@ -1445,10 +1444,12 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, | |||
1445 | u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) | 1444 | u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) |
1446 | { | 1445 | { |
1447 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1446 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1447 | struct buffer_head *dibh; | ||
1448 | struct gfs2_alloc *al = ip->i_alloc; | 1448 | struct gfs2_alloc *al = ip->i_alloc; |
1449 | struct gfs2_rgrpd *rgd = al->al_rgd; | 1449 | struct gfs2_rgrpd *rgd = al->al_rgd; |
1450 | u32 goal, blk; | 1450 | u32 goal, blk; |
1451 | u64 block; | 1451 | u64 block; |
1452 | int error; | ||
1452 | 1453 | ||
1453 | if (rgrp_contains_block(rgd, ip->i_goal)) | 1454 | if (rgrp_contains_block(rgd, ip->i_goal)) |
1454 | goal = ip->i_goal - rgd->rd_data0; | 1455 | goal = ip->i_goal - rgd->rd_data0; |
@@ -1461,7 +1462,13 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) | |||
1461 | rgd->rd_last_alloc = blk; | 1462 | rgd->rd_last_alloc = blk; |
1462 | block = rgd->rd_data0 + blk; | 1463 | block = rgd->rd_data0 + blk; |
1463 | ip->i_goal = block; | 1464 | ip->i_goal = block; |
1464 | 1465 | error = gfs2_meta_inode_buffer(ip, &dibh); | |
1466 | if (error == 0) { | ||
1467 | struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; | ||
1468 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
1469 | di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal); | ||
1470 | brelse(dibh); | ||
1471 | } | ||
1465 | gfs2_assert_withdraw(sdp, rgd->rd_free >= *n); | 1472 | gfs2_assert_withdraw(sdp, rgd->rd_free >= *n); |
1466 | rgd->rd_free -= *n; | 1473 | rgd->rd_free -= *n; |
1467 | 1474 | ||
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 9435dda8f1e0..a1cbff2b4d99 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c | |||
@@ -70,6 +70,10 @@ static int hfs_releasepage(struct page *page, gfp_t mask) | |||
70 | BUG(); | 70 | BUG(); |
71 | return 0; | 71 | return 0; |
72 | } | 72 | } |
73 | |||
74 | if (!tree) | ||
75 | return 0; | ||
76 | |||
73 | if (tree->node_size >= PAGE_CACHE_SIZE) { | 77 | if (tree->node_size >= PAGE_CACHE_SIZE) { |
74 | nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT); | 78 | nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT); |
75 | spin_lock(&tree->hash_lock); | 79 | spin_lock(&tree->hash_lock); |
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 36ca2e1a4fa3..7b6165f25fbe 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c | |||
@@ -349,6 +349,7 @@ void hfs_mdb_put(struct super_block *sb) | |||
349 | if (HFS_SB(sb)->nls_disk) | 349 | if (HFS_SB(sb)->nls_disk) |
350 | unload_nls(HFS_SB(sb)->nls_disk); | 350 | unload_nls(HFS_SB(sb)->nls_disk); |
351 | 351 | ||
352 | free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0); | ||
352 | kfree(HFS_SB(sb)); | 353 | kfree(HFS_SB(sb)); |
353 | sb->s_fs_info = NULL; | 354 | sb->s_fs_info = NULL; |
354 | } | 355 | } |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 23a3c76711e0..153d9681192b 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -26,7 +26,6 @@ | |||
26 | #include <linux/pagevec.h> | 26 | #include <linux/pagevec.h> |
27 | #include <linux/parser.h> | 27 | #include <linux/parser.h> |
28 | #include <linux/mman.h> | 28 | #include <linux/mman.h> |
29 | #include <linux/quotaops.h> | ||
30 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
31 | #include <linux/dnotify.h> | 30 | #include <linux/dnotify.h> |
32 | #include <linux/statfs.h> | 31 | #include <linux/statfs.h> |
@@ -842,7 +841,7 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) | |||
842 | bad_val: | 841 | bad_val: |
843 | printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n", | 842 | printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n", |
844 | args[0].from, p); | 843 | args[0].from, p); |
845 | return 1; | 844 | return -EINVAL; |
846 | } | 845 | } |
847 | 846 | ||
848 | static int | 847 | static int |
diff --git a/fs/inode.c b/fs/inode.c index d06d6d268de9..6ad14a1cd8c9 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -1470,42 +1470,6 @@ static void __wait_on_freeing_inode(struct inode *inode) | |||
1470 | spin_lock(&inode_lock); | 1470 | spin_lock(&inode_lock); |
1471 | } | 1471 | } |
1472 | 1472 | ||
1473 | /* | ||
1474 | * We rarely want to lock two inodes that do not have a parent/child | ||
1475 | * relationship (such as directory, child inode) simultaneously. The | ||
1476 | * vast majority of file systems should be able to get along fine | ||
1477 | * without this. Do not use these functions except as a last resort. | ||
1478 | */ | ||
1479 | void inode_double_lock(struct inode *inode1, struct inode *inode2) | ||
1480 | { | ||
1481 | if (inode1 == NULL || inode2 == NULL || inode1 == inode2) { | ||
1482 | if (inode1) | ||
1483 | mutex_lock(&inode1->i_mutex); | ||
1484 | else if (inode2) | ||
1485 | mutex_lock(&inode2->i_mutex); | ||
1486 | return; | ||
1487 | } | ||
1488 | |||
1489 | if (inode1 < inode2) { | ||
1490 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); | ||
1491 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); | ||
1492 | } else { | ||
1493 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); | ||
1494 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); | ||
1495 | } | ||
1496 | } | ||
1497 | EXPORT_SYMBOL(inode_double_lock); | ||
1498 | |||
1499 | void inode_double_unlock(struct inode *inode1, struct inode *inode2) | ||
1500 | { | ||
1501 | if (inode1) | ||
1502 | mutex_unlock(&inode1->i_mutex); | ||
1503 | |||
1504 | if (inode2 && inode2 != inode1) | ||
1505 | mutex_unlock(&inode2->i_mutex); | ||
1506 | } | ||
1507 | EXPORT_SYMBOL(inode_double_unlock); | ||
1508 | |||
1509 | static __initdata unsigned long ihash_entries; | 1473 | static __initdata unsigned long ihash_entries; |
1510 | static int __init set_ihash_entries(char *str) | 1474 | static int __init set_ihash_entries(char *str) |
1511 | { | 1475 | { |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index a8e8513a78a9..06560c520f49 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -502,7 +502,7 @@ void journal_commit_transaction(journal_t *journal) | |||
502 | err = 0; | 502 | err = 0; |
503 | } | 503 | } |
504 | 504 | ||
505 | journal_write_revoke_records(journal, commit_transaction); | 505 | journal_write_revoke_records(journal, commit_transaction, write_op); |
506 | 506 | ||
507 | /* | 507 | /* |
508 | * If we found any dirty or locked buffers, then we should have | 508 | * If we found any dirty or locked buffers, then we should have |
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index c7bd649bbbdc..da6cd9bdaabc 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c | |||
@@ -55,6 +55,25 @@ | |||
55 | * need do nothing. | 55 | * need do nothing. |
56 | * RevokeValid set, Revoked set: | 56 | * RevokeValid set, Revoked set: |
57 | * buffer has been revoked. | 57 | * buffer has been revoked. |
58 | * | ||
59 | * Locking rules: | ||
60 | * We keep two hash tables of revoke records. One hashtable belongs to the | ||
61 | * running transaction (is pointed to by journal->j_revoke), the other one | ||
62 | * belongs to the committing transaction. Accesses to the second hash table | ||
63 | * happen only from the kjournald and no other thread touches this table. Also | ||
64 | * journal_switch_revoke_table() which switches which hashtable belongs to the | ||
65 | * running and which to the committing transaction is called only from | ||
66 | * kjournald. Therefore we need no locks when accessing the hashtable belonging | ||
67 | * to the committing transaction. | ||
68 | * | ||
69 | * All users operating on the hash table belonging to the running transaction | ||
70 | * have a handle to the transaction. Therefore they are safe from kjournald | ||
71 | * switching hash tables under them. For operations on the lists of entries in | ||
72 | * the hash table j_revoke_lock is used. | ||
73 | * | ||
74 | * Finally, also replay code uses the hash tables but at this moment noone else | ||
75 | * can touch them (filesystem isn't mounted yet) and hence no locking is | ||
76 | * needed. | ||
58 | */ | 77 | */ |
59 | 78 | ||
60 | #ifndef __KERNEL__ | 79 | #ifndef __KERNEL__ |
@@ -67,6 +86,7 @@ | |||
67 | #include <linux/slab.h> | 86 | #include <linux/slab.h> |
68 | #include <linux/list.h> | 87 | #include <linux/list.h> |
69 | #include <linux/init.h> | 88 | #include <linux/init.h> |
89 | #include <linux/bio.h> | ||
70 | #endif | 90 | #endif |
71 | #include <linux/log2.h> | 91 | #include <linux/log2.h> |
72 | 92 | ||
@@ -99,8 +119,8 @@ struct jbd_revoke_table_s | |||
99 | #ifdef __KERNEL__ | 119 | #ifdef __KERNEL__ |
100 | static void write_one_revoke_record(journal_t *, transaction_t *, | 120 | static void write_one_revoke_record(journal_t *, transaction_t *, |
101 | struct journal_head **, int *, | 121 | struct journal_head **, int *, |
102 | struct jbd_revoke_record_s *); | 122 | struct jbd_revoke_record_s *, int); |
103 | static void flush_descriptor(journal_t *, struct journal_head *, int); | 123 | static void flush_descriptor(journal_t *, struct journal_head *, int, int); |
104 | #endif | 124 | #endif |
105 | 125 | ||
106 | /* Utility functions to maintain the revoke table */ | 126 | /* Utility functions to maintain the revoke table */ |
@@ -402,8 +422,6 @@ int journal_revoke(handle_t *handle, unsigned long blocknr, | |||
402 | * the second time we would still have a pending revoke to cancel. So, | 422 | * the second time we would still have a pending revoke to cancel. So, |
403 | * do not trust the Revoked bit on buffers unless RevokeValid is also | 423 | * do not trust the Revoked bit on buffers unless RevokeValid is also |
404 | * set. | 424 | * set. |
405 | * | ||
406 | * The caller must have the journal locked. | ||
407 | */ | 425 | */ |
408 | int journal_cancel_revoke(handle_t *handle, struct journal_head *jh) | 426 | int journal_cancel_revoke(handle_t *handle, struct journal_head *jh) |
409 | { | 427 | { |
@@ -481,12 +499,9 @@ void journal_switch_revoke_table(journal_t *journal) | |||
481 | /* | 499 | /* |
482 | * Write revoke records to the journal for all entries in the current | 500 | * Write revoke records to the journal for all entries in the current |
483 | * revoke hash, deleting the entries as we go. | 501 | * revoke hash, deleting the entries as we go. |
484 | * | ||
485 | * Called with the journal lock held. | ||
486 | */ | 502 | */ |
487 | |||
488 | void journal_write_revoke_records(journal_t *journal, | 503 | void journal_write_revoke_records(journal_t *journal, |
489 | transaction_t *transaction) | 504 | transaction_t *transaction, int write_op) |
490 | { | 505 | { |
491 | struct journal_head *descriptor; | 506 | struct journal_head *descriptor; |
492 | struct jbd_revoke_record_s *record; | 507 | struct jbd_revoke_record_s *record; |
@@ -510,14 +525,14 @@ void journal_write_revoke_records(journal_t *journal, | |||
510 | hash_list->next; | 525 | hash_list->next; |
511 | write_one_revoke_record(journal, transaction, | 526 | write_one_revoke_record(journal, transaction, |
512 | &descriptor, &offset, | 527 | &descriptor, &offset, |
513 | record); | 528 | record, write_op); |
514 | count++; | 529 | count++; |
515 | list_del(&record->hash); | 530 | list_del(&record->hash); |
516 | kmem_cache_free(revoke_record_cache, record); | 531 | kmem_cache_free(revoke_record_cache, record); |
517 | } | 532 | } |
518 | } | 533 | } |
519 | if (descriptor) | 534 | if (descriptor) |
520 | flush_descriptor(journal, descriptor, offset); | 535 | flush_descriptor(journal, descriptor, offset, write_op); |
521 | jbd_debug(1, "Wrote %d revoke records\n", count); | 536 | jbd_debug(1, "Wrote %d revoke records\n", count); |
522 | } | 537 | } |
523 | 538 | ||
@@ -530,7 +545,8 @@ static void write_one_revoke_record(journal_t *journal, | |||
530 | transaction_t *transaction, | 545 | transaction_t *transaction, |
531 | struct journal_head **descriptorp, | 546 | struct journal_head **descriptorp, |
532 | int *offsetp, | 547 | int *offsetp, |
533 | struct jbd_revoke_record_s *record) | 548 | struct jbd_revoke_record_s *record, |
549 | int write_op) | ||
534 | { | 550 | { |
535 | struct journal_head *descriptor; | 551 | struct journal_head *descriptor; |
536 | int offset; | 552 | int offset; |
@@ -549,7 +565,7 @@ static void write_one_revoke_record(journal_t *journal, | |||
549 | /* Make sure we have a descriptor with space left for the record */ | 565 | /* Make sure we have a descriptor with space left for the record */ |
550 | if (descriptor) { | 566 | if (descriptor) { |
551 | if (offset == journal->j_blocksize) { | 567 | if (offset == journal->j_blocksize) { |
552 | flush_descriptor(journal, descriptor, offset); | 568 | flush_descriptor(journal, descriptor, offset, write_op); |
553 | descriptor = NULL; | 569 | descriptor = NULL; |
554 | } | 570 | } |
555 | } | 571 | } |
@@ -586,7 +602,7 @@ static void write_one_revoke_record(journal_t *journal, | |||
586 | 602 | ||
587 | static void flush_descriptor(journal_t *journal, | 603 | static void flush_descriptor(journal_t *journal, |
588 | struct journal_head *descriptor, | 604 | struct journal_head *descriptor, |
589 | int offset) | 605 | int offset, int write_op) |
590 | { | 606 | { |
591 | journal_revoke_header_t *header; | 607 | journal_revoke_header_t *header; |
592 | struct buffer_head *bh = jh2bh(descriptor); | 608 | struct buffer_head *bh = jh2bh(descriptor); |
@@ -601,7 +617,7 @@ static void flush_descriptor(journal_t *journal, | |||
601 | set_buffer_jwrite(bh); | 617 | set_buffer_jwrite(bh); |
602 | BUFFER_TRACE(bh, "write"); | 618 | BUFFER_TRACE(bh, "write"); |
603 | set_buffer_dirty(bh); | 619 | set_buffer_dirty(bh); |
604 | ll_rw_block(SWRITE, 1, &bh); | 620 | ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); |
605 | } | 621 | } |
606 | #endif | 622 | #endif |
607 | 623 | ||
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 073c8c3df7cd..0b7d3b8226fd 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -506,7 +506,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
506 | if (err) | 506 | if (err) |
507 | jbd2_journal_abort(journal, err); | 507 | jbd2_journal_abort(journal, err); |
508 | 508 | ||
509 | jbd2_journal_write_revoke_records(journal, commit_transaction); | 509 | jbd2_journal_write_revoke_records(journal, commit_transaction, |
510 | write_op); | ||
510 | 511 | ||
511 | jbd_debug(3, "JBD: commit phase 2\n"); | 512 | jbd_debug(3, "JBD: commit phase 2\n"); |
512 | 513 | ||
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index bbe6d592d8b3..a360b06af2e3 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
@@ -86,6 +86,7 @@ | |||
86 | #include <linux/slab.h> | 86 | #include <linux/slab.h> |
87 | #include <linux/list.h> | 87 | #include <linux/list.h> |
88 | #include <linux/init.h> | 88 | #include <linux/init.h> |
89 | #include <linux/bio.h> | ||
89 | #endif | 90 | #endif |
90 | #include <linux/log2.h> | 91 | #include <linux/log2.h> |
91 | 92 | ||
@@ -118,8 +119,8 @@ struct jbd2_revoke_table_s | |||
118 | #ifdef __KERNEL__ | 119 | #ifdef __KERNEL__ |
119 | static void write_one_revoke_record(journal_t *, transaction_t *, | 120 | static void write_one_revoke_record(journal_t *, transaction_t *, |
120 | struct journal_head **, int *, | 121 | struct journal_head **, int *, |
121 | struct jbd2_revoke_record_s *); | 122 | struct jbd2_revoke_record_s *, int); |
122 | static void flush_descriptor(journal_t *, struct journal_head *, int); | 123 | static void flush_descriptor(journal_t *, struct journal_head *, int, int); |
123 | #endif | 124 | #endif |
124 | 125 | ||
125 | /* Utility functions to maintain the revoke table */ | 126 | /* Utility functions to maintain the revoke table */ |
@@ -499,7 +500,8 @@ void jbd2_journal_switch_revoke_table(journal_t *journal) | |||
499 | * revoke hash, deleting the entries as we go. | 500 | * revoke hash, deleting the entries as we go. |
500 | */ | 501 | */ |
501 | void jbd2_journal_write_revoke_records(journal_t *journal, | 502 | void jbd2_journal_write_revoke_records(journal_t *journal, |
502 | transaction_t *transaction) | 503 | transaction_t *transaction, |
504 | int write_op) | ||
503 | { | 505 | { |
504 | struct journal_head *descriptor; | 506 | struct journal_head *descriptor; |
505 | struct jbd2_revoke_record_s *record; | 507 | struct jbd2_revoke_record_s *record; |
@@ -523,14 +525,14 @@ void jbd2_journal_write_revoke_records(journal_t *journal, | |||
523 | hash_list->next; | 525 | hash_list->next; |
524 | write_one_revoke_record(journal, transaction, | 526 | write_one_revoke_record(journal, transaction, |
525 | &descriptor, &offset, | 527 | &descriptor, &offset, |
526 | record); | 528 | record, write_op); |
527 | count++; | 529 | count++; |
528 | list_del(&record->hash); | 530 | list_del(&record->hash); |
529 | kmem_cache_free(jbd2_revoke_record_cache, record); | 531 | kmem_cache_free(jbd2_revoke_record_cache, record); |
530 | } | 532 | } |
531 | } | 533 | } |
532 | if (descriptor) | 534 | if (descriptor) |
533 | flush_descriptor(journal, descriptor, offset); | 535 | flush_descriptor(journal, descriptor, offset, write_op); |
534 | jbd_debug(1, "Wrote %d revoke records\n", count); | 536 | jbd_debug(1, "Wrote %d revoke records\n", count); |
535 | } | 537 | } |
536 | 538 | ||
@@ -543,7 +545,8 @@ static void write_one_revoke_record(journal_t *journal, | |||
543 | transaction_t *transaction, | 545 | transaction_t *transaction, |
544 | struct journal_head **descriptorp, | 546 | struct journal_head **descriptorp, |
545 | int *offsetp, | 547 | int *offsetp, |
546 | struct jbd2_revoke_record_s *record) | 548 | struct jbd2_revoke_record_s *record, |
549 | int write_op) | ||
547 | { | 550 | { |
548 | struct journal_head *descriptor; | 551 | struct journal_head *descriptor; |
549 | int offset; | 552 | int offset; |
@@ -562,7 +565,7 @@ static void write_one_revoke_record(journal_t *journal, | |||
562 | /* Make sure we have a descriptor with space left for the record */ | 565 | /* Make sure we have a descriptor with space left for the record */ |
563 | if (descriptor) { | 566 | if (descriptor) { |
564 | if (offset == journal->j_blocksize) { | 567 | if (offset == journal->j_blocksize) { |
565 | flush_descriptor(journal, descriptor, offset); | 568 | flush_descriptor(journal, descriptor, offset, write_op); |
566 | descriptor = NULL; | 569 | descriptor = NULL; |
567 | } | 570 | } |
568 | } | 571 | } |
@@ -607,7 +610,7 @@ static void write_one_revoke_record(journal_t *journal, | |||
607 | 610 | ||
608 | static void flush_descriptor(journal_t *journal, | 611 | static void flush_descriptor(journal_t *journal, |
609 | struct journal_head *descriptor, | 612 | struct journal_head *descriptor, |
610 | int offset) | 613 | int offset, int write_op) |
611 | { | 614 | { |
612 | jbd2_journal_revoke_header_t *header; | 615 | jbd2_journal_revoke_header_t *header; |
613 | struct buffer_head *bh = jh2bh(descriptor); | 616 | struct buffer_head *bh = jh2bh(descriptor); |
@@ -622,7 +625,7 @@ static void flush_descriptor(journal_t *journal, | |||
622 | set_buffer_jwrite(bh); | 625 | set_buffer_jwrite(bh); |
623 | BUFFER_TRACE(bh, "write"); | 626 | BUFFER_TRACE(bh, "write"); |
624 | set_buffer_dirty(bh); | 627 | set_buffer_dirty(bh); |
625 | ll_rw_block(SWRITE, 1, &bh); | 628 | ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); |
626 | } | 629 | } |
627 | #endif | 630 | #endif |
628 | 631 | ||
diff --git a/fs/namei.c b/fs/namei.c index b8433ebfae05..78f253cd2d4f 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1248,6 +1248,8 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) | |||
1248 | int err; | 1248 | int err; |
1249 | struct qstr this; | 1249 | struct qstr this; |
1250 | 1250 | ||
1251 | WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); | ||
1252 | |||
1251 | err = __lookup_one_len(name, &this, base, len); | 1253 | err = __lookup_one_len(name, &this, base, len); |
1252 | if (err) | 1254 | if (err) |
1253 | return ERR_PTR(err); | 1255 | return ERR_PTR(err); |
diff --git a/fs/namespace.c b/fs/namespace.c index c6f54e4c4290..41196209a906 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -1377,7 +1377,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, | |||
1377 | if (parent_path) { | 1377 | if (parent_path) { |
1378 | detach_mnt(source_mnt, parent_path); | 1378 | detach_mnt(source_mnt, parent_path); |
1379 | attach_mnt(source_mnt, path); | 1379 | attach_mnt(source_mnt, path); |
1380 | touch_mnt_namespace(current->nsproxy->mnt_ns); | 1380 | touch_mnt_namespace(parent_path->mnt->mnt_ns); |
1381 | } else { | 1381 | } else { |
1382 | mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); | 1382 | mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); |
1383 | commit_tree(source_mnt); | 1383 | commit_tree(source_mnt); |
@@ -1920,8 +1920,9 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, | |||
1920 | if (data_page) | 1920 | if (data_page) |
1921 | ((char *)data_page)[PAGE_SIZE - 1] = 0; | 1921 | ((char *)data_page)[PAGE_SIZE - 1] = 0; |
1922 | 1922 | ||
1923 | /* Default to relatime */ | 1923 | /* Default to relatime unless overridden */ |
1924 | mnt_flags |= MNT_RELATIME; | 1924 | if (!(flags & MS_NOATIME)) |
1925 | mnt_flags |= MNT_RELATIME; | ||
1925 | 1926 | ||
1926 | /* Separate the per-mountpoint flags */ | 1927 | /* Separate the per-mountpoint flags */ |
1927 | if (flags & MS_NOSUID) | 1928 | if (flags & MS_NOSUID) |
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index f54360f50a9c..fa038df63ac8 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c | |||
@@ -660,13 +660,10 @@ outrel: | |||
660 | if (user.object_name_len > NCP_OBJECT_NAME_MAX_LEN) | 660 | if (user.object_name_len > NCP_OBJECT_NAME_MAX_LEN) |
661 | return -ENOMEM; | 661 | return -ENOMEM; |
662 | if (user.object_name_len) { | 662 | if (user.object_name_len) { |
663 | newname = kmalloc(user.object_name_len, GFP_USER); | 663 | newname = memdup_user(user.object_name, |
664 | if (!newname) | 664 | user.object_name_len); |
665 | return -ENOMEM; | 665 | if (IS_ERR(newname)) |
666 | if (copy_from_user(newname, user.object_name, user.object_name_len)) { | 666 | return PTR_ERR(newname); |
667 | kfree(newname); | ||
668 | return -EFAULT; | ||
669 | } | ||
670 | } else { | 667 | } else { |
671 | newname = NULL; | 668 | newname = NULL; |
672 | } | 669 | } |
@@ -760,13 +757,9 @@ outrel: | |||
760 | if (user.len > NCP_PRIVATE_DATA_MAX_LEN) | 757 | if (user.len > NCP_PRIVATE_DATA_MAX_LEN) |
761 | return -ENOMEM; | 758 | return -ENOMEM; |
762 | if (user.len) { | 759 | if (user.len) { |
763 | new = kmalloc(user.len, GFP_USER); | 760 | new = memdup_user(user.data, user.len); |
764 | if (!new) | 761 | if (IS_ERR(new)) |
765 | return -ENOMEM; | 762 | return PTR_ERR(new); |
766 | if (copy_from_user(new, user.data, user.len)) { | ||
767 | kfree(new); | ||
768 | return -EFAULT; | ||
769 | } | ||
770 | } else { | 763 | } else { |
771 | new = NULL; | 764 | new = NULL; |
772 | } | 765 | } |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5a97bcfe03e5..ec7e27d00bc6 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -517,10 +517,10 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
517 | 517 | ||
518 | ret = nfs_updatepage(filp, page, 0, pagelen); | 518 | ret = nfs_updatepage(filp, page, 0, pagelen); |
519 | out_unlock: | 519 | out_unlock: |
520 | if (!ret) | ||
521 | return VM_FAULT_LOCKED; | ||
520 | unlock_page(page); | 522 | unlock_page(page); |
521 | if (ret) | 523 | return VM_FAULT_SIGBUS; |
522 | ret = VM_FAULT_SIGBUS; | ||
523 | return ret; | ||
524 | } | 524 | } |
525 | 525 | ||
526 | static struct vm_operations_struct nfs_file_vm_ops = { | 526 | static struct vm_operations_struct nfs_file_vm_ops = { |
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index e6a1932c7110..35869a4921f1 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c | |||
@@ -713,7 +713,8 @@ nfs3_xdr_setaclargs(struct rpc_rqst *req, __be32 *p, | |||
713 | if (args->npages != 0) | 713 | if (args->npages != 0) |
714 | xdr_encode_pages(buf, args->pages, 0, args->len); | 714 | xdr_encode_pages(buf, args->pages, 0, args->len); |
715 | else | 715 | else |
716 | req->rq_slen += args->len; | 716 | req->rq_slen = xdr_adjust_iovec(req->rq_svec, |
717 | p + XDR_QUADLEN(args->len)); | ||
717 | 718 | ||
718 | err = nfsacl_encode(buf, base, args->inode, | 719 | err = nfsacl_encode(buf, base, args->inode, |
719 | (args->mask & NFS_ACL) ? | 720 | (args->mask & NFS_ACL) ? |
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 3444c0052a87..5275097a7565 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c | |||
@@ -229,21 +229,23 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) | |||
229 | goto out; | 229 | goto out; |
230 | status = vfs_readdir(filp, nfsd4_build_namelist, &names); | 230 | status = vfs_readdir(filp, nfsd4_build_namelist, &names); |
231 | fput(filp); | 231 | fput(filp); |
232 | mutex_lock(&dir->d_inode->i_mutex); | ||
232 | while (!list_empty(&names)) { | 233 | while (!list_empty(&names)) { |
233 | entry = list_entry(names.next, struct name_list, list); | 234 | entry = list_entry(names.next, struct name_list, list); |
234 | 235 | ||
235 | dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); | 236 | dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); |
236 | if (IS_ERR(dentry)) { | 237 | if (IS_ERR(dentry)) { |
237 | status = PTR_ERR(dentry); | 238 | status = PTR_ERR(dentry); |
238 | goto out; | 239 | break; |
239 | } | 240 | } |
240 | status = f(dir, dentry); | 241 | status = f(dir, dentry); |
241 | dput(dentry); | 242 | dput(dentry); |
242 | if (status) | 243 | if (status) |
243 | goto out; | 244 | break; |
244 | list_del(&entry->list); | 245 | list_del(&entry->list); |
245 | kfree(entry); | 246 | kfree(entry); |
246 | } | 247 | } |
248 | mutex_unlock(&dir->d_inode->i_mutex); | ||
247 | out: | 249 | out: |
248 | while (!list_empty(&names)) { | 250 | while (!list_empty(&names)) { |
249 | entry = list_entry(names.next, struct name_list, list); | 251 | entry = list_entry(names.next, struct name_list, list); |
@@ -255,36 +257,6 @@ out: | |||
255 | } | 257 | } |
256 | 258 | ||
257 | static int | 259 | static int |
258 | nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry) | ||
259 | { | ||
260 | int status; | ||
261 | |||
262 | if (!S_ISREG(dir->d_inode->i_mode)) { | ||
263 | printk("nfsd4: non-file found in client recovery directory\n"); | ||
264 | return -EINVAL; | ||
265 | } | ||
266 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); | ||
267 | status = vfs_unlink(dir->d_inode, dentry); | ||
268 | mutex_unlock(&dir->d_inode->i_mutex); | ||
269 | return status; | ||
270 | } | ||
271 | |||
272 | static int | ||
273 | nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry) | ||
274 | { | ||
275 | int status; | ||
276 | |||
277 | /* For now this directory should already be empty, but we empty it of | ||
278 | * any regular files anyway, just in case the directory was created by | ||
279 | * a kernel from the future.... */ | ||
280 | nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file); | ||
281 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); | ||
282 | status = vfs_rmdir(dir->d_inode, dentry); | ||
283 | mutex_unlock(&dir->d_inode->i_mutex); | ||
284 | return status; | ||
285 | } | ||
286 | |||
287 | static int | ||
288 | nfsd4_unlink_clid_dir(char *name, int namlen) | 260 | nfsd4_unlink_clid_dir(char *name, int namlen) |
289 | { | 261 | { |
290 | struct dentry *dentry; | 262 | struct dentry *dentry; |
@@ -294,18 +266,18 @@ nfsd4_unlink_clid_dir(char *name, int namlen) | |||
294 | 266 | ||
295 | mutex_lock(&rec_dir.dentry->d_inode->i_mutex); | 267 | mutex_lock(&rec_dir.dentry->d_inode->i_mutex); |
296 | dentry = lookup_one_len(name, rec_dir.dentry, namlen); | 268 | dentry = lookup_one_len(name, rec_dir.dentry, namlen); |
297 | mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); | ||
298 | if (IS_ERR(dentry)) { | 269 | if (IS_ERR(dentry)) { |
299 | status = PTR_ERR(dentry); | 270 | status = PTR_ERR(dentry); |
300 | return status; | 271 | goto out_unlock; |
301 | } | 272 | } |
302 | status = -ENOENT; | 273 | status = -ENOENT; |
303 | if (!dentry->d_inode) | 274 | if (!dentry->d_inode) |
304 | goto out; | 275 | goto out; |
305 | 276 | status = vfs_rmdir(rec_dir.dentry->d_inode, dentry); | |
306 | status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry); | ||
307 | out: | 277 | out: |
308 | dput(dentry); | 278 | dput(dentry); |
279 | out_unlock: | ||
280 | mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); | ||
309 | return status; | 281 | return status; |
310 | } | 282 | } |
311 | 283 | ||
@@ -348,7 +320,7 @@ purge_old(struct dentry *parent, struct dentry *child) | |||
348 | if (nfs4_has_reclaimed_state(child->d_name.name, false)) | 320 | if (nfs4_has_reclaimed_state(child->d_name.name, false)) |
349 | return 0; | 321 | return 0; |
350 | 322 | ||
351 | status = nfsd4_clear_clid_dir(parent, child); | 323 | status = vfs_rmdir(parent->d_inode, child); |
352 | if (status) | 324 | if (status) |
353 | printk("failed to remove client recovery directory %s\n", | 325 | printk("failed to remove client recovery directory %s\n", |
354 | child->d_name.name); | 326 | child->d_name.name); |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ab93fcfef254..6c68ffd6b4bb 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -116,10 +116,15 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, | |||
116 | } | 116 | } |
117 | if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { | 117 | if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) { |
118 | /* successfully crossed mount point */ | 118 | /* successfully crossed mount point */ |
119 | exp_put(exp); | 119 | /* |
120 | *expp = exp2; | 120 | * This is subtle: dentry is *not* under mnt at this point. |
121 | * The only reason we are safe is that original mnt is pinned | ||
122 | * down by exp, so we should dput before putting exp. | ||
123 | */ | ||
121 | dput(dentry); | 124 | dput(dentry); |
122 | *dpp = mounts; | 125 | *dpp = mounts; |
126 | exp_put(exp); | ||
127 | *expp = exp2; | ||
123 | } else { | 128 | } else { |
124 | exp_put(exp2); | 129 | exp_put(exp2); |
125 | dput(mounts); | 130 | dput(mounts); |
@@ -1885,8 +1890,8 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, | |||
1885 | return 0; | 1890 | return 0; |
1886 | } | 1891 | } |
1887 | 1892 | ||
1888 | static int nfsd_buffered_readdir(struct file *file, filldir_t func, | 1893 | static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, |
1889 | struct readdir_cd *cdp, loff_t *offsetp) | 1894 | struct readdir_cd *cdp, loff_t *offsetp) |
1890 | { | 1895 | { |
1891 | struct readdir_data buf; | 1896 | struct readdir_data buf; |
1892 | struct buffered_dirent *de; | 1897 | struct buffered_dirent *de; |
@@ -1896,11 +1901,12 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func, | |||
1896 | 1901 | ||
1897 | buf.dirent = (void *)__get_free_page(GFP_KERNEL); | 1902 | buf.dirent = (void *)__get_free_page(GFP_KERNEL); |
1898 | if (!buf.dirent) | 1903 | if (!buf.dirent) |
1899 | return -ENOMEM; | 1904 | return nfserrno(-ENOMEM); |
1900 | 1905 | ||
1901 | offset = *offsetp; | 1906 | offset = *offsetp; |
1902 | 1907 | ||
1903 | while (1) { | 1908 | while (1) { |
1909 | struct inode *dir_inode = file->f_path.dentry->d_inode; | ||
1904 | unsigned int reclen; | 1910 | unsigned int reclen; |
1905 | 1911 | ||
1906 | cdp->err = nfserr_eof; /* will be cleared on successful read */ | 1912 | cdp->err = nfserr_eof; /* will be cleared on successful read */ |
@@ -1919,26 +1925,38 @@ static int nfsd_buffered_readdir(struct file *file, filldir_t func, | |||
1919 | if (!size) | 1925 | if (!size) |
1920 | break; | 1926 | break; |
1921 | 1927 | ||
1928 | /* | ||
1929 | * Various filldir functions may end up calling back into | ||
1930 | * lookup_one_len() and the file system's ->lookup() method. | ||
1931 | * These expect i_mutex to be held, as it would within readdir. | ||
1932 | */ | ||
1933 | host_err = mutex_lock_killable(&dir_inode->i_mutex); | ||
1934 | if (host_err) | ||
1935 | break; | ||
1936 | |||
1922 | de = (struct buffered_dirent *)buf.dirent; | 1937 | de = (struct buffered_dirent *)buf.dirent; |
1923 | while (size > 0) { | 1938 | while (size > 0) { |
1924 | offset = de->offset; | 1939 | offset = de->offset; |
1925 | 1940 | ||
1926 | if (func(cdp, de->name, de->namlen, de->offset, | 1941 | if (func(cdp, de->name, de->namlen, de->offset, |
1927 | de->ino, de->d_type)) | 1942 | de->ino, de->d_type)) |
1928 | goto done; | 1943 | break; |
1929 | 1944 | ||
1930 | if (cdp->err != nfs_ok) | 1945 | if (cdp->err != nfs_ok) |
1931 | goto done; | 1946 | break; |
1932 | 1947 | ||
1933 | reclen = ALIGN(sizeof(*de) + de->namlen, | 1948 | reclen = ALIGN(sizeof(*de) + de->namlen, |
1934 | sizeof(u64)); | 1949 | sizeof(u64)); |
1935 | size -= reclen; | 1950 | size -= reclen; |
1936 | de = (struct buffered_dirent *)((char *)de + reclen); | 1951 | de = (struct buffered_dirent *)((char *)de + reclen); |
1937 | } | 1952 | } |
1953 | mutex_unlock(&dir_inode->i_mutex); | ||
1954 | if (size > 0) /* We bailed out early */ | ||
1955 | break; | ||
1956 | |||
1938 | offset = vfs_llseek(file, 0, SEEK_CUR); | 1957 | offset = vfs_llseek(file, 0, SEEK_CUR); |
1939 | } | 1958 | } |
1940 | 1959 | ||
1941 | done: | ||
1942 | free_page((unsigned long)(buf.dirent)); | 1960 | free_page((unsigned long)(buf.dirent)); |
1943 | 1961 | ||
1944 | if (host_err) | 1962 | if (host_err) |
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 24638e059bf3..064279e33bbb 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c | |||
@@ -688,6 +688,8 @@ static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_gc = { | |||
688 | .bpop_translate = NULL, | 688 | .bpop_translate = NULL, |
689 | }; | 689 | }; |
690 | 690 | ||
691 | static struct lock_class_key nilfs_bmap_dat_lock_key; | ||
692 | |||
691 | /** | 693 | /** |
692 | * nilfs_bmap_read - read a bmap from an inode | 694 | * nilfs_bmap_read - read a bmap from an inode |
693 | * @bmap: bmap | 695 | * @bmap: bmap |
@@ -715,6 +717,7 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) | |||
715 | bmap->b_pops = &nilfs_bmap_ptr_ops_p; | 717 | bmap->b_pops = &nilfs_bmap_ptr_ops_p; |
716 | bmap->b_last_allocated_key = 0; /* XXX: use macro */ | 718 | bmap->b_last_allocated_key = 0; /* XXX: use macro */ |
717 | bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; | 719 | bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; |
720 | lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); | ||
718 | break; | 721 | break; |
719 | case NILFS_CPFILE_INO: | 722 | case NILFS_CPFILE_INO: |
720 | case NILFS_SUFILE_INO: | 723 | case NILFS_SUFILE_INO: |
@@ -772,6 +775,7 @@ void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) | |||
772 | { | 775 | { |
773 | memcpy(gcbmap, bmap, sizeof(union nilfs_bmap_union)); | 776 | memcpy(gcbmap, bmap, sizeof(union nilfs_bmap_union)); |
774 | init_rwsem(&gcbmap->b_sem); | 777 | init_rwsem(&gcbmap->b_sem); |
778 | lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); | ||
775 | gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode; | 779 | gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode; |
776 | } | 780 | } |
777 | 781 | ||
@@ -779,5 +783,6 @@ void nilfs_bmap_commit_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) | |||
779 | { | 783 | { |
780 | memcpy(bmap, gcbmap, sizeof(union nilfs_bmap_union)); | 784 | memcpy(bmap, gcbmap, sizeof(union nilfs_bmap_union)); |
781 | init_rwsem(&bmap->b_sem); | 785 | init_rwsem(&bmap->b_sem); |
786 | lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); | ||
782 | bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; | 787 | bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; |
783 | } | 788 | } |
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 7558c977db02..3d0c18a16db1 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h | |||
@@ -35,11 +35,6 @@ | |||
35 | #include "bmap_union.h" | 35 | #include "bmap_union.h" |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * NILFS filesystem version | ||
39 | */ | ||
40 | #define NILFS_VERSION "2.0.5" | ||
41 | |||
42 | /* | ||
43 | * nilfs inode data in memory | 38 | * nilfs inode data in memory |
44 | */ | 39 | */ |
45 | struct nilfs_inode_info { | 40 | struct nilfs_inode_info { |
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 6ade0963fc1d..4fc081e47d70 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c | |||
@@ -413,7 +413,6 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, | |||
413 | struct nilfs_segment_entry *ent, *n; | 413 | struct nilfs_segment_entry *ent, *n; |
414 | struct inode *sufile = nilfs->ns_sufile; | 414 | struct inode *sufile = nilfs->ns_sufile; |
415 | __u64 segnum[4]; | 415 | __u64 segnum[4]; |
416 | time_t mtime; | ||
417 | int err; | 416 | int err; |
418 | int i; | 417 | int i; |
419 | 418 | ||
@@ -442,24 +441,13 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, | |||
442 | * Collecting segments written after the latest super root. | 441 | * Collecting segments written after the latest super root. |
443 | * These are marked dirty to avoid being reallocated in the next write. | 442 | * These are marked dirty to avoid being reallocated in the next write. |
444 | */ | 443 | */ |
445 | mtime = get_seconds(); | ||
446 | list_for_each_entry_safe(ent, n, head, list) { | 444 | list_for_each_entry_safe(ent, n, head, list) { |
447 | if (ent->segnum == segnum[0]) { | 445 | if (ent->segnum != segnum[0]) { |
448 | list_del(&ent->list); | 446 | err = nilfs_sufile_scrap(sufile, ent->segnum); |
449 | nilfs_free_segment_entry(ent); | 447 | if (unlikely(err)) |
450 | continue; | 448 | goto failed; |
451 | } | ||
452 | err = nilfs_open_segment_entry(ent, sufile); | ||
453 | if (unlikely(err)) | ||
454 | goto failed; | ||
455 | if (!nilfs_segment_usage_dirty(ent->raw_su)) { | ||
456 | /* make the segment garbage */ | ||
457 | ent->raw_su->su_nblocks = cpu_to_le32(0); | ||
458 | ent->raw_su->su_lastmod = cpu_to_le32(mtime); | ||
459 | nilfs_segment_usage_set_dirty(ent->raw_su); | ||
460 | } | 449 | } |
461 | list_del(&ent->list); | 450 | list_del(&ent->list); |
462 | nilfs_close_segment_entry(ent, sufile); | ||
463 | nilfs_free_segment_entry(ent); | 451 | nilfs_free_segment_entry(ent); |
464 | } | 452 | } |
465 | 453 | ||
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index c774cf397e2f..98e68677f045 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c | |||
@@ -93,6 +93,52 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum, | |||
93 | create, NULL, bhp); | 93 | create, NULL, bhp); |
94 | } | 94 | } |
95 | 95 | ||
96 | static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, | ||
97 | u64 ncleanadd, u64 ndirtyadd) | ||
98 | { | ||
99 | struct nilfs_sufile_header *header; | ||
100 | void *kaddr; | ||
101 | |||
102 | kaddr = kmap_atomic(header_bh->b_page, KM_USER0); | ||
103 | header = kaddr + bh_offset(header_bh); | ||
104 | le64_add_cpu(&header->sh_ncleansegs, ncleanadd); | ||
105 | le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); | ||
106 | kunmap_atomic(kaddr, KM_USER0); | ||
107 | |||
108 | nilfs_mdt_mark_buffer_dirty(header_bh); | ||
109 | } | ||
110 | |||
111 | int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create, | ||
112 | void (*dofunc)(struct inode *, __u64, | ||
113 | struct buffer_head *, | ||
114 | struct buffer_head *)) | ||
115 | { | ||
116 | struct buffer_head *header_bh, *bh; | ||
117 | int ret; | ||
118 | |||
119 | if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) { | ||
120 | printk(KERN_WARNING "%s: invalid segment number: %llu\n", | ||
121 | __func__, (unsigned long long)segnum); | ||
122 | return -EINVAL; | ||
123 | } | ||
124 | down_write(&NILFS_MDT(sufile)->mi_sem); | ||
125 | |||
126 | ret = nilfs_sufile_get_header_block(sufile, &header_bh); | ||
127 | if (ret < 0) | ||
128 | goto out_sem; | ||
129 | |||
130 | ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, create, &bh); | ||
131 | if (!ret) { | ||
132 | dofunc(sufile, segnum, header_bh, bh); | ||
133 | brelse(bh); | ||
134 | } | ||
135 | brelse(header_bh); | ||
136 | |||
137 | out_sem: | ||
138 | up_write(&NILFS_MDT(sufile)->mi_sem); | ||
139 | return ret; | ||
140 | } | ||
141 | |||
96 | /** | 142 | /** |
97 | * nilfs_sufile_alloc - allocate a segment | 143 | * nilfs_sufile_alloc - allocate a segment |
98 | * @sufile: inode of segment usage file | 144 | * @sufile: inode of segment usage file |
@@ -113,7 +159,6 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum, | |||
113 | int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) | 159 | int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) |
114 | { | 160 | { |
115 | struct buffer_head *header_bh, *su_bh; | 161 | struct buffer_head *header_bh, *su_bh; |
116 | struct the_nilfs *nilfs; | ||
117 | struct nilfs_sufile_header *header; | 162 | struct nilfs_sufile_header *header; |
118 | struct nilfs_segment_usage *su; | 163 | struct nilfs_segment_usage *su; |
119 | size_t susz = NILFS_MDT(sufile)->mi_entry_size; | 164 | size_t susz = NILFS_MDT(sufile)->mi_entry_size; |
@@ -124,8 +169,6 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) | |||
124 | 169 | ||
125 | down_write(&NILFS_MDT(sufile)->mi_sem); | 170 | down_write(&NILFS_MDT(sufile)->mi_sem); |
126 | 171 | ||
127 | nilfs = NILFS_MDT(sufile)->mi_nilfs; | ||
128 | |||
129 | ret = nilfs_sufile_get_header_block(sufile, &header_bh); | 172 | ret = nilfs_sufile_get_header_block(sufile, &header_bh); |
130 | if (ret < 0) | 173 | if (ret < 0) |
131 | goto out_sem; | 174 | goto out_sem; |
@@ -192,165 +235,84 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) | |||
192 | return ret; | 235 | return ret; |
193 | } | 236 | } |
194 | 237 | ||
195 | /** | 238 | void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum, |
196 | * nilfs_sufile_cancel_free - | 239 | struct buffer_head *header_bh, |
197 | * @sufile: inode of segment usage file | 240 | struct buffer_head *su_bh) |
198 | * @segnum: segment number | ||
199 | * | ||
200 | * Description: | ||
201 | * | ||
202 | * Return Value: On success, 0 is returned. On error, one of the following | ||
203 | * negative error codes is returned. | ||
204 | * | ||
205 | * %-EIO - I/O error. | ||
206 | * | ||
207 | * %-ENOMEM - Insufficient amount of memory available. | ||
208 | */ | ||
209 | int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum) | ||
210 | { | 241 | { |
211 | struct buffer_head *header_bh, *su_bh; | ||
212 | struct the_nilfs *nilfs; | ||
213 | struct nilfs_sufile_header *header; | ||
214 | struct nilfs_segment_usage *su; | 242 | struct nilfs_segment_usage *su; |
215 | void *kaddr; | 243 | void *kaddr; |
216 | int ret; | ||
217 | |||
218 | down_write(&NILFS_MDT(sufile)->mi_sem); | ||
219 | |||
220 | nilfs = NILFS_MDT(sufile)->mi_nilfs; | ||
221 | |||
222 | ret = nilfs_sufile_get_header_block(sufile, &header_bh); | ||
223 | if (ret < 0) | ||
224 | goto out_sem; | ||
225 | |||
226 | ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh); | ||
227 | if (ret < 0) | ||
228 | goto out_header; | ||
229 | 244 | ||
230 | kaddr = kmap_atomic(su_bh->b_page, KM_USER0); | 245 | kaddr = kmap_atomic(su_bh->b_page, KM_USER0); |
231 | su = nilfs_sufile_block_get_segment_usage( | 246 | su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); |
232 | sufile, segnum, su_bh, kaddr); | ||
233 | if (unlikely(!nilfs_segment_usage_clean(su))) { | 247 | if (unlikely(!nilfs_segment_usage_clean(su))) { |
234 | printk(KERN_WARNING "%s: segment %llu must be clean\n", | 248 | printk(KERN_WARNING "%s: segment %llu must be clean\n", |
235 | __func__, (unsigned long long)segnum); | 249 | __func__, (unsigned long long)segnum); |
236 | kunmap_atomic(kaddr, KM_USER0); | 250 | kunmap_atomic(kaddr, KM_USER0); |
237 | goto out_su_bh; | 251 | return; |
238 | } | 252 | } |
239 | nilfs_segment_usage_set_dirty(su); | 253 | nilfs_segment_usage_set_dirty(su); |
240 | kunmap_atomic(kaddr, KM_USER0); | 254 | kunmap_atomic(kaddr, KM_USER0); |
241 | 255 | ||
242 | kaddr = kmap_atomic(header_bh->b_page, KM_USER0); | 256 | nilfs_sufile_mod_counter(header_bh, -1, 1); |
243 | header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); | ||
244 | le64_add_cpu(&header->sh_ncleansegs, -1); | ||
245 | le64_add_cpu(&header->sh_ndirtysegs, 1); | ||
246 | kunmap_atomic(kaddr, KM_USER0); | ||
247 | |||
248 | nilfs_mdt_mark_buffer_dirty(header_bh); | ||
249 | nilfs_mdt_mark_buffer_dirty(su_bh); | 257 | nilfs_mdt_mark_buffer_dirty(su_bh); |
250 | nilfs_mdt_mark_dirty(sufile); | 258 | nilfs_mdt_mark_dirty(sufile); |
251 | |||
252 | out_su_bh: | ||
253 | brelse(su_bh); | ||
254 | out_header: | ||
255 | brelse(header_bh); | ||
256 | out_sem: | ||
257 | up_write(&NILFS_MDT(sufile)->mi_sem); | ||
258 | return ret; | ||
259 | } | 259 | } |
260 | 260 | ||
261 | /** | 261 | void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, |
262 | * nilfs_sufile_freev - free segments | 262 | struct buffer_head *header_bh, |
263 | * @sufile: inode of segment usage file | 263 | struct buffer_head *su_bh) |
264 | * @segnum: array of segment numbers | ||
265 | * @nsegs: number of segments | ||
266 | * | ||
267 | * Description: nilfs_sufile_freev() frees segments specified by @segnum and | ||
268 | * @nsegs, which must have been returned by a previous call to | ||
269 | * nilfs_sufile_alloc(). | ||
270 | * | ||
271 | * Return Value: On success, 0 is returned. On error, one of the following | ||
272 | * negative error codes is returned. | ||
273 | * | ||
274 | * %-EIO - I/O error. | ||
275 | * | ||
276 | * %-ENOMEM - Insufficient amount of memory available. | ||
277 | */ | ||
278 | #define NILFS_SUFILE_FREEV_PREALLOC 16 | ||
279 | int nilfs_sufile_freev(struct inode *sufile, __u64 *segnum, size_t nsegs) | ||
280 | { | 264 | { |
281 | struct buffer_head *header_bh, **su_bh, | ||
282 | *su_bh_prealloc[NILFS_SUFILE_FREEV_PREALLOC]; | ||
283 | struct the_nilfs *nilfs; | ||
284 | struct nilfs_sufile_header *header; | ||
285 | struct nilfs_segment_usage *su; | 265 | struct nilfs_segment_usage *su; |
286 | void *kaddr; | 266 | void *kaddr; |
287 | int ret, i; | 267 | int clean, dirty; |
288 | 268 | ||
289 | down_write(&NILFS_MDT(sufile)->mi_sem); | 269 | kaddr = kmap_atomic(su_bh->b_page, KM_USER0); |
290 | 270 | su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); | |
291 | nilfs = NILFS_MDT(sufile)->mi_nilfs; | 271 | if (su->su_flags == cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY) && |
292 | 272 | su->su_nblocks == cpu_to_le32(0)) { | |
293 | /* prepare resources */ | ||
294 | if (nsegs <= NILFS_SUFILE_FREEV_PREALLOC) | ||
295 | su_bh = su_bh_prealloc; | ||
296 | else { | ||
297 | su_bh = kmalloc(sizeof(*su_bh) * nsegs, GFP_NOFS); | ||
298 | if (su_bh == NULL) { | ||
299 | ret = -ENOMEM; | ||
300 | goto out_sem; | ||
301 | } | ||
302 | } | ||
303 | |||
304 | ret = nilfs_sufile_get_header_block(sufile, &header_bh); | ||
305 | if (ret < 0) | ||
306 | goto out_su_bh; | ||
307 | for (i = 0; i < nsegs; i++) { | ||
308 | ret = nilfs_sufile_get_segment_usage_block(sufile, segnum[i], | ||
309 | 0, &su_bh[i]); | ||
310 | if (ret < 0) | ||
311 | goto out_bh; | ||
312 | } | ||
313 | |||
314 | /* free segments */ | ||
315 | for (i = 0; i < nsegs; i++) { | ||
316 | kaddr = kmap_atomic(su_bh[i]->b_page, KM_USER0); | ||
317 | su = nilfs_sufile_block_get_segment_usage( | ||
318 | sufile, segnum[i], su_bh[i], kaddr); | ||
319 | WARN_ON(nilfs_segment_usage_error(su)); | ||
320 | nilfs_segment_usage_set_clean(su); | ||
321 | kunmap_atomic(kaddr, KM_USER0); | 273 | kunmap_atomic(kaddr, KM_USER0); |
322 | nilfs_mdt_mark_buffer_dirty(su_bh[i]); | 274 | return; |
323 | } | 275 | } |
324 | kaddr = kmap_atomic(header_bh->b_page, KM_USER0); | 276 | clean = nilfs_segment_usage_clean(su); |
325 | header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); | 277 | dirty = nilfs_segment_usage_dirty(su); |
326 | le64_add_cpu(&header->sh_ncleansegs, nsegs); | 278 | |
327 | le64_add_cpu(&header->sh_ndirtysegs, -(u64)nsegs); | 279 | /* make the segment garbage */ |
280 | su->su_lastmod = cpu_to_le64(0); | ||
281 | su->su_nblocks = cpu_to_le32(0); | ||
282 | su->su_flags = cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY); | ||
328 | kunmap_atomic(kaddr, KM_USER0); | 283 | kunmap_atomic(kaddr, KM_USER0); |
329 | nilfs_mdt_mark_buffer_dirty(header_bh); | 284 | |
285 | nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); | ||
286 | nilfs_mdt_mark_buffer_dirty(su_bh); | ||
330 | nilfs_mdt_mark_dirty(sufile); | 287 | nilfs_mdt_mark_dirty(sufile); |
288 | } | ||
331 | 289 | ||
332 | out_bh: | 290 | void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, |
333 | for (i--; i >= 0; i--) | 291 | struct buffer_head *header_bh, |
334 | brelse(su_bh[i]); | 292 | struct buffer_head *su_bh) |
335 | brelse(header_bh); | 293 | { |
294 | struct nilfs_segment_usage *su; | ||
295 | void *kaddr; | ||
296 | int sudirty; | ||
336 | 297 | ||
337 | out_su_bh: | 298 | kaddr = kmap_atomic(su_bh->b_page, KM_USER0); |
338 | if (su_bh != su_bh_prealloc) | 299 | su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); |
339 | kfree(su_bh); | 300 | if (nilfs_segment_usage_clean(su)) { |
301 | printk(KERN_WARNING "%s: segment %llu is already clean\n", | ||
302 | __func__, (unsigned long long)segnum); | ||
303 | kunmap_atomic(kaddr, KM_USER0); | ||
304 | return; | ||
305 | } | ||
306 | WARN_ON(nilfs_segment_usage_error(su)); | ||
307 | WARN_ON(!nilfs_segment_usage_dirty(su)); | ||
340 | 308 | ||
341 | out_sem: | 309 | sudirty = nilfs_segment_usage_dirty(su); |
342 | up_write(&NILFS_MDT(sufile)->mi_sem); | 310 | nilfs_segment_usage_set_clean(su); |
343 | return ret; | 311 | kunmap_atomic(kaddr, KM_USER0); |
344 | } | 312 | nilfs_mdt_mark_buffer_dirty(su_bh); |
345 | 313 | ||
346 | /** | 314 | nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); |
347 | * nilfs_sufile_free - | 315 | nilfs_mdt_mark_dirty(sufile); |
348 | * @sufile: | ||
349 | * @segnum: | ||
350 | */ | ||
351 | int nilfs_sufile_free(struct inode *sufile, __u64 segnum) | ||
352 | { | ||
353 | return nilfs_sufile_freev(sufile, &segnum, 1); | ||
354 | } | 316 | } |
355 | 317 | ||
356 | /** | 318 | /** |
@@ -500,72 +462,28 @@ int nilfs_sufile_get_ncleansegs(struct inode *sufile, unsigned long *nsegsp) | |||
500 | return ret; | 462 | return ret; |
501 | } | 463 | } |
502 | 464 | ||
503 | /** | 465 | void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, |
504 | * nilfs_sufile_set_error - mark a segment as erroneous | 466 | struct buffer_head *header_bh, |
505 | * @sufile: inode of segment usage file | 467 | struct buffer_head *su_bh) |
506 | * @segnum: segment number | ||
507 | * | ||
508 | * Description: nilfs_sufile_set_error() marks the segment specified by | ||
509 | * @segnum as erroneous. The error segment will never be used again. | ||
510 | * | ||
511 | * Return Value: On success, 0 is returned. On error, one of the following | ||
512 | * negative error codes is returned. | ||
513 | * | ||
514 | * %-EIO - I/O error. | ||
515 | * | ||
516 | * %-ENOMEM - Insufficient amount of memory available. | ||
517 | * | ||
518 | * %-EINVAL - Invalid segment usage number. | ||
519 | */ | ||
520 | int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum) | ||
521 | { | 468 | { |
522 | struct buffer_head *header_bh, *su_bh; | ||
523 | struct nilfs_segment_usage *su; | 469 | struct nilfs_segment_usage *su; |
524 | struct nilfs_sufile_header *header; | ||
525 | void *kaddr; | 470 | void *kaddr; |
526 | int ret; | 471 | int suclean; |
527 | |||
528 | if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) { | ||
529 | printk(KERN_WARNING "%s: invalid segment number: %llu\n", | ||
530 | __func__, (unsigned long long)segnum); | ||
531 | return -EINVAL; | ||
532 | } | ||
533 | down_write(&NILFS_MDT(sufile)->mi_sem); | ||
534 | |||
535 | ret = nilfs_sufile_get_header_block(sufile, &header_bh); | ||
536 | if (ret < 0) | ||
537 | goto out_sem; | ||
538 | ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh); | ||
539 | if (ret < 0) | ||
540 | goto out_header; | ||
541 | 472 | ||
542 | kaddr = kmap_atomic(su_bh->b_page, KM_USER0); | 473 | kaddr = kmap_atomic(su_bh->b_page, KM_USER0); |
543 | su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); | 474 | su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); |
544 | if (nilfs_segment_usage_error(su)) { | 475 | if (nilfs_segment_usage_error(su)) { |
545 | kunmap_atomic(kaddr, KM_USER0); | 476 | kunmap_atomic(kaddr, KM_USER0); |
546 | brelse(su_bh); | 477 | return; |
547 | goto out_header; | ||
548 | } | 478 | } |
549 | 479 | suclean = nilfs_segment_usage_clean(su); | |
550 | nilfs_segment_usage_set_error(su); | 480 | nilfs_segment_usage_set_error(su); |
551 | kunmap_atomic(kaddr, KM_USER0); | 481 | kunmap_atomic(kaddr, KM_USER0); |
552 | brelse(su_bh); | ||
553 | 482 | ||
554 | kaddr = kmap_atomic(header_bh->b_page, KM_USER0); | 483 | if (suclean) |
555 | header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); | 484 | nilfs_sufile_mod_counter(header_bh, -1, 0); |
556 | le64_add_cpu(&header->sh_ndirtysegs, -1); | ||
557 | kunmap_atomic(kaddr, KM_USER0); | ||
558 | nilfs_mdt_mark_buffer_dirty(header_bh); | ||
559 | nilfs_mdt_mark_buffer_dirty(su_bh); | 485 | nilfs_mdt_mark_buffer_dirty(su_bh); |
560 | nilfs_mdt_mark_dirty(sufile); | 486 | nilfs_mdt_mark_dirty(sufile); |
561 | brelse(su_bh); | ||
562 | |||
563 | out_header: | ||
564 | brelse(header_bh); | ||
565 | |||
566 | out_sem: | ||
567 | up_write(&NILFS_MDT(sufile)->mi_sem); | ||
568 | return ret; | ||
569 | } | 487 | } |
570 | 488 | ||
571 | /** | 489 | /** |
@@ -625,7 +543,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, | |||
625 | si[i + j].sui_nblocks = le32_to_cpu(su->su_nblocks); | 543 | si[i + j].sui_nblocks = le32_to_cpu(su->su_nblocks); |
626 | si[i + j].sui_flags = le32_to_cpu(su->su_flags) & | 544 | si[i + j].sui_flags = le32_to_cpu(su->su_flags) & |
627 | ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE); | 545 | ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE); |
628 | if (nilfs_segment_is_active(nilfs, segnum + i + j)) | 546 | if (nilfs_segment_is_active(nilfs, segnum + j)) |
629 | si[i + j].sui_flags |= | 547 | si[i + j].sui_flags |= |
630 | (1UL << NILFS_SEGMENT_USAGE_ACTIVE); | 548 | (1UL << NILFS_SEGMENT_USAGE_ACTIVE); |
631 | } | 549 | } |
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index d595f33a768d..a2e2efd4ade1 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h | |||
@@ -36,9 +36,6 @@ static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) | |||
36 | } | 36 | } |
37 | 37 | ||
38 | int nilfs_sufile_alloc(struct inode *, __u64 *); | 38 | int nilfs_sufile_alloc(struct inode *, __u64 *); |
39 | int nilfs_sufile_cancel_free(struct inode *, __u64); | ||
40 | int nilfs_sufile_freev(struct inode *, __u64 *, size_t); | ||
41 | int nilfs_sufile_free(struct inode *, __u64); | ||
42 | int nilfs_sufile_get_segment_usage(struct inode *, __u64, | 39 | int nilfs_sufile_get_segment_usage(struct inode *, __u64, |
43 | struct nilfs_segment_usage **, | 40 | struct nilfs_segment_usage **, |
44 | struct buffer_head **); | 41 | struct buffer_head **); |
@@ -46,9 +43,83 @@ void nilfs_sufile_put_segment_usage(struct inode *, __u64, | |||
46 | struct buffer_head *); | 43 | struct buffer_head *); |
47 | int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); | 44 | int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); |
48 | int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *); | 45 | int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *); |
49 | int nilfs_sufile_set_error(struct inode *, __u64); | ||
50 | ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, struct nilfs_suinfo *, | 46 | ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, struct nilfs_suinfo *, |
51 | size_t); | 47 | size_t); |
52 | 48 | ||
49 | int nilfs_sufile_update(struct inode *, __u64, int, | ||
50 | void (*dofunc)(struct inode *, __u64, | ||
51 | struct buffer_head *, | ||
52 | struct buffer_head *)); | ||
53 | void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *, | ||
54 | struct buffer_head *); | ||
55 | void nilfs_sufile_do_scrap(struct inode *, __u64, struct buffer_head *, | ||
56 | struct buffer_head *); | ||
57 | void nilfs_sufile_do_free(struct inode *, __u64, struct buffer_head *, | ||
58 | struct buffer_head *); | ||
59 | void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, | ||
60 | struct buffer_head *); | ||
61 | |||
62 | /** | ||
63 | * nilfs_sufile_cancel_free - | ||
64 | * @sufile: inode of segment usage file | ||
65 | * @segnum: segment number | ||
66 | * | ||
67 | * Description: | ||
68 | * | ||
69 | * Return Value: On success, 0 is returned. On error, one of the following | ||
70 | * negative error codes is returned. | ||
71 | * | ||
72 | * %-EIO - I/O error. | ||
73 | * | ||
74 | * %-ENOMEM - Insufficient amount of memory available. | ||
75 | */ | ||
76 | static inline int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum) | ||
77 | { | ||
78 | return nilfs_sufile_update(sufile, segnum, 0, | ||
79 | nilfs_sufile_do_cancel_free); | ||
80 | } | ||
81 | |||
82 | /** | ||
83 | * nilfs_sufile_scrap - make a segment garbage | ||
84 | * @sufile: inode of segment usage file | ||
85 | * @segnum: segment number to be freed | ||
86 | */ | ||
87 | static inline int nilfs_sufile_scrap(struct inode *sufile, __u64 segnum) | ||
88 | { | ||
89 | return nilfs_sufile_update(sufile, segnum, 1, nilfs_sufile_do_scrap); | ||
90 | } | ||
91 | |||
92 | /** | ||
93 | * nilfs_sufile_free - free segment | ||
94 | * @sufile: inode of segment usage file | ||
95 | * @segnum: segment number to be freed | ||
96 | */ | ||
97 | static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum) | ||
98 | { | ||
99 | return nilfs_sufile_update(sufile, segnum, 0, nilfs_sufile_do_free); | ||
100 | } | ||
101 | |||
102 | /** | ||
103 | * nilfs_sufile_set_error - mark a segment as erroneous | ||
104 | * @sufile: inode of segment usage file | ||
105 | * @segnum: segment number | ||
106 | * | ||
107 | * Description: nilfs_sufile_set_error() marks the segment specified by | ||
108 | * @segnum as erroneous. The error segment will never be used again. | ||
109 | * | ||
110 | * Return Value: On success, 0 is returned. On error, one of the following | ||
111 | * negative error codes is returned. | ||
112 | * | ||
113 | * %-EIO - I/O error. | ||
114 | * | ||
115 | * %-ENOMEM - Insufficient amount of memory available. | ||
116 | * | ||
117 | * %-EINVAL - Invalid segment usage number. | ||
118 | */ | ||
119 | static inline int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum) | ||
120 | { | ||
121 | return nilfs_sufile_update(sufile, segnum, 0, | ||
122 | nilfs_sufile_do_set_error); | ||
123 | } | ||
53 | 124 | ||
54 | #endif /* _NILFS_SUFILE_H */ | 125 | #endif /* _NILFS_SUFILE_H */ |
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index e117e1ea9bff..6989b03e97ab 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
@@ -63,7 +63,6 @@ | |||
63 | MODULE_AUTHOR("NTT Corp."); | 63 | MODULE_AUTHOR("NTT Corp."); |
64 | MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " | 64 | MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " |
65 | "(NILFS)"); | 65 | "(NILFS)"); |
66 | MODULE_VERSION(NILFS_VERSION); | ||
67 | MODULE_LICENSE("GPL"); | 66 | MODULE_LICENSE("GPL"); |
68 | 67 | ||
69 | static int nilfs_remount(struct super_block *sb, int *flags, char *data); | 68 | static int nilfs_remount(struct super_block *sb, int *flags, char *data); |
@@ -476,11 +475,12 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
476 | { | 475 | { |
477 | struct super_block *sb = dentry->d_sb; | 476 | struct super_block *sb = dentry->d_sb; |
478 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | 477 | struct nilfs_sb_info *sbi = NILFS_SB(sb); |
478 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
479 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | ||
479 | unsigned long long blocks; | 480 | unsigned long long blocks; |
480 | unsigned long overhead; | 481 | unsigned long overhead; |
481 | unsigned long nrsvblocks; | 482 | unsigned long nrsvblocks; |
482 | sector_t nfreeblocks; | 483 | sector_t nfreeblocks; |
483 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
484 | int err; | 484 | int err; |
485 | 485 | ||
486 | /* | 486 | /* |
@@ -514,6 +514,9 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
514 | buf->f_files = atomic_read(&sbi->s_inodes_count); | 514 | buf->f_files = atomic_read(&sbi->s_inodes_count); |
515 | buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */ | 515 | buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */ |
516 | buf->f_namelen = NILFS_NAME_LEN; | 516 | buf->f_namelen = NILFS_NAME_LEN; |
517 | buf->f_fsid.val[0] = (u32)id; | ||
518 | buf->f_fsid.val[1] = (u32)(id >> 32); | ||
519 | |||
517 | return 0; | 520 | return 0; |
518 | } | 521 | } |
519 | 522 | ||
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 33400cf0bbe2..7f65b3be4aa9 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c | |||
@@ -115,6 +115,7 @@ void put_nilfs(struct the_nilfs *nilfs) | |||
115 | static int nilfs_load_super_root(struct the_nilfs *nilfs, | 115 | static int nilfs_load_super_root(struct the_nilfs *nilfs, |
116 | struct nilfs_sb_info *sbi, sector_t sr_block) | 116 | struct nilfs_sb_info *sbi, sector_t sr_block) |
117 | { | 117 | { |
118 | static struct lock_class_key dat_lock_key; | ||
118 | struct buffer_head *bh_sr; | 119 | struct buffer_head *bh_sr; |
119 | struct nilfs_super_root *raw_sr; | 120 | struct nilfs_super_root *raw_sr; |
120 | struct nilfs_super_block **sbp = nilfs->ns_sbp; | 121 | struct nilfs_super_block **sbp = nilfs->ns_sbp; |
@@ -163,6 +164,9 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, | |||
163 | if (unlikely(err)) | 164 | if (unlikely(err)) |
164 | goto failed_sufile; | 165 | goto failed_sufile; |
165 | 166 | ||
167 | lockdep_set_class(&NILFS_MDT(nilfs->ns_dat)->mi_sem, &dat_lock_key); | ||
168 | lockdep_set_class(&NILFS_MDT(nilfs->ns_gc_dat)->mi_sem, &dat_lock_key); | ||
169 | |||
166 | nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat); | 170 | nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat); |
167 | nilfs_mdt_set_entry_size(nilfs->ns_cpfile, checkpoint_size, | 171 | nilfs_mdt_set_entry_size(nilfs->ns_cpfile, checkpoint_size, |
168 | sizeof(struct nilfs_cpfile_header)); | 172 | sizeof(struct nilfs_cpfile_header)); |
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 7d604480557a..b574431a031d 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c | |||
@@ -290,6 +290,21 @@ out_attach: | |||
290 | else | 290 | else |
291 | mlog_errno(ret); | 291 | mlog_errno(ret); |
292 | 292 | ||
293 | /* | ||
294 | * In case of error, manually free the allocation and do the iput(). | ||
295 | * We need to do this because error here means no d_instantiate(), | ||
296 | * which means iput() will not be called during dput(dentry). | ||
297 | */ | ||
298 | if (ret < 0 && !alias) { | ||
299 | ocfs2_lock_res_free(&dl->dl_lockres); | ||
300 | BUG_ON(dl->dl_count != 1); | ||
301 | spin_lock(&dentry_attach_lock); | ||
302 | dentry->d_fsdata = NULL; | ||
303 | spin_unlock(&dentry_attach_lock); | ||
304 | kfree(dl); | ||
305 | iput(inode); | ||
306 | } | ||
307 | |||
293 | dput(alias); | 308 | dput(alias); |
294 | 309 | ||
295 | return ret; | 310 | return ret; |
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index e71160cda110..c5752305627c 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -2697,7 +2697,7 @@ static int ocfs2_dx_dir_index_block(struct inode *dir, | |||
2697 | u32 *num_dx_entries, | 2697 | u32 *num_dx_entries, |
2698 | struct buffer_head *dirent_bh) | 2698 | struct buffer_head *dirent_bh) |
2699 | { | 2699 | { |
2700 | int ret, namelen, i; | 2700 | int ret = 0, namelen, i; |
2701 | char *de_buf, *limit; | 2701 | char *de_buf, *limit; |
2702 | struct ocfs2_dir_entry *de; | 2702 | struct ocfs2_dir_entry *de; |
2703 | struct buffer_head *dx_leaf_bh; | 2703 | struct buffer_head *dx_leaf_bh; |
@@ -2934,7 +2934,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
2934 | */ | 2934 | */ |
2935 | BUG_ON(alloc > 2); | 2935 | BUG_ON(alloc > 2); |
2936 | 2936 | ||
2937 | ret = ocfs2_reserve_clusters(osb, alloc, &data_ac); | 2937 | ret = ocfs2_reserve_clusters(osb, alloc + dx_alloc, &data_ac); |
2938 | if (ret) { | 2938 | if (ret) { |
2939 | mlog_errno(ret); | 2939 | mlog_errno(ret); |
2940 | goto out; | 2940 | goto out; |
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index de3da8eb558c..15713cbb865c 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c | |||
@@ -100,7 +100,8 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, | |||
100 | 100 | ||
101 | /* If the inode allocator bit is clear, this inode must be stale */ | 101 | /* If the inode allocator bit is clear, this inode must be stale */ |
102 | if (!set) { | 102 | if (!set) { |
103 | mlog(0, "inode %llu suballoc bit is clear\n", blkno); | 103 | mlog(0, "inode %llu suballoc bit is clear\n", |
104 | (unsigned long long)blkno); | ||
104 | status = -ESTALE; | 105 | status = -ESTALE; |
105 | goto unlock_nfs_sync; | 106 | goto unlock_nfs_sync; |
106 | } | 107 | } |
@@ -114,7 +115,7 @@ check_err: | |||
114 | if (status < 0) { | 115 | if (status < 0) { |
115 | if (status == -ESTALE) { | 116 | if (status == -ESTALE) { |
116 | mlog(0, "stale inode ino: %llu generation: %u\n", | 117 | mlog(0, "stale inode ino: %llu generation: %u\n", |
117 | blkno, handle->ih_generation); | 118 | (unsigned long long)blkno, handle->ih_generation); |
118 | } | 119 | } |
119 | result = ERR_PTR(status); | 120 | result = ERR_PTR(status); |
120 | goto bail; | 121 | goto bail; |
@@ -129,8 +130,8 @@ check_err: | |||
129 | check_gen: | 130 | check_gen: |
130 | if (handle->ih_generation != inode->i_generation) { | 131 | if (handle->ih_generation != inode->i_generation) { |
131 | iput(inode); | 132 | iput(inode); |
132 | mlog(0, "stale inode ino: %llu generation: %u\n", blkno, | 133 | mlog(0, "stale inode ino: %llu generation: %u\n", |
133 | handle->ih_generation); | 134 | (unsigned long long)blkno, handle->ih_generation); |
134 | result = ERR_PTR(-ESTALE); | 135 | result = ERR_PTR(-ESTALE); |
135 | goto bail; | 136 | goto bail; |
136 | } | 137 | } |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8672b9536039..c2a87c885b73 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -1912,6 +1912,22 @@ out_sems: | |||
1912 | return written ? written : ret; | 1912 | return written ? written : ret; |
1913 | } | 1913 | } |
1914 | 1914 | ||
1915 | static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, | ||
1916 | struct file *out, | ||
1917 | struct splice_desc *sd) | ||
1918 | { | ||
1919 | int ret; | ||
1920 | |||
1921 | ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, | ||
1922 | sd->total_len, 0, NULL); | ||
1923 | if (ret < 0) { | ||
1924 | mlog_errno(ret); | ||
1925 | return ret; | ||
1926 | } | ||
1927 | |||
1928 | return splice_from_pipe_feed(pipe, sd, pipe_to_file); | ||
1929 | } | ||
1930 | |||
1915 | static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, | 1931 | static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, |
1916 | struct file *out, | 1932 | struct file *out, |
1917 | loff_t *ppos, | 1933 | loff_t *ppos, |
@@ -1919,38 +1935,76 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, | |||
1919 | unsigned int flags) | 1935 | unsigned int flags) |
1920 | { | 1936 | { |
1921 | int ret; | 1937 | int ret; |
1922 | struct inode *inode = out->f_path.dentry->d_inode; | 1938 | struct address_space *mapping = out->f_mapping; |
1939 | struct inode *inode = mapping->host; | ||
1940 | struct splice_desc sd = { | ||
1941 | .total_len = len, | ||
1942 | .flags = flags, | ||
1943 | .pos = *ppos, | ||
1944 | .u.file = out, | ||
1945 | }; | ||
1923 | 1946 | ||
1924 | mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe, | 1947 | mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe, |
1925 | (unsigned int)len, | 1948 | (unsigned int)len, |
1926 | out->f_path.dentry->d_name.len, | 1949 | out->f_path.dentry->d_name.len, |
1927 | out->f_path.dentry->d_name.name); | 1950 | out->f_path.dentry->d_name.name); |
1928 | 1951 | ||
1929 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); | 1952 | if (pipe->inode) |
1953 | mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT); | ||
1930 | 1954 | ||
1931 | ret = ocfs2_rw_lock(inode, 1); | 1955 | splice_from_pipe_begin(&sd); |
1932 | if (ret < 0) { | 1956 | do { |
1933 | mlog_errno(ret); | 1957 | ret = splice_from_pipe_next(pipe, &sd); |
1934 | goto out; | 1958 | if (ret <= 0) |
1935 | } | 1959 | break; |
1936 | 1960 | ||
1937 | ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0, | 1961 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); |
1938 | NULL); | 1962 | ret = ocfs2_rw_lock(inode, 1); |
1939 | if (ret < 0) { | 1963 | if (ret < 0) |
1940 | mlog_errno(ret); | 1964 | mlog_errno(ret); |
1941 | goto out_unlock; | 1965 | else { |
1942 | } | 1966 | ret = ocfs2_splice_to_file(pipe, out, &sd); |
1967 | ocfs2_rw_unlock(inode, 1); | ||
1968 | } | ||
1969 | mutex_unlock(&inode->i_mutex); | ||
1970 | } while (ret > 0); | ||
1971 | splice_from_pipe_end(pipe, &sd); | ||
1943 | 1972 | ||
1944 | if (pipe->inode) | 1973 | if (pipe->inode) |
1945 | mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); | ||
1946 | ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags); | ||
1947 | if (pipe->inode) | ||
1948 | mutex_unlock(&pipe->inode->i_mutex); | 1974 | mutex_unlock(&pipe->inode->i_mutex); |
1949 | 1975 | ||
1950 | out_unlock: | 1976 | if (sd.num_spliced) |
1951 | ocfs2_rw_unlock(inode, 1); | 1977 | ret = sd.num_spliced; |
1952 | out: | 1978 | |
1953 | mutex_unlock(&inode->i_mutex); | 1979 | if (ret > 0) { |
1980 | unsigned long nr_pages; | ||
1981 | |||
1982 | *ppos += ret; | ||
1983 | nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
1984 | |||
1985 | /* | ||
1986 | * If file or inode is SYNC and we actually wrote some data, | ||
1987 | * sync it. | ||
1988 | */ | ||
1989 | if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { | ||
1990 | int err; | ||
1991 | |||
1992 | mutex_lock(&inode->i_mutex); | ||
1993 | err = ocfs2_rw_lock(inode, 1); | ||
1994 | if (err < 0) { | ||
1995 | mlog_errno(err); | ||
1996 | } else { | ||
1997 | err = generic_osync_inode(inode, mapping, | ||
1998 | OSYNC_METADATA|OSYNC_DATA); | ||
1999 | ocfs2_rw_unlock(inode, 1); | ||
2000 | } | ||
2001 | mutex_unlock(&inode->i_mutex); | ||
2002 | |||
2003 | if (err) | ||
2004 | ret = err; | ||
2005 | } | ||
2006 | balance_dirty_pages_ratelimited_nr(mapping, nr_pages); | ||
2007 | } | ||
1954 | 2008 | ||
1955 | mlog_exit(ret); | 2009 | mlog_exit(ret); |
1956 | return ret; | 2010 | return ret; |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 619dd7f6c053..eb7b76331eb7 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -437,8 +437,9 @@ static inline int ocfs2_unlink_credits(struct super_block *sb) | |||
437 | } | 437 | } |
438 | 438 | ||
439 | /* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry + | 439 | /* dinode + orphan dir dinode + inode alloc dinode + orphan dir entry + |
440 | * inode alloc group descriptor + orphan dir index leaf */ | 440 | * inode alloc group descriptor + orphan dir index root + |
441 | #define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3) | 441 | * orphan dir index leaf */ |
442 | #define OCFS2_DELETE_INODE_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 4) | ||
442 | 443 | ||
443 | /* dinode update, old dir dinode update, new dir dinode update, old | 444 | /* dinode update, old dir dinode update, new dir dinode update, old |
444 | * dir dir entry, new dir dir entry, dir entry update for renaming | 445 | * dir dir entry, new dir dir entry, dir entry update for renaming |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 2220f93f668b..33464c6b60a2 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -1025,10 +1025,8 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1025 | struct inode *orphan_dir = NULL; | 1025 | struct inode *orphan_dir = NULL; |
1026 | struct ocfs2_dinode *newfe = NULL; | 1026 | struct ocfs2_dinode *newfe = NULL; |
1027 | char orphan_name[OCFS2_ORPHAN_NAMELEN + 1]; | 1027 | char orphan_name[OCFS2_ORPHAN_NAMELEN + 1]; |
1028 | struct buffer_head *orphan_entry_bh = NULL; | ||
1029 | struct buffer_head *newfe_bh = NULL; | 1028 | struct buffer_head *newfe_bh = NULL; |
1030 | struct buffer_head *old_inode_bh = NULL; | 1029 | struct buffer_head *old_inode_bh = NULL; |
1031 | struct buffer_head *insert_entry_bh = NULL; | ||
1032 | struct ocfs2_super *osb = NULL; | 1030 | struct ocfs2_super *osb = NULL; |
1033 | u64 newfe_blkno, old_de_ino; | 1031 | u64 newfe_blkno, old_de_ino; |
1034 | handle_t *handle = NULL; | 1032 | handle_t *handle = NULL; |
@@ -1455,8 +1453,6 @@ bail: | |||
1455 | brelse(old_inode_bh); | 1453 | brelse(old_inode_bh); |
1456 | brelse(old_dir_bh); | 1454 | brelse(old_dir_bh); |
1457 | brelse(new_dir_bh); | 1455 | brelse(new_dir_bh); |
1458 | brelse(orphan_entry_bh); | ||
1459 | brelse(insert_entry_bh); | ||
1460 | 1456 | ||
1461 | mlog_exit(status); | 1457 | mlog_exit(status); |
1462 | 1458 | ||
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index b4ca5911caaf..8439f6b324b9 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -2197,26 +2197,29 @@ static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, | |||
2197 | struct buffer_head *inode_bh = NULL; | 2197 | struct buffer_head *inode_bh = NULL; |
2198 | struct ocfs2_dinode *inode_fe; | 2198 | struct ocfs2_dinode *inode_fe; |
2199 | 2199 | ||
2200 | mlog_entry("blkno: %llu\n", blkno); | 2200 | mlog_entry("blkno: %llu\n", (unsigned long long)blkno); |
2201 | 2201 | ||
2202 | /* dirty read disk */ | 2202 | /* dirty read disk */ |
2203 | status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh); | 2203 | status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh); |
2204 | if (status < 0) { | 2204 | if (status < 0) { |
2205 | mlog(ML_ERROR, "read block %llu failed %d\n", blkno, status); | 2205 | mlog(ML_ERROR, "read block %llu failed %d\n", |
2206 | (unsigned long long)blkno, status); | ||
2206 | goto bail; | 2207 | goto bail; |
2207 | } | 2208 | } |
2208 | 2209 | ||
2209 | inode_fe = (struct ocfs2_dinode *) inode_bh->b_data; | 2210 | inode_fe = (struct ocfs2_dinode *) inode_bh->b_data; |
2210 | if (!OCFS2_IS_VALID_DINODE(inode_fe)) { | 2211 | if (!OCFS2_IS_VALID_DINODE(inode_fe)) { |
2211 | mlog(ML_ERROR, "invalid inode %llu requested\n", blkno); | 2212 | mlog(ML_ERROR, "invalid inode %llu requested\n", |
2213 | (unsigned long long)blkno); | ||
2212 | status = -EINVAL; | 2214 | status = -EINVAL; |
2213 | goto bail; | 2215 | goto bail; |
2214 | } | 2216 | } |
2215 | 2217 | ||
2216 | if (le16_to_cpu(inode_fe->i_suballoc_slot) != OCFS2_INVALID_SLOT && | 2218 | if (le16_to_cpu(inode_fe->i_suballoc_slot) != (u16)OCFS2_INVALID_SLOT && |
2217 | (u32)le16_to_cpu(inode_fe->i_suballoc_slot) > osb->max_slots - 1) { | 2219 | (u32)le16_to_cpu(inode_fe->i_suballoc_slot) > osb->max_slots - 1) { |
2218 | mlog(ML_ERROR, "inode %llu has invalid suballoc slot %u\n", | 2220 | mlog(ML_ERROR, "inode %llu has invalid suballoc slot %u\n", |
2219 | blkno, (u32)le16_to_cpu(inode_fe->i_suballoc_slot)); | 2221 | (unsigned long long)blkno, |
2222 | (u32)le16_to_cpu(inode_fe->i_suballoc_slot)); | ||
2220 | status = -EINVAL; | 2223 | status = -EINVAL; |
2221 | goto bail; | 2224 | goto bail; |
2222 | } | 2225 | } |
@@ -2251,7 +2254,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, | |||
2251 | u64 bg_blkno; | 2254 | u64 bg_blkno; |
2252 | int status; | 2255 | int status; |
2253 | 2256 | ||
2254 | mlog_entry("blkno: %llu bit: %u\n", blkno, (unsigned int)bit); | 2257 | mlog_entry("blkno: %llu bit: %u\n", (unsigned long long)blkno, |
2258 | (unsigned int)bit); | ||
2255 | 2259 | ||
2256 | alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data; | 2260 | alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data; |
2257 | if ((bit + 1) > ocfs2_bits_per_group(&alloc_fe->id2.i_chain)) { | 2261 | if ((bit + 1) > ocfs2_bits_per_group(&alloc_fe->id2.i_chain)) { |
@@ -2266,7 +2270,8 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, | |||
2266 | status = ocfs2_read_group_descriptor(suballoc, alloc_fe, bg_blkno, | 2270 | status = ocfs2_read_group_descriptor(suballoc, alloc_fe, bg_blkno, |
2267 | &group_bh); | 2271 | &group_bh); |
2268 | if (status < 0) { | 2272 | if (status < 0) { |
2269 | mlog(ML_ERROR, "read group %llu failed %d\n", bg_blkno, status); | 2273 | mlog(ML_ERROR, "read group %llu failed %d\n", |
2274 | (unsigned long long)bg_blkno, status); | ||
2270 | goto bail; | 2275 | goto bail; |
2271 | } | 2276 | } |
2272 | 2277 | ||
@@ -2300,7 +2305,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) | |||
2300 | struct inode *inode_alloc_inode; | 2305 | struct inode *inode_alloc_inode; |
2301 | struct buffer_head *alloc_bh = NULL; | 2306 | struct buffer_head *alloc_bh = NULL; |
2302 | 2307 | ||
2303 | mlog_entry("blkno: %llu", blkno); | 2308 | mlog_entry("blkno: %llu", (unsigned long long)blkno); |
2304 | 2309 | ||
2305 | status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, | 2310 | status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, |
2306 | &suballoc_bit); | 2311 | &suballoc_bit); |
@@ -37,6 +37,42 @@ | |||
37 | * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09 | 37 | * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09 |
38 | */ | 38 | */ |
39 | 39 | ||
40 | static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) | ||
41 | { | ||
42 | if (pipe->inode) | ||
43 | mutex_lock_nested(&pipe->inode->i_mutex, subclass); | ||
44 | } | ||
45 | |||
46 | void pipe_lock(struct pipe_inode_info *pipe) | ||
47 | { | ||
48 | /* | ||
49 | * pipe_lock() nests non-pipe inode locks (for writing to a file) | ||
50 | */ | ||
51 | pipe_lock_nested(pipe, I_MUTEX_PARENT); | ||
52 | } | ||
53 | EXPORT_SYMBOL(pipe_lock); | ||
54 | |||
55 | void pipe_unlock(struct pipe_inode_info *pipe) | ||
56 | { | ||
57 | if (pipe->inode) | ||
58 | mutex_unlock(&pipe->inode->i_mutex); | ||
59 | } | ||
60 | EXPORT_SYMBOL(pipe_unlock); | ||
61 | |||
62 | void pipe_double_lock(struct pipe_inode_info *pipe1, | ||
63 | struct pipe_inode_info *pipe2) | ||
64 | { | ||
65 | BUG_ON(pipe1 == pipe2); | ||
66 | |||
67 | if (pipe1 < pipe2) { | ||
68 | pipe_lock_nested(pipe1, I_MUTEX_PARENT); | ||
69 | pipe_lock_nested(pipe2, I_MUTEX_CHILD); | ||
70 | } else { | ||
71 | pipe_lock_nested(pipe2, I_MUTEX_CHILD); | ||
72 | pipe_lock_nested(pipe1, I_MUTEX_PARENT); | ||
73 | } | ||
74 | } | ||
75 | |||
40 | /* Drop the inode semaphore and wait for a pipe event, atomically */ | 76 | /* Drop the inode semaphore and wait for a pipe event, atomically */ |
41 | void pipe_wait(struct pipe_inode_info *pipe) | 77 | void pipe_wait(struct pipe_inode_info *pipe) |
42 | { | 78 | { |
@@ -47,12 +83,10 @@ void pipe_wait(struct pipe_inode_info *pipe) | |||
47 | * is considered a noninteractive wait: | 83 | * is considered a noninteractive wait: |
48 | */ | 84 | */ |
49 | prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE); | 85 | prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE); |
50 | if (pipe->inode) | 86 | pipe_unlock(pipe); |
51 | mutex_unlock(&pipe->inode->i_mutex); | ||
52 | schedule(); | 87 | schedule(); |
53 | finish_wait(&pipe->wait, &wait); | 88 | finish_wait(&pipe->wait, &wait); |
54 | if (pipe->inode) | 89 | pipe_lock(pipe); |
55 | mutex_lock(&pipe->inode->i_mutex); | ||
56 | } | 90 | } |
57 | 91 | ||
58 | static int | 92 | static int |
diff --git a/fs/proc/array.c b/fs/proc/array.c index 7e4877d9dcb5..725a650bbbb8 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -80,6 +80,7 @@ | |||
80 | #include <linux/delayacct.h> | 80 | #include <linux/delayacct.h> |
81 | #include <linux/seq_file.h> | 81 | #include <linux/seq_file.h> |
82 | #include <linux/pid_namespace.h> | 82 | #include <linux/pid_namespace.h> |
83 | #include <linux/ptrace.h> | ||
83 | #include <linux/tracehook.h> | 84 | #include <linux/tracehook.h> |
84 | 85 | ||
85 | #include <asm/pgtable.h> | 86 | #include <asm/pgtable.h> |
@@ -352,6 +353,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
352 | char state; | 353 | char state; |
353 | pid_t ppid = 0, pgid = -1, sid = -1; | 354 | pid_t ppid = 0, pgid = -1, sid = -1; |
354 | int num_threads = 0; | 355 | int num_threads = 0; |
356 | int permitted; | ||
355 | struct mm_struct *mm; | 357 | struct mm_struct *mm; |
356 | unsigned long long start_time; | 358 | unsigned long long start_time; |
357 | unsigned long cmin_flt = 0, cmaj_flt = 0; | 359 | unsigned long cmin_flt = 0, cmaj_flt = 0; |
@@ -364,11 +366,14 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
364 | 366 | ||
365 | state = *get_task_state(task); | 367 | state = *get_task_state(task); |
366 | vsize = eip = esp = 0; | 368 | vsize = eip = esp = 0; |
369 | permitted = ptrace_may_access(task, PTRACE_MODE_READ); | ||
367 | mm = get_task_mm(task); | 370 | mm = get_task_mm(task); |
368 | if (mm) { | 371 | if (mm) { |
369 | vsize = task_vsize(mm); | 372 | vsize = task_vsize(mm); |
370 | eip = KSTK_EIP(task); | 373 | if (permitted) { |
371 | esp = KSTK_ESP(task); | 374 | eip = KSTK_EIP(task); |
375 | esp = KSTK_ESP(task); | ||
376 | } | ||
372 | } | 377 | } |
373 | 378 | ||
374 | get_task_comm(tcomm, task); | 379 | get_task_comm(tcomm, task); |
@@ -424,7 +429,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
424 | unlock_task_sighand(task, &flags); | 429 | unlock_task_sighand(task, &flags); |
425 | } | 430 | } |
426 | 431 | ||
427 | if (!whole || num_threads < 2) | 432 | if (permitted && (!whole || num_threads < 2)) |
428 | wchan = get_wchan(task); | 433 | wchan = get_wchan(task); |
429 | if (!whole) { | 434 | if (!whole) { |
430 | min_flt = task->min_flt; | 435 | min_flt = task->min_flt; |
@@ -476,7 +481,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
476 | rsslim, | 481 | rsslim, |
477 | mm ? mm->start_code : 0, | 482 | mm ? mm->start_code : 0, |
478 | mm ? mm->end_code : 0, | 483 | mm ? mm->end_code : 0, |
479 | mm ? mm->start_stack : 0, | 484 | (permitted && mm) ? mm->start_stack : 0, |
480 | esp, | 485 | esp, |
481 | eip, | 486 | eip, |
482 | /* The signal information here is obsolete. | 487 | /* The signal information here is obsolete. |
diff --git a/fs/proc/base.c b/fs/proc/base.c index f71559784bfb..fb45615943c2 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -322,7 +322,10 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer) | |||
322 | wchan = get_wchan(task); | 322 | wchan = get_wchan(task); |
323 | 323 | ||
324 | if (lookup_symbol_name(wchan, symname) < 0) | 324 | if (lookup_symbol_name(wchan, symname) < 0) |
325 | return sprintf(buffer, "%lu", wchan); | 325 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
326 | return 0; | ||
327 | else | ||
328 | return sprintf(buffer, "%lu", wchan); | ||
326 | else | 329 | else |
327 | return sprintf(buffer, "%s", symname); | 330 | return sprintf(buffer, "%s", symname); |
328 | } | 331 | } |
@@ -648,14 +651,14 @@ static unsigned mounts_poll(struct file *file, poll_table *wait) | |||
648 | { | 651 | { |
649 | struct proc_mounts *p = file->private_data; | 652 | struct proc_mounts *p = file->private_data; |
650 | struct mnt_namespace *ns = p->ns; | 653 | struct mnt_namespace *ns = p->ns; |
651 | unsigned res = 0; | 654 | unsigned res = POLLIN | POLLRDNORM; |
652 | 655 | ||
653 | poll_wait(file, &ns->poll, wait); | 656 | poll_wait(file, &ns->poll, wait); |
654 | 657 | ||
655 | spin_lock(&vfsmount_lock); | 658 | spin_lock(&vfsmount_lock); |
656 | if (p->event != ns->event) { | 659 | if (p->event != ns->event) { |
657 | p->event = ns->event; | 660 | p->event = ns->event; |
658 | res = POLLERR; | 661 | res |= POLLERR | POLLPRI; |
659 | } | 662 | } |
660 | spin_unlock(&vfsmount_lock); | 663 | spin_unlock(&vfsmount_lock); |
661 | 664 | ||
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 74ea974f5ca6..c6b0302af4c4 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c | |||
@@ -35,7 +35,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) | |||
35 | #define K(x) ((x) << (PAGE_SHIFT - 10)) | 35 | #define K(x) ((x) << (PAGE_SHIFT - 10)) |
36 | si_meminfo(&i); | 36 | si_meminfo(&i); |
37 | si_swapinfo(&i); | 37 | si_swapinfo(&i); |
38 | committed = atomic_long_read(&vm_committed_space); | 38 | committed = percpu_counter_read_positive(&vm_committed_as); |
39 | allowed = ((totalram_pages - hugetlb_total_pages()) | 39 | allowed = ((totalram_pages - hugetlb_total_pages()) |
40 | * sysctl_overcommit_ratio / 100) + total_swap_pages; | 40 | * sysctl_overcommit_ratio / 100) + total_swap_pages; |
41 | 41 | ||
diff --git a/fs/proc/stat.c b/fs/proc/stat.c index f75efa22df5e..81e4eb60972e 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c | |||
@@ -18,6 +18,9 @@ | |||
18 | #ifndef arch_irq_stat | 18 | #ifndef arch_irq_stat |
19 | #define arch_irq_stat() 0 | 19 | #define arch_irq_stat() 0 |
20 | #endif | 20 | #endif |
21 | #ifndef arch_idle_time | ||
22 | #define arch_idle_time(cpu) 0 | ||
23 | #endif | ||
21 | 24 | ||
22 | static int show_stat(struct seq_file *p, void *v) | 25 | static int show_stat(struct seq_file *p, void *v) |
23 | { | 26 | { |
@@ -40,6 +43,7 @@ static int show_stat(struct seq_file *p, void *v) | |||
40 | nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); | 43 | nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); |
41 | system = cputime64_add(system, kstat_cpu(i).cpustat.system); | 44 | system = cputime64_add(system, kstat_cpu(i).cpustat.system); |
42 | idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle); | 45 | idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle); |
46 | idle = cputime64_add(idle, arch_idle_time(i)); | ||
43 | iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait); | 47 | iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait); |
44 | irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); | 48 | irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); |
45 | softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); | 49 | softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); |
@@ -69,6 +73,7 @@ static int show_stat(struct seq_file *p, void *v) | |||
69 | nice = kstat_cpu(i).cpustat.nice; | 73 | nice = kstat_cpu(i).cpustat.nice; |
70 | system = kstat_cpu(i).cpustat.system; | 74 | system = kstat_cpu(i).cpustat.system; |
71 | idle = kstat_cpu(i).cpustat.idle; | 75 | idle = kstat_cpu(i).cpustat.idle; |
76 | idle = cputime64_add(idle, arch_idle_time(i)); | ||
72 | iowait = kstat_cpu(i).cpustat.iowait; | 77 | iowait = kstat_cpu(i).cpustat.iowait; |
73 | irq = kstat_cpu(i).cpustat.irq; | 78 | irq = kstat_cpu(i).cpustat.irq; |
74 | softirq = kstat_cpu(i).cpustat.softirq; | 79 | softirq = kstat_cpu(i).cpustat.softirq; |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 39e4ad4f59f4..6f61b7cc32e0 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -665,6 +665,10 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
665 | goto out_task; | 665 | goto out_task; |
666 | 666 | ||
667 | ret = 0; | 667 | ret = 0; |
668 | |||
669 | if (!count) | ||
670 | goto out_task; | ||
671 | |||
668 | mm = get_task_mm(task); | 672 | mm = get_task_mm(task); |
669 | if (!mm) | 673 | if (!mm) |
670 | goto out_task; | 674 | goto out_task; |
diff --git a/fs/quota/Makefile b/fs/quota/Makefile index 385a0831cc99..68d4f6dc0578 100644 --- a/fs/quota/Makefile +++ b/fs/quota/Makefile | |||
@@ -1,12 +1,3 @@ | |||
1 | # | ||
2 | # Makefile for the Linux filesystems. | ||
3 | # | ||
4 | # 14 Sep 2000, Christoph Hellwig <hch@infradead.org> | ||
5 | # Rewritten to use lists instead of if-statements. | ||
6 | # | ||
7 | |||
8 | obj-y := | ||
9 | |||
10 | obj-$(CONFIG_QUOTA) += dquot.o | 1 | obj-$(CONFIG_QUOTA) += dquot.o |
11 | obj-$(CONFIG_QFMT_V1) += quota_v1.o | 2 | obj-$(CONFIG_QFMT_V1) += quota_v1.o |
12 | obj-$(CONFIG_QFMT_V2) += quota_v2.o | 3 | obj-$(CONFIG_QFMT_V2) += quota_v2.o |
diff --git a/fs/romfs/internal.h b/fs/romfs/internal.h index 06044a9dc62d..95217b830118 100644 --- a/fs/romfs/internal.h +++ b/fs/romfs/internal.h | |||
@@ -43,5 +43,5 @@ extern int romfs_dev_read(struct super_block *sb, unsigned long pos, | |||
43 | void *buf, size_t buflen); | 43 | void *buf, size_t buflen); |
44 | extern ssize_t romfs_dev_strnlen(struct super_block *sb, | 44 | extern ssize_t romfs_dev_strnlen(struct super_block *sb, |
45 | unsigned long pos, size_t maxlen); | 45 | unsigned long pos, size_t maxlen); |
46 | extern int romfs_dev_strncmp(struct super_block *sb, unsigned long pos, | 46 | extern int romfs_dev_strcmp(struct super_block *sb, unsigned long pos, |
47 | const char *str, size_t size); | 47 | const char *str, size_t size); |
diff --git a/fs/romfs/storage.c b/fs/romfs/storage.c index 7e3e1e12a081..b3208adf8e71 100644 --- a/fs/romfs/storage.c +++ b/fs/romfs/storage.c | |||
@@ -67,26 +67,35 @@ static ssize_t romfs_mtd_strnlen(struct super_block *sb, | |||
67 | * compare a string to one in a romfs image on MTD | 67 | * compare a string to one in a romfs image on MTD |
68 | * - return 1 if matched, 0 if differ, -ve if error | 68 | * - return 1 if matched, 0 if differ, -ve if error |
69 | */ | 69 | */ |
70 | static int romfs_mtd_strncmp(struct super_block *sb, unsigned long pos, | 70 | static int romfs_mtd_strcmp(struct super_block *sb, unsigned long pos, |
71 | const char *str, size_t size) | 71 | const char *str, size_t size) |
72 | { | 72 | { |
73 | u_char buf[16]; | 73 | u_char buf[17]; |
74 | size_t len, segment; | 74 | size_t len, segment; |
75 | int ret; | 75 | int ret; |
76 | 76 | ||
77 | /* scan the string up to 16 bytes at a time */ | 77 | /* scan the string up to 16 bytes at a time, and attempt to grab the |
78 | * trailing NUL whilst we're at it */ | ||
79 | buf[0] = 0xff; | ||
80 | |||
78 | while (size > 0) { | 81 | while (size > 0) { |
79 | segment = min_t(size_t, size, 16); | 82 | segment = min_t(size_t, size + 1, 17); |
80 | ret = ROMFS_MTD_READ(sb, pos, segment, &len, buf); | 83 | ret = ROMFS_MTD_READ(sb, pos, segment, &len, buf); |
81 | if (ret < 0) | 84 | if (ret < 0) |
82 | return ret; | 85 | return ret; |
86 | len--; | ||
83 | if (memcmp(buf, str, len) != 0) | 87 | if (memcmp(buf, str, len) != 0) |
84 | return 0; | 88 | return 0; |
89 | buf[0] = buf[len]; | ||
85 | size -= len; | 90 | size -= len; |
86 | pos += len; | 91 | pos += len; |
87 | str += len; | 92 | str += len; |
88 | } | 93 | } |
89 | 94 | ||
95 | /* check the trailing NUL was */ | ||
96 | if (buf[0]) | ||
97 | return 0; | ||
98 | |||
90 | return 1; | 99 | return 1; |
91 | } | 100 | } |
92 | #endif /* CONFIG_ROMFS_ON_MTD */ | 101 | #endif /* CONFIG_ROMFS_ON_MTD */ |
@@ -111,6 +120,7 @@ static int romfs_blk_read(struct super_block *sb, unsigned long pos, | |||
111 | return -EIO; | 120 | return -EIO; |
112 | memcpy(buf, bh->b_data + offset, segment); | 121 | memcpy(buf, bh->b_data + offset, segment); |
113 | brelse(bh); | 122 | brelse(bh); |
123 | buf += segment; | ||
114 | buflen -= segment; | 124 | buflen -= segment; |
115 | pos += segment; | 125 | pos += segment; |
116 | } | 126 | } |
@@ -154,28 +164,48 @@ static ssize_t romfs_blk_strnlen(struct super_block *sb, | |||
154 | * compare a string to one in a romfs image on a block device | 164 | * compare a string to one in a romfs image on a block device |
155 | * - return 1 if matched, 0 if differ, -ve if error | 165 | * - return 1 if matched, 0 if differ, -ve if error |
156 | */ | 166 | */ |
157 | static int romfs_blk_strncmp(struct super_block *sb, unsigned long pos, | 167 | static int romfs_blk_strcmp(struct super_block *sb, unsigned long pos, |
158 | const char *str, size_t size) | 168 | const char *str, size_t size) |
159 | { | 169 | { |
160 | struct buffer_head *bh; | 170 | struct buffer_head *bh; |
161 | unsigned long offset; | 171 | unsigned long offset; |
162 | size_t segment; | 172 | size_t segment; |
163 | bool x; | 173 | bool matched, terminated = false; |
164 | 174 | ||
165 | /* scan the string up to 16 bytes at a time */ | 175 | /* compare string up to a block at a time */ |
166 | while (size > 0) { | 176 | while (size > 0) { |
167 | offset = pos & (ROMBSIZE - 1); | 177 | offset = pos & (ROMBSIZE - 1); |
168 | segment = min_t(size_t, size, ROMBSIZE - offset); | 178 | segment = min_t(size_t, size, ROMBSIZE - offset); |
169 | bh = sb_bread(sb, pos >> ROMBSBITS); | 179 | bh = sb_bread(sb, pos >> ROMBSBITS); |
170 | if (!bh) | 180 | if (!bh) |
171 | return -EIO; | 181 | return -EIO; |
172 | x = (memcmp(bh->b_data + offset, str, segment) != 0); | 182 | matched = (memcmp(bh->b_data + offset, str, segment) == 0); |
173 | brelse(bh); | 183 | |
174 | if (x) | ||
175 | return 0; | ||
176 | size -= segment; | 184 | size -= segment; |
177 | pos += segment; | 185 | pos += segment; |
178 | str += segment; | 186 | str += segment; |
187 | if (matched && size == 0 && offset + segment < ROMBSIZE) { | ||
188 | if (!bh->b_data[offset + segment]) | ||
189 | terminated = true; | ||
190 | else | ||
191 | matched = false; | ||
192 | } | ||
193 | brelse(bh); | ||
194 | if (!matched) | ||
195 | return 0; | ||
196 | } | ||
197 | |||
198 | if (!terminated) { | ||
199 | /* the terminating NUL must be on the first byte of the next | ||
200 | * block */ | ||
201 | BUG_ON((pos & (ROMBSIZE - 1)) != 0); | ||
202 | bh = sb_bread(sb, pos >> ROMBSBITS); | ||
203 | if (!bh) | ||
204 | return -EIO; | ||
205 | matched = !bh->b_data[0]; | ||
206 | brelse(bh); | ||
207 | if (!matched) | ||
208 | return 0; | ||
179 | } | 209 | } |
180 | 210 | ||
181 | return 1; | 211 | return 1; |
@@ -234,10 +264,12 @@ ssize_t romfs_dev_strnlen(struct super_block *sb, | |||
234 | 264 | ||
235 | /* | 265 | /* |
236 | * compare a string to one in romfs | 266 | * compare a string to one in romfs |
267 | * - the string to be compared to, str, may not be NUL-terminated; instead the | ||
268 | * string is of the specified size | ||
237 | * - return 1 if matched, 0 if differ, -ve if error | 269 | * - return 1 if matched, 0 if differ, -ve if error |
238 | */ | 270 | */ |
239 | int romfs_dev_strncmp(struct super_block *sb, unsigned long pos, | 271 | int romfs_dev_strcmp(struct super_block *sb, unsigned long pos, |
240 | const char *str, size_t size) | 272 | const char *str, size_t size) |
241 | { | 273 | { |
242 | size_t limit; | 274 | size_t limit; |
243 | 275 | ||
@@ -246,16 +278,16 @@ int romfs_dev_strncmp(struct super_block *sb, unsigned long pos, | |||
246 | return -EIO; | 278 | return -EIO; |
247 | if (size > ROMFS_MAXFN) | 279 | if (size > ROMFS_MAXFN) |
248 | return -ENAMETOOLONG; | 280 | return -ENAMETOOLONG; |
249 | if (size > limit - pos) | 281 | if (size + 1 > limit - pos) |
250 | return -EIO; | 282 | return -EIO; |
251 | 283 | ||
252 | #ifdef CONFIG_ROMFS_ON_MTD | 284 | #ifdef CONFIG_ROMFS_ON_MTD |
253 | if (sb->s_mtd) | 285 | if (sb->s_mtd) |
254 | return romfs_mtd_strncmp(sb, pos, str, size); | 286 | return romfs_mtd_strcmp(sb, pos, str, size); |
255 | #endif | 287 | #endif |
256 | #ifdef CONFIG_ROMFS_ON_BLOCK | 288 | #ifdef CONFIG_ROMFS_ON_BLOCK |
257 | if (sb->s_bdev) | 289 | if (sb->s_bdev) |
258 | return romfs_blk_strncmp(sb, pos, str, size); | 290 | return romfs_blk_strcmp(sb, pos, str, size); |
259 | #endif | 291 | #endif |
260 | return -EIO; | 292 | return -EIO; |
261 | } | 293 | } |
diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 10ca7d984a8b..c53b5ef8a02f 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c | |||
@@ -240,8 +240,8 @@ static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry, | |||
240 | goto error; | 240 | goto error; |
241 | 241 | ||
242 | /* try to match the first 16 bytes of name */ | 242 | /* try to match the first 16 bytes of name */ |
243 | ret = romfs_dev_strncmp(dir->i_sb, offset + ROMFH_SIZE, name, | 243 | ret = romfs_dev_strcmp(dir->i_sb, offset + ROMFH_SIZE, name, |
244 | len); | 244 | len); |
245 | if (ret < 0) | 245 | if (ret < 0) |
246 | goto error; | 246 | goto error; |
247 | if (ret == 1) | 247 | if (ret == 1) |
diff --git a/fs/splice.c b/fs/splice.c index c18aa7e03e2b..666953d59a35 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -182,8 +182,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, | |||
182 | do_wakeup = 0; | 182 | do_wakeup = 0; |
183 | page_nr = 0; | 183 | page_nr = 0; |
184 | 184 | ||
185 | if (pipe->inode) | 185 | pipe_lock(pipe); |
186 | mutex_lock(&pipe->inode->i_mutex); | ||
187 | 186 | ||
188 | for (;;) { | 187 | for (;;) { |
189 | if (!pipe->readers) { | 188 | if (!pipe->readers) { |
@@ -245,15 +244,13 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, | |||
245 | pipe->waiting_writers--; | 244 | pipe->waiting_writers--; |
246 | } | 245 | } |
247 | 246 | ||
248 | if (pipe->inode) { | 247 | pipe_unlock(pipe); |
249 | mutex_unlock(&pipe->inode->i_mutex); | ||
250 | 248 | ||
251 | if (do_wakeup) { | 249 | if (do_wakeup) { |
252 | smp_mb(); | 250 | smp_mb(); |
253 | if (waitqueue_active(&pipe->wait)) | 251 | if (waitqueue_active(&pipe->wait)) |
254 | wake_up_interruptible(&pipe->wait); | 252 | wake_up_interruptible(&pipe->wait); |
255 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | 253 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); |
256 | } | ||
257 | } | 254 | } |
258 | 255 | ||
259 | while (page_nr < spd_pages) | 256 | while (page_nr < spd_pages) |
@@ -555,8 +552,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, | |||
555 | * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create | 552 | * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create |
556 | * a new page in the output file page cache and fill/dirty that. | 553 | * a new page in the output file page cache and fill/dirty that. |
557 | */ | 554 | */ |
558 | static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, | 555 | int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, |
559 | struct splice_desc *sd) | 556 | struct splice_desc *sd) |
560 | { | 557 | { |
561 | struct file *file = sd->u.file; | 558 | struct file *file = sd->u.file; |
562 | struct address_space *mapping = file->f_mapping; | 559 | struct address_space *mapping = file->f_mapping; |
@@ -600,108 +597,177 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, | |||
600 | out: | 597 | out: |
601 | return ret; | 598 | return ret; |
602 | } | 599 | } |
600 | EXPORT_SYMBOL(pipe_to_file); | ||
601 | |||
602 | static void wakeup_pipe_writers(struct pipe_inode_info *pipe) | ||
603 | { | ||
604 | smp_mb(); | ||
605 | if (waitqueue_active(&pipe->wait)) | ||
606 | wake_up_interruptible(&pipe->wait); | ||
607 | kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); | ||
608 | } | ||
603 | 609 | ||
604 | /** | 610 | /** |
605 | * __splice_from_pipe - splice data from a pipe to given actor | 611 | * splice_from_pipe_feed - feed available data from a pipe to a file |
606 | * @pipe: pipe to splice from | 612 | * @pipe: pipe to splice from |
607 | * @sd: information to @actor | 613 | * @sd: information to @actor |
608 | * @actor: handler that splices the data | 614 | * @actor: handler that splices the data |
609 | * | 615 | * |
610 | * Description: | 616 | * Description: |
611 | * This function does little more than loop over the pipe and call | 617 | * This function loops over the pipe and calls @actor to do the |
612 | * @actor to do the actual moving of a single struct pipe_buffer to | 618 | * actual moving of a single struct pipe_buffer to the desired |
613 | * the desired destination. See pipe_to_file, pipe_to_sendpage, or | 619 | * destination. It returns when there's no more buffers left in |
614 | * pipe_to_user. | 620 | * the pipe or if the requested number of bytes (@sd->total_len) |
621 | * have been copied. It returns a positive number (one) if the | ||
622 | * pipe needs to be filled with more data, zero if the required | ||
623 | * number of bytes have been copied and -errno on error. | ||
615 | * | 624 | * |
625 | * This, together with splice_from_pipe_{begin,end,next}, may be | ||
626 | * used to implement the functionality of __splice_from_pipe() when | ||
627 | * locking is required around copying the pipe buffers to the | ||
628 | * destination. | ||
616 | */ | 629 | */ |
617 | ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, | 630 | int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, |
618 | splice_actor *actor) | 631 | splice_actor *actor) |
619 | { | 632 | { |
620 | int ret, do_wakeup, err; | 633 | int ret; |
621 | |||
622 | ret = 0; | ||
623 | do_wakeup = 0; | ||
624 | |||
625 | for (;;) { | ||
626 | if (pipe->nrbufs) { | ||
627 | struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; | ||
628 | const struct pipe_buf_operations *ops = buf->ops; | ||
629 | 634 | ||
630 | sd->len = buf->len; | 635 | while (pipe->nrbufs) { |
631 | if (sd->len > sd->total_len) | 636 | struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; |
632 | sd->len = sd->total_len; | 637 | const struct pipe_buf_operations *ops = buf->ops; |
633 | 638 | ||
634 | err = actor(pipe, buf, sd); | 639 | sd->len = buf->len; |
635 | if (err <= 0) { | 640 | if (sd->len > sd->total_len) |
636 | if (!ret && err != -ENODATA) | 641 | sd->len = sd->total_len; |
637 | ret = err; | ||
638 | 642 | ||
639 | break; | 643 | ret = actor(pipe, buf, sd); |
640 | } | 644 | if (ret <= 0) { |
645 | if (ret == -ENODATA) | ||
646 | ret = 0; | ||
647 | return ret; | ||
648 | } | ||
649 | buf->offset += ret; | ||
650 | buf->len -= ret; | ||
641 | 651 | ||
642 | ret += err; | 652 | sd->num_spliced += ret; |
643 | buf->offset += err; | 653 | sd->len -= ret; |
644 | buf->len -= err; | 654 | sd->pos += ret; |
655 | sd->total_len -= ret; | ||
645 | 656 | ||
646 | sd->len -= err; | 657 | if (!buf->len) { |
647 | sd->pos += err; | 658 | buf->ops = NULL; |
648 | sd->total_len -= err; | 659 | ops->release(pipe, buf); |
649 | if (sd->len) | 660 | pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); |
650 | continue; | 661 | pipe->nrbufs--; |
662 | if (pipe->inode) | ||
663 | sd->need_wakeup = true; | ||
664 | } | ||
651 | 665 | ||
652 | if (!buf->len) { | 666 | if (!sd->total_len) |
653 | buf->ops = NULL; | 667 | return 0; |
654 | ops->release(pipe, buf); | 668 | } |
655 | pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); | ||
656 | pipe->nrbufs--; | ||
657 | if (pipe->inode) | ||
658 | do_wakeup = 1; | ||
659 | } | ||
660 | 669 | ||
661 | if (!sd->total_len) | 670 | return 1; |
662 | break; | 671 | } |
663 | } | 672 | EXPORT_SYMBOL(splice_from_pipe_feed); |
664 | 673 | ||
665 | if (pipe->nrbufs) | 674 | /** |
666 | continue; | 675 | * splice_from_pipe_next - wait for some data to splice from |
676 | * @pipe: pipe to splice from | ||
677 | * @sd: information about the splice operation | ||
678 | * | ||
679 | * Description: | ||
680 | * This function will wait for some data and return a positive | ||
681 | * value (one) if pipe buffers are available. It will return zero | ||
682 | * or -errno if no more data needs to be spliced. | ||
683 | */ | ||
684 | int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) | ||
685 | { | ||
686 | while (!pipe->nrbufs) { | ||
667 | if (!pipe->writers) | 687 | if (!pipe->writers) |
668 | break; | 688 | return 0; |
669 | if (!pipe->waiting_writers) { | ||
670 | if (ret) | ||
671 | break; | ||
672 | } | ||
673 | 689 | ||
674 | if (sd->flags & SPLICE_F_NONBLOCK) { | 690 | if (!pipe->waiting_writers && sd->num_spliced) |
675 | if (!ret) | 691 | return 0; |
676 | ret = -EAGAIN; | ||
677 | break; | ||
678 | } | ||
679 | 692 | ||
680 | if (signal_pending(current)) { | 693 | if (sd->flags & SPLICE_F_NONBLOCK) |
681 | if (!ret) | 694 | return -EAGAIN; |
682 | ret = -ERESTARTSYS; | ||
683 | break; | ||
684 | } | ||
685 | 695 | ||
686 | if (do_wakeup) { | 696 | if (signal_pending(current)) |
687 | smp_mb(); | 697 | return -ERESTARTSYS; |
688 | if (waitqueue_active(&pipe->wait)) | 698 | |
689 | wake_up_interruptible_sync(&pipe->wait); | 699 | if (sd->need_wakeup) { |
690 | kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); | 700 | wakeup_pipe_writers(pipe); |
691 | do_wakeup = 0; | 701 | sd->need_wakeup = false; |
692 | } | 702 | } |
693 | 703 | ||
694 | pipe_wait(pipe); | 704 | pipe_wait(pipe); |
695 | } | 705 | } |
696 | 706 | ||
697 | if (do_wakeup) { | 707 | return 1; |
698 | smp_mb(); | 708 | } |
699 | if (waitqueue_active(&pipe->wait)) | 709 | EXPORT_SYMBOL(splice_from_pipe_next); |
700 | wake_up_interruptible(&pipe->wait); | ||
701 | kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); | ||
702 | } | ||
703 | 710 | ||
704 | return ret; | 711 | /** |
712 | * splice_from_pipe_begin - start splicing from pipe | ||
713 | * @sd: information about the splice operation | ||
714 | * | ||
715 | * Description: | ||
716 | * This function should be called before a loop containing | ||
717 | * splice_from_pipe_next() and splice_from_pipe_feed() to | ||
718 | * initialize the necessary fields of @sd. | ||
719 | */ | ||
720 | void splice_from_pipe_begin(struct splice_desc *sd) | ||
721 | { | ||
722 | sd->num_spliced = 0; | ||
723 | sd->need_wakeup = false; | ||
724 | } | ||
725 | EXPORT_SYMBOL(splice_from_pipe_begin); | ||
726 | |||
727 | /** | ||
728 | * splice_from_pipe_end - finish splicing from pipe | ||
729 | * @pipe: pipe to splice from | ||
730 | * @sd: information about the splice operation | ||
731 | * | ||
732 | * Description: | ||
733 | * This function will wake up pipe writers if necessary. It should | ||
734 | * be called after a loop containing splice_from_pipe_next() and | ||
735 | * splice_from_pipe_feed(). | ||
736 | */ | ||
737 | void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd) | ||
738 | { | ||
739 | if (sd->need_wakeup) | ||
740 | wakeup_pipe_writers(pipe); | ||
741 | } | ||
742 | EXPORT_SYMBOL(splice_from_pipe_end); | ||
743 | |||
744 | /** | ||
745 | * __splice_from_pipe - splice data from a pipe to given actor | ||
746 | * @pipe: pipe to splice from | ||
747 | * @sd: information to @actor | ||
748 | * @actor: handler that splices the data | ||
749 | * | ||
750 | * Description: | ||
751 | * This function does little more than loop over the pipe and call | ||
752 | * @actor to do the actual moving of a single struct pipe_buffer to | ||
753 | * the desired destination. See pipe_to_file, pipe_to_sendpage, or | ||
754 | * pipe_to_user. | ||
755 | * | ||
756 | */ | ||
757 | ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, | ||
758 | splice_actor *actor) | ||
759 | { | ||
760 | int ret; | ||
761 | |||
762 | splice_from_pipe_begin(sd); | ||
763 | do { | ||
764 | ret = splice_from_pipe_next(pipe, sd); | ||
765 | if (ret > 0) | ||
766 | ret = splice_from_pipe_feed(pipe, sd, actor); | ||
767 | } while (ret > 0); | ||
768 | splice_from_pipe_end(pipe, sd); | ||
769 | |||
770 | return sd->num_spliced ? sd->num_spliced : ret; | ||
705 | } | 771 | } |
706 | EXPORT_SYMBOL(__splice_from_pipe); | 772 | EXPORT_SYMBOL(__splice_from_pipe); |
707 | 773 | ||
@@ -715,7 +781,7 @@ EXPORT_SYMBOL(__splice_from_pipe); | |||
715 | * @actor: handler that splices the data | 781 | * @actor: handler that splices the data |
716 | * | 782 | * |
717 | * Description: | 783 | * Description: |
718 | * See __splice_from_pipe. This function locks the input and output inodes, | 784 | * See __splice_from_pipe. This function locks the pipe inode, |
719 | * otherwise it's identical to __splice_from_pipe(). | 785 | * otherwise it's identical to __splice_from_pipe(). |
720 | * | 786 | * |
721 | */ | 787 | */ |
@@ -724,7 +790,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | |||
724 | splice_actor *actor) | 790 | splice_actor *actor) |
725 | { | 791 | { |
726 | ssize_t ret; | 792 | ssize_t ret; |
727 | struct inode *inode = out->f_mapping->host; | ||
728 | struct splice_desc sd = { | 793 | struct splice_desc sd = { |
729 | .total_len = len, | 794 | .total_len = len, |
730 | .flags = flags, | 795 | .flags = flags, |
@@ -732,30 +797,15 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | |||
732 | .u.file = out, | 797 | .u.file = out, |
733 | }; | 798 | }; |
734 | 799 | ||
735 | /* | 800 | pipe_lock(pipe); |
736 | * The actor worker might be calling ->write_begin and | ||
737 | * ->write_end. Most of the time, these expect i_mutex to | ||
738 | * be held. Since this may result in an ABBA deadlock with | ||
739 | * pipe->inode, we have to order lock acquiry here. | ||
740 | * | ||
741 | * Outer lock must be inode->i_mutex, as pipe_wait() will | ||
742 | * release and reacquire pipe->inode->i_mutex, AND inode must | ||
743 | * never be a pipe. | ||
744 | */ | ||
745 | WARN_ON(S_ISFIFO(inode->i_mode)); | ||
746 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); | ||
747 | if (pipe->inode) | ||
748 | mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); | ||
749 | ret = __splice_from_pipe(pipe, &sd, actor); | 801 | ret = __splice_from_pipe(pipe, &sd, actor); |
750 | if (pipe->inode) | 802 | pipe_unlock(pipe); |
751 | mutex_unlock(&pipe->inode->i_mutex); | ||
752 | mutex_unlock(&inode->i_mutex); | ||
753 | 803 | ||
754 | return ret; | 804 | return ret; |
755 | } | 805 | } |
756 | 806 | ||
757 | /** | 807 | /** |
758 | * generic_file_splice_write_nolock - generic_file_splice_write without mutexes | 808 | * generic_file_splice_write - splice data from a pipe to a file |
759 | * @pipe: pipe info | 809 | * @pipe: pipe info |
760 | * @out: file to write to | 810 | * @out: file to write to |
761 | * @ppos: position in @out | 811 | * @ppos: position in @out |
@@ -764,13 +814,12 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, | |||
764 | * | 814 | * |
765 | * Description: | 815 | * Description: |
766 | * Will either move or copy pages (determined by @flags options) from | 816 | * Will either move or copy pages (determined by @flags options) from |
767 | * the given pipe inode to the given file. The caller is responsible | 817 | * the given pipe inode to the given file. |
768 | * for acquiring i_mutex on both inodes. | ||
769 | * | 818 | * |
770 | */ | 819 | */ |
771 | ssize_t | 820 | ssize_t |
772 | generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, | 821 | generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, |
773 | loff_t *ppos, size_t len, unsigned int flags) | 822 | loff_t *ppos, size_t len, unsigned int flags) |
774 | { | 823 | { |
775 | struct address_space *mapping = out->f_mapping; | 824 | struct address_space *mapping = out->f_mapping; |
776 | struct inode *inode = mapping->host; | 825 | struct inode *inode = mapping->host; |
@@ -781,76 +830,28 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, | |||
781 | .u.file = out, | 830 | .u.file = out, |
782 | }; | 831 | }; |
783 | ssize_t ret; | 832 | ssize_t ret; |
784 | int err; | ||
785 | |||
786 | err = file_remove_suid(out); | ||
787 | if (unlikely(err)) | ||
788 | return err; | ||
789 | |||
790 | ret = __splice_from_pipe(pipe, &sd, pipe_to_file); | ||
791 | if (ret > 0) { | ||
792 | unsigned long nr_pages; | ||
793 | 833 | ||
794 | *ppos += ret; | 834 | pipe_lock(pipe); |
795 | nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
796 | |||
797 | /* | ||
798 | * If file or inode is SYNC and we actually wrote some data, | ||
799 | * sync it. | ||
800 | */ | ||
801 | if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { | ||
802 | err = generic_osync_inode(inode, mapping, | ||
803 | OSYNC_METADATA|OSYNC_DATA); | ||
804 | 835 | ||
805 | if (err) | 836 | splice_from_pipe_begin(&sd); |
806 | ret = err; | 837 | do { |
807 | } | 838 | ret = splice_from_pipe_next(pipe, &sd); |
808 | balance_dirty_pages_ratelimited_nr(mapping, nr_pages); | 839 | if (ret <= 0) |
809 | } | 840 | break; |
810 | 841 | ||
811 | return ret; | 842 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); |
812 | } | 843 | ret = file_remove_suid(out); |
844 | if (!ret) | ||
845 | ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); | ||
846 | mutex_unlock(&inode->i_mutex); | ||
847 | } while (ret > 0); | ||
848 | splice_from_pipe_end(pipe, &sd); | ||
813 | 849 | ||
814 | EXPORT_SYMBOL(generic_file_splice_write_nolock); | 850 | pipe_unlock(pipe); |
815 | 851 | ||
816 | /** | 852 | if (sd.num_spliced) |
817 | * generic_file_splice_write - splice data from a pipe to a file | 853 | ret = sd.num_spliced; |
818 | * @pipe: pipe info | ||
819 | * @out: file to write to | ||
820 | * @ppos: position in @out | ||
821 | * @len: number of bytes to splice | ||
822 | * @flags: splice modifier flags | ||
823 | * | ||
824 | * Description: | ||
825 | * Will either move or copy pages (determined by @flags options) from | ||
826 | * the given pipe inode to the given file. | ||
827 | * | ||
828 | */ | ||
829 | ssize_t | ||
830 | generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | ||
831 | loff_t *ppos, size_t len, unsigned int flags) | ||
832 | { | ||
833 | struct address_space *mapping = out->f_mapping; | ||
834 | struct inode *inode = mapping->host; | ||
835 | struct splice_desc sd = { | ||
836 | .total_len = len, | ||
837 | .flags = flags, | ||
838 | .pos = *ppos, | ||
839 | .u.file = out, | ||
840 | }; | ||
841 | ssize_t ret; | ||
842 | 854 | ||
843 | WARN_ON(S_ISFIFO(inode->i_mode)); | ||
844 | mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); | ||
845 | ret = file_remove_suid(out); | ||
846 | if (likely(!ret)) { | ||
847 | if (pipe->inode) | ||
848 | mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); | ||
849 | ret = __splice_from_pipe(pipe, &sd, pipe_to_file); | ||
850 | if (pipe->inode) | ||
851 | mutex_unlock(&pipe->inode->i_mutex); | ||
852 | } | ||
853 | mutex_unlock(&inode->i_mutex); | ||
854 | if (ret > 0) { | 855 | if (ret > 0) { |
855 | unsigned long nr_pages; | 856 | unsigned long nr_pages; |
856 | 857 | ||
@@ -1339,8 +1340,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, | |||
1339 | if (!pipe) | 1340 | if (!pipe) |
1340 | return -EBADF; | 1341 | return -EBADF; |
1341 | 1342 | ||
1342 | if (pipe->inode) | 1343 | pipe_lock(pipe); |
1343 | mutex_lock(&pipe->inode->i_mutex); | ||
1344 | 1344 | ||
1345 | error = ret = 0; | 1345 | error = ret = 0; |
1346 | while (nr_segs) { | 1346 | while (nr_segs) { |
@@ -1395,8 +1395,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, | |||
1395 | iov++; | 1395 | iov++; |
1396 | } | 1396 | } |
1397 | 1397 | ||
1398 | if (pipe->inode) | 1398 | pipe_unlock(pipe); |
1399 | mutex_unlock(&pipe->inode->i_mutex); | ||
1400 | 1399 | ||
1401 | if (!ret) | 1400 | if (!ret) |
1402 | ret = error; | 1401 | ret = error; |
@@ -1524,7 +1523,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | |||
1524 | return 0; | 1523 | return 0; |
1525 | 1524 | ||
1526 | ret = 0; | 1525 | ret = 0; |
1527 | mutex_lock(&pipe->inode->i_mutex); | 1526 | pipe_lock(pipe); |
1528 | 1527 | ||
1529 | while (!pipe->nrbufs) { | 1528 | while (!pipe->nrbufs) { |
1530 | if (signal_pending(current)) { | 1529 | if (signal_pending(current)) { |
@@ -1542,7 +1541,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | |||
1542 | pipe_wait(pipe); | 1541 | pipe_wait(pipe); |
1543 | } | 1542 | } |
1544 | 1543 | ||
1545 | mutex_unlock(&pipe->inode->i_mutex); | 1544 | pipe_unlock(pipe); |
1546 | return ret; | 1545 | return ret; |
1547 | } | 1546 | } |
1548 | 1547 | ||
@@ -1562,7 +1561,7 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | |||
1562 | return 0; | 1561 | return 0; |
1563 | 1562 | ||
1564 | ret = 0; | 1563 | ret = 0; |
1565 | mutex_lock(&pipe->inode->i_mutex); | 1564 | pipe_lock(pipe); |
1566 | 1565 | ||
1567 | while (pipe->nrbufs >= PIPE_BUFFERS) { | 1566 | while (pipe->nrbufs >= PIPE_BUFFERS) { |
1568 | if (!pipe->readers) { | 1567 | if (!pipe->readers) { |
@@ -1583,7 +1582,7 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) | |||
1583 | pipe->waiting_writers--; | 1582 | pipe->waiting_writers--; |
1584 | } | 1583 | } |
1585 | 1584 | ||
1586 | mutex_unlock(&pipe->inode->i_mutex); | 1585 | pipe_unlock(pipe); |
1587 | return ret; | 1586 | return ret; |
1588 | } | 1587 | } |
1589 | 1588 | ||
@@ -1599,10 +1598,10 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
1599 | 1598 | ||
1600 | /* | 1599 | /* |
1601 | * Potential ABBA deadlock, work around it by ordering lock | 1600 | * Potential ABBA deadlock, work around it by ordering lock |
1602 | * grabbing by inode address. Otherwise two different processes | 1601 | * grabbing by pipe info address. Otherwise two different processes |
1603 | * could deadlock (one doing tee from A -> B, the other from B -> A). | 1602 | * could deadlock (one doing tee from A -> B, the other from B -> A). |
1604 | */ | 1603 | */ |
1605 | inode_double_lock(ipipe->inode, opipe->inode); | 1604 | pipe_double_lock(ipipe, opipe); |
1606 | 1605 | ||
1607 | do { | 1606 | do { |
1608 | if (!opipe->readers) { | 1607 | if (!opipe->readers) { |
@@ -1653,7 +1652,8 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
1653 | if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK)) | 1652 | if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK)) |
1654 | ret = -EAGAIN; | 1653 | ret = -EAGAIN; |
1655 | 1654 | ||
1656 | inode_double_unlock(ipipe->inode, opipe->inode); | 1655 | pipe_unlock(ipipe); |
1656 | pipe_unlock(opipe); | ||
1657 | 1657 | ||
1658 | /* | 1658 | /* |
1659 | * If we put data in the output pipe, wakeup any potential readers. | 1659 | * If we put data in the output pipe, wakeup any potential readers. |
@@ -55,59 +55,54 @@ int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | |||
55 | 55 | ||
56 | EXPORT_SYMBOL(vfs_getattr); | 56 | EXPORT_SYMBOL(vfs_getattr); |
57 | 57 | ||
58 | int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat) | 58 | int vfs_fstat(unsigned int fd, struct kstat *stat) |
59 | { | 59 | { |
60 | struct path path; | 60 | struct file *f = fget(fd); |
61 | int error; | 61 | int error = -EBADF; |
62 | 62 | ||
63 | error = user_path_at(dfd, name, LOOKUP_FOLLOW, &path); | 63 | if (f) { |
64 | if (!error) { | 64 | error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat); |
65 | error = vfs_getattr(path.mnt, path.dentry, stat); | 65 | fput(f); |
66 | path_put(&path); | ||
67 | } | 66 | } |
68 | return error; | 67 | return error; |
69 | } | 68 | } |
69 | EXPORT_SYMBOL(vfs_fstat); | ||
70 | 70 | ||
71 | int vfs_stat(char __user *name, struct kstat *stat) | 71 | int vfs_fstatat(int dfd, char __user *filename, struct kstat *stat, int flag) |
72 | { | 72 | { |
73 | return vfs_stat_fd(AT_FDCWD, name, stat); | 73 | struct path path; |
74 | } | 74 | int error = -EINVAL; |
75 | int lookup_flags = 0; | ||
75 | 76 | ||
76 | EXPORT_SYMBOL(vfs_stat); | 77 | if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) |
78 | goto out; | ||
77 | 79 | ||
78 | int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat) | 80 | if (!(flag & AT_SYMLINK_NOFOLLOW)) |
79 | { | 81 | lookup_flags |= LOOKUP_FOLLOW; |
80 | struct path path; | ||
81 | int error; | ||
82 | 82 | ||
83 | error = user_path_at(dfd, name, 0, &path); | 83 | error = user_path_at(dfd, filename, lookup_flags, &path); |
84 | if (!error) { | 84 | if (error) |
85 | error = vfs_getattr(path.mnt, path.dentry, stat); | 85 | goto out; |
86 | path_put(&path); | 86 | |
87 | } | 87 | error = vfs_getattr(path.mnt, path.dentry, stat); |
88 | path_put(&path); | ||
89 | out: | ||
88 | return error; | 90 | return error; |
89 | } | 91 | } |
92 | EXPORT_SYMBOL(vfs_fstatat); | ||
90 | 93 | ||
91 | int vfs_lstat(char __user *name, struct kstat *stat) | 94 | int vfs_stat(char __user *name, struct kstat *stat) |
92 | { | 95 | { |
93 | return vfs_lstat_fd(AT_FDCWD, name, stat); | 96 | return vfs_fstatat(AT_FDCWD, name, stat, 0); |
94 | } | 97 | } |
98 | EXPORT_SYMBOL(vfs_stat); | ||
95 | 99 | ||
96 | EXPORT_SYMBOL(vfs_lstat); | 100 | int vfs_lstat(char __user *name, struct kstat *stat) |
97 | |||
98 | int vfs_fstat(unsigned int fd, struct kstat *stat) | ||
99 | { | 101 | { |
100 | struct file *f = fget(fd); | 102 | return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW); |
101 | int error = -EBADF; | ||
102 | |||
103 | if (f) { | ||
104 | error = vfs_getattr(f->f_path.mnt, f->f_path.dentry, stat); | ||
105 | fput(f); | ||
106 | } | ||
107 | return error; | ||
108 | } | 103 | } |
104 | EXPORT_SYMBOL(vfs_lstat); | ||
109 | 105 | ||
110 | EXPORT_SYMBOL(vfs_fstat); | ||
111 | 106 | ||
112 | #ifdef __ARCH_WANT_OLD_STAT | 107 | #ifdef __ARCH_WANT_OLD_STAT |
113 | 108 | ||
@@ -155,23 +150,25 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta | |||
155 | SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) | 150 | SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) |
156 | { | 151 | { |
157 | struct kstat stat; | 152 | struct kstat stat; |
158 | int error = vfs_stat_fd(AT_FDCWD, filename, &stat); | 153 | int error; |
159 | 154 | ||
160 | if (!error) | 155 | error = vfs_stat(filename, &stat); |
161 | error = cp_old_stat(&stat, statbuf); | 156 | if (error) |
157 | return error; | ||
162 | 158 | ||
163 | return error; | 159 | return cp_old_stat(&stat, statbuf); |
164 | } | 160 | } |
165 | 161 | ||
166 | SYSCALL_DEFINE2(lstat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) | 162 | SYSCALL_DEFINE2(lstat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) |
167 | { | 163 | { |
168 | struct kstat stat; | 164 | struct kstat stat; |
169 | int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); | 165 | int error; |
170 | 166 | ||
171 | if (!error) | 167 | error = vfs_lstat(filename, &stat); |
172 | error = cp_old_stat(&stat, statbuf); | 168 | if (error) |
169 | return error; | ||
173 | 170 | ||
174 | return error; | 171 | return cp_old_stat(&stat, statbuf); |
175 | } | 172 | } |
176 | 173 | ||
177 | SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf) | 174 | SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf) |
@@ -240,23 +237,23 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) | |||
240 | SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf) | 237 | SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf) |
241 | { | 238 | { |
242 | struct kstat stat; | 239 | struct kstat stat; |
243 | int error = vfs_stat_fd(AT_FDCWD, filename, &stat); | 240 | int error = vfs_stat(filename, &stat); |
244 | |||
245 | if (!error) | ||
246 | error = cp_new_stat(&stat, statbuf); | ||
247 | 241 | ||
248 | return error; | 242 | if (error) |
243 | return error; | ||
244 | return cp_new_stat(&stat, statbuf); | ||
249 | } | 245 | } |
250 | 246 | ||
251 | SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf) | 247 | SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf) |
252 | { | 248 | { |
253 | struct kstat stat; | 249 | struct kstat stat; |
254 | int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); | 250 | int error; |
255 | 251 | ||
256 | if (!error) | 252 | error = vfs_lstat(filename, &stat); |
257 | error = cp_new_stat(&stat, statbuf); | 253 | if (error) |
254 | return error; | ||
258 | 255 | ||
259 | return error; | 256 | return cp_new_stat(&stat, statbuf); |
260 | } | 257 | } |
261 | 258 | ||
262 | #if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) | 259 | #if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) |
@@ -264,21 +261,12 @@ SYSCALL_DEFINE4(newfstatat, int, dfd, char __user *, filename, | |||
264 | struct stat __user *, statbuf, int, flag) | 261 | struct stat __user *, statbuf, int, flag) |
265 | { | 262 | { |
266 | struct kstat stat; | 263 | struct kstat stat; |
267 | int error = -EINVAL; | 264 | int error; |
268 | |||
269 | if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) | ||
270 | goto out; | ||
271 | |||
272 | if (flag & AT_SYMLINK_NOFOLLOW) | ||
273 | error = vfs_lstat_fd(dfd, filename, &stat); | ||
274 | else | ||
275 | error = vfs_stat_fd(dfd, filename, &stat); | ||
276 | |||
277 | if (!error) | ||
278 | error = cp_new_stat(&stat, statbuf); | ||
279 | 265 | ||
280 | out: | 266 | error = vfs_fstatat(dfd, filename, &stat, flag); |
281 | return error; | 267 | if (error) |
268 | return error; | ||
269 | return cp_new_stat(&stat, statbuf); | ||
282 | } | 270 | } |
283 | #endif | 271 | #endif |
284 | 272 | ||
@@ -404,21 +392,12 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename, | |||
404 | struct stat64 __user *, statbuf, int, flag) | 392 | struct stat64 __user *, statbuf, int, flag) |
405 | { | 393 | { |
406 | struct kstat stat; | 394 | struct kstat stat; |
407 | int error = -EINVAL; | 395 | int error; |
408 | |||
409 | if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) | ||
410 | goto out; | ||
411 | |||
412 | if (flag & AT_SYMLINK_NOFOLLOW) | ||
413 | error = vfs_lstat_fd(dfd, filename, &stat); | ||
414 | else | ||
415 | error = vfs_stat_fd(dfd, filename, &stat); | ||
416 | |||
417 | if (!error) | ||
418 | error = cp_new_stat64(&stat, statbuf); | ||
419 | 396 | ||
420 | out: | 397 | error = vfs_fstatat(dfd, filename, &stat, flag); |
421 | return error; | 398 | if (error) |
399 | return error; | ||
400 | return cp_new_stat64(&stat, statbuf); | ||
422 | } | 401 | } |
423 | #endif /* __ARCH_WANT_STAT64 */ | 402 | #endif /* __ARCH_WANT_STAT64 */ |
424 | 403 | ||
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 93e0c0281d45..9345806c8853 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c | |||
@@ -157,14 +157,9 @@ static ssize_t write(struct file *file, const char __user *userbuf, | |||
157 | count = size - offs; | 157 | count = size - offs; |
158 | } | 158 | } |
159 | 159 | ||
160 | temp = kmalloc(count, GFP_KERNEL); | 160 | temp = memdup_user(userbuf, count); |
161 | if (!temp) | 161 | if (IS_ERR(temp)) |
162 | return -ENOMEM; | 162 | return PTR_ERR(temp); |
163 | |||
164 | if (copy_from_user(temp, userbuf, count)) { | ||
165 | count = -EFAULT; | ||
166 | goto out_free; | ||
167 | } | ||
168 | 163 | ||
169 | mutex_lock(&bb->mutex); | 164 | mutex_lock(&bb->mutex); |
170 | 165 | ||
@@ -176,8 +171,6 @@ static ssize_t write(struct file *file, const char __user *userbuf, | |||
176 | if (count > 0) | 171 | if (count > 0) |
177 | *off = offs + count; | 172 | *off = offs + count; |
178 | 173 | ||
179 | out_free: | ||
180 | kfree(temp); | ||
181 | return count; | 174 | return count; |
182 | } | 175 | } |
183 | 176 | ||
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 289c43a47263..b1606e07b7a3 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
@@ -446,11 +446,11 @@ static unsigned int sysfs_poll(struct file *filp, poll_table *wait) | |||
446 | if (buffer->event != atomic_read(&od->event)) | 446 | if (buffer->event != atomic_read(&od->event)) |
447 | goto trigger; | 447 | goto trigger; |
448 | 448 | ||
449 | return 0; | 449 | return DEFAULT_POLLMASK; |
450 | 450 | ||
451 | trigger: | 451 | trigger: |
452 | buffer->needs_read_fill = 1; | 452 | buffer->needs_read_fill = 1; |
453 | return POLLERR|POLLPRI; | 453 | return DEFAULT_POLLMASK|POLLERR|POLLPRI; |
454 | } | 454 | } |
455 | 455 | ||
456 | void sysfs_notify_dirent(struct sysfs_dirent *sd) | 456 | void sysfs_notify_dirent(struct sysfs_dirent *sd) |
@@ -667,6 +667,7 @@ struct sysfs_schedule_callback_struct { | |||
667 | struct work_struct work; | 667 | struct work_struct work; |
668 | }; | 668 | }; |
669 | 669 | ||
670 | static struct workqueue_struct *sysfs_workqueue; | ||
670 | static DEFINE_MUTEX(sysfs_workq_mutex); | 671 | static DEFINE_MUTEX(sysfs_workq_mutex); |
671 | static LIST_HEAD(sysfs_workq); | 672 | static LIST_HEAD(sysfs_workq); |
672 | static void sysfs_schedule_callback_work(struct work_struct *work) | 673 | static void sysfs_schedule_callback_work(struct work_struct *work) |
@@ -715,11 +716,20 @@ int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), | |||
715 | mutex_lock(&sysfs_workq_mutex); | 716 | mutex_lock(&sysfs_workq_mutex); |
716 | list_for_each_entry_safe(ss, tmp, &sysfs_workq, workq_list) | 717 | list_for_each_entry_safe(ss, tmp, &sysfs_workq, workq_list) |
717 | if (ss->kobj == kobj) { | 718 | if (ss->kobj == kobj) { |
719 | module_put(owner); | ||
718 | mutex_unlock(&sysfs_workq_mutex); | 720 | mutex_unlock(&sysfs_workq_mutex); |
719 | return -EAGAIN; | 721 | return -EAGAIN; |
720 | } | 722 | } |
721 | mutex_unlock(&sysfs_workq_mutex); | 723 | mutex_unlock(&sysfs_workq_mutex); |
722 | 724 | ||
725 | if (sysfs_workqueue == NULL) { | ||
726 | sysfs_workqueue = create_workqueue("sysfsd"); | ||
727 | if (sysfs_workqueue == NULL) { | ||
728 | module_put(owner); | ||
729 | return -ENOMEM; | ||
730 | } | ||
731 | } | ||
732 | |||
723 | ss = kmalloc(sizeof(*ss), GFP_KERNEL); | 733 | ss = kmalloc(sizeof(*ss), GFP_KERNEL); |
724 | if (!ss) { | 734 | if (!ss) { |
725 | module_put(owner); | 735 | module_put(owner); |
@@ -735,7 +745,7 @@ int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *), | |||
735 | mutex_lock(&sysfs_workq_mutex); | 745 | mutex_lock(&sysfs_workq_mutex); |
736 | list_add_tail(&ss->workq_list, &sysfs_workq); | 746 | list_add_tail(&ss->workq_list, &sysfs_workq); |
737 | mutex_unlock(&sysfs_workq_mutex); | 747 | mutex_unlock(&sysfs_workq_mutex); |
738 | schedule_work(&ss->work); | 748 | queue_work(sysfs_workqueue, &ss->work); |
739 | return 0; | 749 | return 0; |
740 | } | 750 | } |
741 | EXPORT_SYMBOL_GPL(sysfs_schedule_callback); | 751 | EXPORT_SYMBOL_GPL(sysfs_schedule_callback); |
diff --git a/fs/xattr.c b/fs/xattr.c index 197c4fcac032..d51b8f9db921 100644 --- a/fs/xattr.c +++ b/fs/xattr.c | |||
@@ -237,13 +237,9 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value, | |||
237 | if (size) { | 237 | if (size) { |
238 | if (size > XATTR_SIZE_MAX) | 238 | if (size > XATTR_SIZE_MAX) |
239 | return -E2BIG; | 239 | return -E2BIG; |
240 | kvalue = kmalloc(size, GFP_KERNEL); | 240 | kvalue = memdup_user(value, size); |
241 | if (!kvalue) | 241 | if (IS_ERR(kvalue)) |
242 | return -ENOMEM; | 242 | return PTR_ERR(kvalue); |
243 | if (copy_from_user(kvalue, value, size)) { | ||
244 | kfree(kvalue); | ||
245 | return -EFAULT; | ||
246 | } | ||
247 | } | 243 | } |
248 | 244 | ||
249 | error = vfs_setxattr(d, kname, kvalue, size, flags); | 245 | error = vfs_setxattr(d, kname, kvalue, size, flags); |
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index c13f67300fe7..7ec89fc05b2b 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -153,23 +153,6 @@ xfs_find_bdev_for_inode( | |||
153 | } | 153 | } |
154 | 154 | ||
155 | /* | 155 | /* |
156 | * Schedule IO completion handling on a xfsdatad if this was | ||
157 | * the final hold on this ioend. If we are asked to wait, | ||
158 | * flush the workqueue. | ||
159 | */ | ||
160 | STATIC void | ||
161 | xfs_finish_ioend( | ||
162 | xfs_ioend_t *ioend, | ||
163 | int wait) | ||
164 | { | ||
165 | if (atomic_dec_and_test(&ioend->io_remaining)) { | ||
166 | queue_work(xfsdatad_workqueue, &ioend->io_work); | ||
167 | if (wait) | ||
168 | flush_workqueue(xfsdatad_workqueue); | ||
169 | } | ||
170 | } | ||
171 | |||
172 | /* | ||
173 | * We're now finished for good with this ioend structure. | 156 | * We're now finished for good with this ioend structure. |
174 | * Update the page state via the associated buffer_heads, | 157 | * Update the page state via the associated buffer_heads, |
175 | * release holds on the inode and bio, and finally free | 158 | * release holds on the inode and bio, and finally free |
@@ -310,6 +293,27 @@ xfs_end_bio_read( | |||
310 | } | 293 | } |
311 | 294 | ||
312 | /* | 295 | /* |
296 | * Schedule IO completion handling on a xfsdatad if this was | ||
297 | * the final hold on this ioend. If we are asked to wait, | ||
298 | * flush the workqueue. | ||
299 | */ | ||
300 | STATIC void | ||
301 | xfs_finish_ioend( | ||
302 | xfs_ioend_t *ioend, | ||
303 | int wait) | ||
304 | { | ||
305 | if (atomic_dec_and_test(&ioend->io_remaining)) { | ||
306 | struct workqueue_struct *wq = xfsdatad_workqueue; | ||
307 | if (ioend->io_work.func == xfs_end_bio_unwritten) | ||
308 | wq = xfsconvertd_workqueue; | ||
309 | |||
310 | queue_work(wq, &ioend->io_work); | ||
311 | if (wait) | ||
312 | flush_workqueue(wq); | ||
313 | } | ||
314 | } | ||
315 | |||
316 | /* | ||
313 | * Allocate and initialise an IO completion structure. | 317 | * Allocate and initialise an IO completion structure. |
314 | * We need to track unwritten extent write completion here initially. | 318 | * We need to track unwritten extent write completion here initially. |
315 | * We'll need to extend this for updating the ondisk inode size later | 319 | * We'll need to extend this for updating the ondisk inode size later |
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index 1dd528849755..221b3e66ceef 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h | |||
@@ -19,6 +19,7 @@ | |||
19 | #define __XFS_AOPS_H__ | 19 | #define __XFS_AOPS_H__ |
20 | 20 | ||
21 | extern struct workqueue_struct *xfsdatad_workqueue; | 21 | extern struct workqueue_struct *xfsdatad_workqueue; |
22 | extern struct workqueue_struct *xfsconvertd_workqueue; | ||
22 | extern mempool_t *xfs_ioend_pool; | 23 | extern mempool_t *xfs_ioend_pool; |
23 | 24 | ||
24 | /* | 25 | /* |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index aa1016bb9134..e28800a9f2b5 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -51,6 +51,7 @@ static struct shrinker xfs_buf_shake = { | |||
51 | 51 | ||
52 | static struct workqueue_struct *xfslogd_workqueue; | 52 | static struct workqueue_struct *xfslogd_workqueue; |
53 | struct workqueue_struct *xfsdatad_workqueue; | 53 | struct workqueue_struct *xfsdatad_workqueue; |
54 | struct workqueue_struct *xfsconvertd_workqueue; | ||
54 | 55 | ||
55 | #ifdef XFS_BUF_TRACE | 56 | #ifdef XFS_BUF_TRACE |
56 | void | 57 | void |
@@ -1775,6 +1776,7 @@ xfs_flush_buftarg( | |||
1775 | xfs_buf_t *bp, *n; | 1776 | xfs_buf_t *bp, *n; |
1776 | int pincount = 0; | 1777 | int pincount = 0; |
1777 | 1778 | ||
1779 | xfs_buf_runall_queues(xfsconvertd_workqueue); | ||
1778 | xfs_buf_runall_queues(xfsdatad_workqueue); | 1780 | xfs_buf_runall_queues(xfsdatad_workqueue); |
1779 | xfs_buf_runall_queues(xfslogd_workqueue); | 1781 | xfs_buf_runall_queues(xfslogd_workqueue); |
1780 | 1782 | ||
@@ -1831,9 +1833,15 @@ xfs_buf_init(void) | |||
1831 | if (!xfsdatad_workqueue) | 1833 | if (!xfsdatad_workqueue) |
1832 | goto out_destroy_xfslogd_workqueue; | 1834 | goto out_destroy_xfslogd_workqueue; |
1833 | 1835 | ||
1836 | xfsconvertd_workqueue = create_workqueue("xfsconvertd"); | ||
1837 | if (!xfsconvertd_workqueue) | ||
1838 | goto out_destroy_xfsdatad_workqueue; | ||
1839 | |||
1834 | register_shrinker(&xfs_buf_shake); | 1840 | register_shrinker(&xfs_buf_shake); |
1835 | return 0; | 1841 | return 0; |
1836 | 1842 | ||
1843 | out_destroy_xfsdatad_workqueue: | ||
1844 | destroy_workqueue(xfsdatad_workqueue); | ||
1837 | out_destroy_xfslogd_workqueue: | 1845 | out_destroy_xfslogd_workqueue: |
1838 | destroy_workqueue(xfslogd_workqueue); | 1846 | destroy_workqueue(xfslogd_workqueue); |
1839 | out_free_buf_zone: | 1847 | out_free_buf_zone: |
@@ -1849,6 +1857,7 @@ void | |||
1849 | xfs_buf_terminate(void) | 1857 | xfs_buf_terminate(void) |
1850 | { | 1858 | { |
1851 | unregister_shrinker(&xfs_buf_shake); | 1859 | unregister_shrinker(&xfs_buf_shake); |
1860 | destroy_workqueue(xfsconvertd_workqueue); | ||
1852 | destroy_workqueue(xfsdatad_workqueue); | 1861 | destroy_workqueue(xfsdatad_workqueue); |
1853 | destroy_workqueue(xfslogd_workqueue); | 1862 | destroy_workqueue(xfslogd_workqueue); |
1854 | kmem_zone_destroy(xfs_buf_zone); | 1863 | kmem_zone_destroy(xfs_buf_zone); |
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index 5aeb77776961..08be36d7326c 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c | |||
@@ -74,14 +74,14 @@ xfs_flush_pages( | |||
74 | 74 | ||
75 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | 75 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { |
76 | xfs_iflags_clear(ip, XFS_ITRUNCATED); | 76 | xfs_iflags_clear(ip, XFS_ITRUNCATED); |
77 | ret = filemap_fdatawrite(mapping); | 77 | ret = -filemap_fdatawrite(mapping); |
78 | if (flags & XFS_B_ASYNC) | ||
79 | return -ret; | ||
80 | ret2 = filemap_fdatawait(mapping); | ||
81 | if (!ret) | ||
82 | ret = ret2; | ||
83 | } | 78 | } |
84 | return -ret; | 79 | if (flags & XFS_B_ASYNC) |
80 | return ret; | ||
81 | ret2 = xfs_wait_on_pages(ip, first, last); | ||
82 | if (!ret) | ||
83 | ret = ret2; | ||
84 | return ret; | ||
85 | } | 85 | } |
86 | 86 | ||
87 | int | 87 | int |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index d0b499418a7d..34eaab608e6e 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c | |||
@@ -489,17 +489,12 @@ xfs_attrmulti_attr_set( | |||
489 | if (len > XATTR_SIZE_MAX) | 489 | if (len > XATTR_SIZE_MAX) |
490 | return EINVAL; | 490 | return EINVAL; |
491 | 491 | ||
492 | kbuf = kmalloc(len, GFP_KERNEL); | 492 | kbuf = memdup_user(ubuf, len); |
493 | if (!kbuf) | 493 | if (IS_ERR(kbuf)) |
494 | return ENOMEM; | 494 | return PTR_ERR(kbuf); |
495 | |||
496 | if (copy_from_user(kbuf, ubuf, len)) | ||
497 | goto out_kfree; | ||
498 | 495 | ||
499 | error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); | 496 | error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); |
500 | 497 | ||
501 | out_kfree: | ||
502 | kfree(kbuf); | ||
503 | return error; | 498 | return error; |
504 | } | 499 | } |
505 | 500 | ||
@@ -540,20 +535,16 @@ xfs_attrmulti_by_handle( | |||
540 | if (!size || size > 16 * PAGE_SIZE) | 535 | if (!size || size > 16 * PAGE_SIZE) |
541 | goto out_dput; | 536 | goto out_dput; |
542 | 537 | ||
543 | error = ENOMEM; | 538 | ops = memdup_user(am_hreq.ops, size); |
544 | ops = kmalloc(size, GFP_KERNEL); | 539 | if (IS_ERR(ops)) { |
545 | if (!ops) | 540 | error = PTR_ERR(ops); |
546 | goto out_dput; | 541 | goto out_dput; |
547 | 542 | } | |
548 | error = EFAULT; | ||
549 | if (copy_from_user(ops, am_hreq.ops, size)) | ||
550 | goto out_kfree_ops; | ||
551 | 543 | ||
552 | attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); | 544 | attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); |
553 | if (!attr_name) | 545 | if (!attr_name) |
554 | goto out_kfree_ops; | 546 | goto out_kfree_ops; |
555 | 547 | ||
556 | |||
557 | error = 0; | 548 | error = 0; |
558 | for (i = 0; i < am_hreq.opcount; i++) { | 549 | for (i = 0; i < am_hreq.opcount; i++) { |
559 | ops[i].am_error = strncpy_from_user(attr_name, | 550 | ops[i].am_error = strncpy_from_user(attr_name, |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index c70c4e3db790..0882d166239a 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c | |||
@@ -427,20 +427,16 @@ xfs_compat_attrmulti_by_handle( | |||
427 | if (!size || size > 16 * PAGE_SIZE) | 427 | if (!size || size > 16 * PAGE_SIZE) |
428 | goto out_dput; | 428 | goto out_dput; |
429 | 429 | ||
430 | error = ENOMEM; | 430 | ops = memdup_user(compat_ptr(am_hreq.ops), size); |
431 | ops = kmalloc(size, GFP_KERNEL); | 431 | if (IS_ERR(ops)) { |
432 | if (!ops) | 432 | error = PTR_ERR(ops); |
433 | goto out_dput; | 433 | goto out_dput; |
434 | 434 | } | |
435 | error = EFAULT; | ||
436 | if (copy_from_user(ops, compat_ptr(am_hreq.ops), size)) | ||
437 | goto out_kfree_ops; | ||
438 | 435 | ||
439 | attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); | 436 | attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); |
440 | if (!attr_name) | 437 | if (!attr_name) |
441 | goto out_kfree_ops; | 438 | goto out_kfree_ops; |
442 | 439 | ||
443 | |||
444 | error = 0; | 440 | error = 0; |
445 | for (i = 0; i < am_hreq.opcount; i++) { | 441 | for (i = 0; i < am_hreq.opcount; i++) { |
446 | ops[i].am_error = strncpy_from_user(attr_name, | 442 | ops[i].am_error = strncpy_from_user(attr_name, |
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 7e90daa0d1d1..9142192ccbe6 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c | |||
@@ -751,10 +751,26 @@ start: | |||
751 | goto relock; | 751 | goto relock; |
752 | } | 752 | } |
753 | } else { | 753 | } else { |
754 | int enospc = 0; | ||
755 | ssize_t ret2 = 0; | ||
756 | |||
757 | write_retry: | ||
754 | xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs, | 758 | xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs, |
755 | *offset, ioflags); | 759 | *offset, ioflags); |
756 | ret = generic_file_buffered_write(iocb, iovp, segs, | 760 | ret2 = generic_file_buffered_write(iocb, iovp, segs, |
757 | pos, offset, count, ret); | 761 | pos, offset, count, ret); |
762 | /* | ||
763 | * if we just got an ENOSPC, flush the inode now we | ||
764 | * aren't holding any page locks and retry *once* | ||
765 | */ | ||
766 | if (ret2 == -ENOSPC && !enospc) { | ||
767 | error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE); | ||
768 | if (error) | ||
769 | goto out_unlock_internal; | ||
770 | enospc = 1; | ||
771 | goto write_retry; | ||
772 | } | ||
773 | ret = ret2; | ||
758 | } | 774 | } |
759 | 775 | ||
760 | current->backing_dev_info = NULL; | 776 | current->backing_dev_info = NULL; |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index a608e72fa405..f7ba76633c29 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -62,12 +62,6 @@ xfs_sync_inodes_ag( | |||
62 | uint32_t first_index = 0; | 62 | uint32_t first_index = 0; |
63 | int error = 0; | 63 | int error = 0; |
64 | int last_error = 0; | 64 | int last_error = 0; |
65 | int fflag = XFS_B_ASYNC; | ||
66 | |||
67 | if (flags & SYNC_DELWRI) | ||
68 | fflag = XFS_B_DELWRI; | ||
69 | if (flags & SYNC_WAIT) | ||
70 | fflag = 0; /* synchronous overrides all */ | ||
71 | 65 | ||
72 | do { | 66 | do { |
73 | struct inode *inode; | 67 | struct inode *inode; |
@@ -128,11 +122,23 @@ xfs_sync_inodes_ag( | |||
128 | * If we have to flush data or wait for I/O completion | 122 | * If we have to flush data or wait for I/O completion |
129 | * we need to hold the iolock. | 123 | * we need to hold the iolock. |
130 | */ | 124 | */ |
131 | if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) { | 125 | if (flags & SYNC_DELWRI) { |
132 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 126 | if (VN_DIRTY(inode)) { |
133 | lock_flags |= XFS_IOLOCK_SHARED; | 127 | if (flags & SYNC_TRYLOCK) { |
134 | error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE); | 128 | if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) |
135 | if (flags & SYNC_IOWAIT) | 129 | lock_flags |= XFS_IOLOCK_SHARED; |
130 | } else { | ||
131 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | ||
132 | lock_flags |= XFS_IOLOCK_SHARED; | ||
133 | } | ||
134 | if (lock_flags & XFS_IOLOCK_SHARED) { | ||
135 | error = xfs_flush_pages(ip, 0, -1, | ||
136 | (flags & SYNC_WAIT) ? 0 | ||
137 | : XFS_B_ASYNC, | ||
138 | FI_NONE); | ||
139 | } | ||
140 | } | ||
141 | if (VN_CACHED(inode) && (flags & SYNC_IOWAIT)) | ||
136 | xfs_ioend_wait(ip); | 142 | xfs_ioend_wait(ip); |
137 | } | 143 | } |
138 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 144 | xfs_ilock(ip, XFS_ILOCK_SHARED); |
@@ -398,15 +404,17 @@ STATIC void | |||
398 | xfs_syncd_queue_work( | 404 | xfs_syncd_queue_work( |
399 | struct xfs_mount *mp, | 405 | struct xfs_mount *mp, |
400 | void *data, | 406 | void *data, |
401 | void (*syncer)(struct xfs_mount *, void *)) | 407 | void (*syncer)(struct xfs_mount *, void *), |
408 | struct completion *completion) | ||
402 | { | 409 | { |
403 | struct bhv_vfs_sync_work *work; | 410 | struct xfs_sync_work *work; |
404 | 411 | ||
405 | work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP); | 412 | work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP); |
406 | INIT_LIST_HEAD(&work->w_list); | 413 | INIT_LIST_HEAD(&work->w_list); |
407 | work->w_syncer = syncer; | 414 | work->w_syncer = syncer; |
408 | work->w_data = data; | 415 | work->w_data = data; |
409 | work->w_mount = mp; | 416 | work->w_mount = mp; |
417 | work->w_completion = completion; | ||
410 | spin_lock(&mp->m_sync_lock); | 418 | spin_lock(&mp->m_sync_lock); |
411 | list_add_tail(&work->w_list, &mp->m_sync_list); | 419 | list_add_tail(&work->w_list, &mp->m_sync_list); |
412 | spin_unlock(&mp->m_sync_lock); | 420 | spin_unlock(&mp->m_sync_lock); |
@@ -420,49 +428,26 @@ xfs_syncd_queue_work( | |||
420 | * heads, looking about for more room... | 428 | * heads, looking about for more room... |
421 | */ | 429 | */ |
422 | STATIC void | 430 | STATIC void |
423 | xfs_flush_inode_work( | 431 | xfs_flush_inodes_work( |
424 | struct xfs_mount *mp, | ||
425 | void *arg) | ||
426 | { | ||
427 | struct inode *inode = arg; | ||
428 | filemap_flush(inode->i_mapping); | ||
429 | iput(inode); | ||
430 | } | ||
431 | |||
432 | void | ||
433 | xfs_flush_inode( | ||
434 | xfs_inode_t *ip) | ||
435 | { | ||
436 | struct inode *inode = VFS_I(ip); | ||
437 | |||
438 | igrab(inode); | ||
439 | xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work); | ||
440 | delay(msecs_to_jiffies(500)); | ||
441 | } | ||
442 | |||
443 | /* | ||
444 | * This is the "bigger hammer" version of xfs_flush_inode_work... | ||
445 | * (IOW, "If at first you don't succeed, use a Bigger Hammer"). | ||
446 | */ | ||
447 | STATIC void | ||
448 | xfs_flush_device_work( | ||
449 | struct xfs_mount *mp, | 432 | struct xfs_mount *mp, |
450 | void *arg) | 433 | void *arg) |
451 | { | 434 | { |
452 | struct inode *inode = arg; | 435 | struct inode *inode = arg; |
453 | sync_blockdev(mp->m_super->s_bdev); | 436 | xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK); |
437 | xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT); | ||
454 | iput(inode); | 438 | iput(inode); |
455 | } | 439 | } |
456 | 440 | ||
457 | void | 441 | void |
458 | xfs_flush_device( | 442 | xfs_flush_inodes( |
459 | xfs_inode_t *ip) | 443 | xfs_inode_t *ip) |
460 | { | 444 | { |
461 | struct inode *inode = VFS_I(ip); | 445 | struct inode *inode = VFS_I(ip); |
446 | DECLARE_COMPLETION_ONSTACK(completion); | ||
462 | 447 | ||
463 | igrab(inode); | 448 | igrab(inode); |
464 | xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work); | 449 | xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); |
465 | delay(msecs_to_jiffies(500)); | 450 | wait_for_completion(&completion); |
466 | xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); | 451 | xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); |
467 | } | 452 | } |
468 | 453 | ||
@@ -497,7 +482,7 @@ xfssyncd( | |||
497 | { | 482 | { |
498 | struct xfs_mount *mp = arg; | 483 | struct xfs_mount *mp = arg; |
499 | long timeleft; | 484 | long timeleft; |
500 | bhv_vfs_sync_work_t *work, *n; | 485 | xfs_sync_work_t *work, *n; |
501 | LIST_HEAD (tmp); | 486 | LIST_HEAD (tmp); |
502 | 487 | ||
503 | set_freezable(); | 488 | set_freezable(); |
@@ -532,6 +517,8 @@ xfssyncd( | |||
532 | list_del(&work->w_list); | 517 | list_del(&work->w_list); |
533 | if (work == &mp->m_sync_work) | 518 | if (work == &mp->m_sync_work) |
534 | continue; | 519 | continue; |
520 | if (work->w_completion) | ||
521 | complete(work->w_completion); | ||
535 | kmem_free(work); | 522 | kmem_free(work); |
536 | } | 523 | } |
537 | } | 524 | } |
@@ -545,6 +532,7 @@ xfs_syncd_init( | |||
545 | { | 532 | { |
546 | mp->m_sync_work.w_syncer = xfs_sync_worker; | 533 | mp->m_sync_work.w_syncer = xfs_sync_worker; |
547 | mp->m_sync_work.w_mount = mp; | 534 | mp->m_sync_work.w_mount = mp; |
535 | mp->m_sync_work.w_completion = NULL; | ||
548 | mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd"); | 536 | mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd"); |
549 | if (IS_ERR(mp->m_sync_task)) | 537 | if (IS_ERR(mp->m_sync_task)) |
550 | return -PTR_ERR(mp->m_sync_task); | 538 | return -PTR_ERR(mp->m_sync_task); |
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 04f058c848ae..308d5bf6dfbd 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h | |||
@@ -21,18 +21,20 @@ | |||
21 | struct xfs_mount; | 21 | struct xfs_mount; |
22 | struct xfs_perag; | 22 | struct xfs_perag; |
23 | 23 | ||
24 | typedef struct bhv_vfs_sync_work { | 24 | typedef struct xfs_sync_work { |
25 | struct list_head w_list; | 25 | struct list_head w_list; |
26 | struct xfs_mount *w_mount; | 26 | struct xfs_mount *w_mount; |
27 | void *w_data; /* syncer routine argument */ | 27 | void *w_data; /* syncer routine argument */ |
28 | void (*w_syncer)(struct xfs_mount *, void *); | 28 | void (*w_syncer)(struct xfs_mount *, void *); |
29 | } bhv_vfs_sync_work_t; | 29 | struct completion *w_completion; |
30 | } xfs_sync_work_t; | ||
30 | 31 | ||
31 | #define SYNC_ATTR 0x0001 /* sync attributes */ | 32 | #define SYNC_ATTR 0x0001 /* sync attributes */ |
32 | #define SYNC_DELWRI 0x0002 /* look at delayed writes */ | 33 | #define SYNC_DELWRI 0x0002 /* look at delayed writes */ |
33 | #define SYNC_WAIT 0x0004 /* wait for i/o to complete */ | 34 | #define SYNC_WAIT 0x0004 /* wait for i/o to complete */ |
34 | #define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */ | 35 | #define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */ |
35 | #define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */ | 36 | #define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */ |
37 | #define SYNC_TRYLOCK 0x0020 /* only try to lock inodes */ | ||
36 | 38 | ||
37 | int xfs_syncd_init(struct xfs_mount *mp); | 39 | int xfs_syncd_init(struct xfs_mount *mp); |
38 | void xfs_syncd_stop(struct xfs_mount *mp); | 40 | void xfs_syncd_stop(struct xfs_mount *mp); |
@@ -43,8 +45,7 @@ int xfs_sync_fsdata(struct xfs_mount *mp, int flags); | |||
43 | int xfs_quiesce_data(struct xfs_mount *mp); | 45 | int xfs_quiesce_data(struct xfs_mount *mp); |
44 | void xfs_quiesce_attr(struct xfs_mount *mp); | 46 | void xfs_quiesce_attr(struct xfs_mount *mp); |
45 | 47 | ||
46 | void xfs_flush_inode(struct xfs_inode *ip); | 48 | void xfs_flush_inodes(struct xfs_inode *ip); |
47 | void xfs_flush_device(struct xfs_inode *ip); | ||
48 | 49 | ||
49 | int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); | 50 | int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); |
50 | int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode); | 51 | int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode); |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 3a6ed426327a..ca7c6005a487 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -5880,7 +5880,7 @@ xfs_getbmap( | |||
5880 | void *arg) /* formatter arg */ | 5880 | void *arg) /* formatter arg */ |
5881 | { | 5881 | { |
5882 | __int64_t bmvend; /* last block requested */ | 5882 | __int64_t bmvend; /* last block requested */ |
5883 | int error; /* return value */ | 5883 | int error = 0; /* return value */ |
5884 | __int64_t fixlen; /* length for -1 case */ | 5884 | __int64_t fixlen; /* length for -1 case */ |
5885 | int i; /* extent number */ | 5885 | int i; /* extent number */ |
5886 | int lock; /* lock state */ | 5886 | int lock; /* lock state */ |
@@ -5890,39 +5890,18 @@ xfs_getbmap( | |||
5890 | int nexleft; /* # of user extents left */ | 5890 | int nexleft; /* # of user extents left */ |
5891 | int subnex; /* # of bmapi's can do */ | 5891 | int subnex; /* # of bmapi's can do */ |
5892 | int nmap; /* number of map entries */ | 5892 | int nmap; /* number of map entries */ |
5893 | struct getbmapx out; /* output structure */ | 5893 | struct getbmapx *out; /* output structure */ |
5894 | int whichfork; /* data or attr fork */ | 5894 | int whichfork; /* data or attr fork */ |
5895 | int prealloced; /* this is a file with | 5895 | int prealloced; /* this is a file with |
5896 | * preallocated data space */ | 5896 | * preallocated data space */ |
5897 | int iflags; /* interface flags */ | 5897 | int iflags; /* interface flags */ |
5898 | int bmapi_flags; /* flags for xfs_bmapi */ | 5898 | int bmapi_flags; /* flags for xfs_bmapi */ |
5899 | int cur_ext = 0; | ||
5899 | 5900 | ||
5900 | mp = ip->i_mount; | 5901 | mp = ip->i_mount; |
5901 | iflags = bmv->bmv_iflags; | 5902 | iflags = bmv->bmv_iflags; |
5902 | |||
5903 | whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; | 5903 | whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK; |
5904 | 5904 | ||
5905 | /* If the BMV_IF_NO_DMAPI_READ interface bit specified, do not | ||
5906 | * generate a DMAPI read event. Otherwise, if the DM_EVENT_READ | ||
5907 | * bit is set for the file, generate a read event in order | ||
5908 | * that the DMAPI application may do its thing before we return | ||
5909 | * the extents. Usually this means restoring user file data to | ||
5910 | * regions of the file that look like holes. | ||
5911 | * | ||
5912 | * The "old behavior" (from XFS_IOC_GETBMAP) is to not specify | ||
5913 | * BMV_IF_NO_DMAPI_READ so that read events are generated. | ||
5914 | * If this were not true, callers of ioctl( XFS_IOC_GETBMAP ) | ||
5915 | * could misinterpret holes in a DMAPI file as true holes, | ||
5916 | * when in fact they may represent offline user data. | ||
5917 | */ | ||
5918 | if ((iflags & BMV_IF_NO_DMAPI_READ) == 0 && | ||
5919 | DM_EVENT_ENABLED(ip, DM_EVENT_READ) && | ||
5920 | whichfork == XFS_DATA_FORK) { | ||
5921 | error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL); | ||
5922 | if (error) | ||
5923 | return XFS_ERROR(error); | ||
5924 | } | ||
5925 | |||
5926 | if (whichfork == XFS_ATTR_FORK) { | 5905 | if (whichfork == XFS_ATTR_FORK) { |
5927 | if (XFS_IFORK_Q(ip)) { | 5906 | if (XFS_IFORK_Q(ip)) { |
5928 | if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && | 5907 | if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS && |
@@ -5936,11 +5915,37 @@ xfs_getbmap( | |||
5936 | ip->i_mount); | 5915 | ip->i_mount); |
5937 | return XFS_ERROR(EFSCORRUPTED); | 5916 | return XFS_ERROR(EFSCORRUPTED); |
5938 | } | 5917 | } |
5939 | } else if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && | 5918 | |
5940 | ip->i_d.di_format != XFS_DINODE_FMT_BTREE && | 5919 | prealloced = 0; |
5941 | ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) | 5920 | fixlen = 1LL << 32; |
5942 | return XFS_ERROR(EINVAL); | 5921 | } else { |
5943 | if (whichfork == XFS_DATA_FORK) { | 5922 | /* |
5923 | * If the BMV_IF_NO_DMAPI_READ interface bit specified, do | ||
5924 | * not generate a DMAPI read event. Otherwise, if the | ||
5925 | * DM_EVENT_READ bit is set for the file, generate a read | ||
5926 | * event in order that the DMAPI application may do its thing | ||
5927 | * before we return the extents. Usually this means restoring | ||
5928 | * user file data to regions of the file that look like holes. | ||
5929 | * | ||
5930 | * The "old behavior" (from XFS_IOC_GETBMAP) is to not specify | ||
5931 | * BMV_IF_NO_DMAPI_READ so that read events are generated. | ||
5932 | * If this were not true, callers of ioctl(XFS_IOC_GETBMAP) | ||
5933 | * could misinterpret holes in a DMAPI file as true holes, | ||
5934 | * when in fact they may represent offline user data. | ||
5935 | */ | ||
5936 | if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && | ||
5937 | !(iflags & BMV_IF_NO_DMAPI_READ)) { | ||
5938 | error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, | ||
5939 | 0, 0, 0, NULL); | ||
5940 | if (error) | ||
5941 | return XFS_ERROR(error); | ||
5942 | } | ||
5943 | |||
5944 | if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS && | ||
5945 | ip->i_d.di_format != XFS_DINODE_FMT_BTREE && | ||
5946 | ip->i_d.di_format != XFS_DINODE_FMT_LOCAL) | ||
5947 | return XFS_ERROR(EINVAL); | ||
5948 | |||
5944 | if (xfs_get_extsz_hint(ip) || | 5949 | if (xfs_get_extsz_hint(ip) || |
5945 | ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ | 5950 | ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){ |
5946 | prealloced = 1; | 5951 | prealloced = 1; |
@@ -5949,42 +5954,41 @@ xfs_getbmap( | |||
5949 | prealloced = 0; | 5954 | prealloced = 0; |
5950 | fixlen = ip->i_size; | 5955 | fixlen = ip->i_size; |
5951 | } | 5956 | } |
5952 | } else { | ||
5953 | prealloced = 0; | ||
5954 | fixlen = 1LL << 32; | ||
5955 | } | 5957 | } |
5956 | 5958 | ||
5957 | if (bmv->bmv_length == -1) { | 5959 | if (bmv->bmv_length == -1) { |
5958 | fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen)); | 5960 | fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen)); |
5959 | bmv->bmv_length = MAX( (__int64_t)(fixlen - bmv->bmv_offset), | 5961 | bmv->bmv_length = |
5960 | (__int64_t)0); | 5962 | max_t(__int64_t, fixlen - bmv->bmv_offset, 0); |
5961 | } else if (bmv->bmv_length < 0) | 5963 | } else if (bmv->bmv_length == 0) { |
5962 | return XFS_ERROR(EINVAL); | ||
5963 | if (bmv->bmv_length == 0) { | ||
5964 | bmv->bmv_entries = 0; | 5964 | bmv->bmv_entries = 0; |
5965 | return 0; | 5965 | return 0; |
5966 | } else if (bmv->bmv_length < 0) { | ||
5967 | return XFS_ERROR(EINVAL); | ||
5966 | } | 5968 | } |
5969 | |||
5967 | nex = bmv->bmv_count - 1; | 5970 | nex = bmv->bmv_count - 1; |
5968 | if (nex <= 0) | 5971 | if (nex <= 0) |
5969 | return XFS_ERROR(EINVAL); | 5972 | return XFS_ERROR(EINVAL); |
5970 | bmvend = bmv->bmv_offset + bmv->bmv_length; | 5973 | bmvend = bmv->bmv_offset + bmv->bmv_length; |
5971 | 5974 | ||
5972 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | ||
5973 | 5975 | ||
5974 | if (((iflags & BMV_IF_DELALLOC) == 0) && | 5976 | if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx)) |
5975 | (whichfork == XFS_DATA_FORK) && | 5977 | return XFS_ERROR(ENOMEM); |
5976 | (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size)) { | 5978 | out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL); |
5977 | /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */ | 5979 | if (!out) |
5978 | error = xfs_flush_pages(ip, (xfs_off_t)0, | 5980 | return XFS_ERROR(ENOMEM); |
5979 | -1, 0, FI_REMAPF); | 5981 | |
5980 | if (error) { | 5982 | xfs_ilock(ip, XFS_IOLOCK_SHARED); |
5981 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 5983 | if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { |
5982 | return error; | 5984 | if (ip->i_delayed_blks || ip->i_size > ip->i_d.di_size) { |
5985 | error = xfs_flush_pages(ip, 0, -1, 0, FI_REMAPF); | ||
5986 | if (error) | ||
5987 | goto out_unlock_iolock; | ||
5983 | } | 5988 | } |
5984 | } | ||
5985 | 5989 | ||
5986 | ASSERT(whichfork == XFS_ATTR_FORK || (iflags & BMV_IF_DELALLOC) || | 5990 | ASSERT(ip->i_delayed_blks == 0); |
5987 | ip->i_delayed_blks == 0); | 5991 | } |
5988 | 5992 | ||
5989 | lock = xfs_ilock_map_shared(ip); | 5993 | lock = xfs_ilock_map_shared(ip); |
5990 | 5994 | ||
@@ -5995,23 +5999,25 @@ xfs_getbmap( | |||
5995 | if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1) | 5999 | if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1) |
5996 | nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; | 6000 | nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1; |
5997 | 6001 | ||
5998 | bmapi_flags = xfs_bmapi_aflag(whichfork) | | 6002 | bmapi_flags = xfs_bmapi_aflag(whichfork); |
5999 | ((iflags & BMV_IF_PREALLOC) ? 0 : XFS_BMAPI_IGSTATE); | 6003 | if (!(iflags & BMV_IF_PREALLOC)) |
6004 | bmapi_flags |= XFS_BMAPI_IGSTATE; | ||
6000 | 6005 | ||
6001 | /* | 6006 | /* |
6002 | * Allocate enough space to handle "subnex" maps at a time. | 6007 | * Allocate enough space to handle "subnex" maps at a time. |
6003 | */ | 6008 | */ |
6009 | error = ENOMEM; | ||
6004 | subnex = 16; | 6010 | subnex = 16; |
6005 | map = kmem_alloc(subnex * sizeof(*map), KM_SLEEP); | 6011 | map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL); |
6012 | if (!map) | ||
6013 | goto out_unlock_ilock; | ||
6006 | 6014 | ||
6007 | bmv->bmv_entries = 0; | 6015 | bmv->bmv_entries = 0; |
6008 | 6016 | ||
6009 | if ((XFS_IFORK_NEXTENTS(ip, whichfork) == 0)) { | 6017 | if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 && |
6010 | if (((iflags & BMV_IF_DELALLOC) == 0) || | 6018 | (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) { |
6011 | whichfork == XFS_ATTR_FORK) { | 6019 | error = 0; |
6012 | error = 0; | 6020 | goto out_free_map; |
6013 | goto unlock_and_return; | ||
6014 | } | ||
6015 | } | 6021 | } |
6016 | 6022 | ||
6017 | nexleft = nex; | 6023 | nexleft = nex; |
@@ -6023,53 +6029,61 @@ xfs_getbmap( | |||
6023 | bmapi_flags, NULL, 0, map, &nmap, | 6029 | bmapi_flags, NULL, 0, map, &nmap, |
6024 | NULL, NULL); | 6030 | NULL, NULL); |
6025 | if (error) | 6031 | if (error) |
6026 | goto unlock_and_return; | 6032 | goto out_free_map; |
6027 | ASSERT(nmap <= subnex); | 6033 | ASSERT(nmap <= subnex); |
6028 | 6034 | ||
6029 | for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) { | 6035 | for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) { |
6030 | out.bmv_oflags = 0; | 6036 | out[cur_ext].bmv_oflags = 0; |
6031 | if (map[i].br_state == XFS_EXT_UNWRITTEN) | 6037 | if (map[i].br_state == XFS_EXT_UNWRITTEN) |
6032 | out.bmv_oflags |= BMV_OF_PREALLOC; | 6038 | out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; |
6033 | else if (map[i].br_startblock == DELAYSTARTBLOCK) | 6039 | else if (map[i].br_startblock == DELAYSTARTBLOCK) |
6034 | out.bmv_oflags |= BMV_OF_DELALLOC; | 6040 | out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC; |
6035 | out.bmv_offset = XFS_FSB_TO_BB(mp, map[i].br_startoff); | 6041 | out[cur_ext].bmv_offset = |
6036 | out.bmv_length = XFS_FSB_TO_BB(mp, map[i].br_blockcount); | 6042 | XFS_FSB_TO_BB(mp, map[i].br_startoff); |
6037 | out.bmv_unused1 = out.bmv_unused2 = 0; | 6043 | out[cur_ext].bmv_length = |
6044 | XFS_FSB_TO_BB(mp, map[i].br_blockcount); | ||
6045 | out[cur_ext].bmv_unused1 = 0; | ||
6046 | out[cur_ext].bmv_unused2 = 0; | ||
6038 | ASSERT(((iflags & BMV_IF_DELALLOC) != 0) || | 6047 | ASSERT(((iflags & BMV_IF_DELALLOC) != 0) || |
6039 | (map[i].br_startblock != DELAYSTARTBLOCK)); | 6048 | (map[i].br_startblock != DELAYSTARTBLOCK)); |
6040 | if (map[i].br_startblock == HOLESTARTBLOCK && | 6049 | if (map[i].br_startblock == HOLESTARTBLOCK && |
6041 | whichfork == XFS_ATTR_FORK) { | 6050 | whichfork == XFS_ATTR_FORK) { |
6042 | /* came to the end of attribute fork */ | 6051 | /* came to the end of attribute fork */ |
6043 | out.bmv_oflags |= BMV_OF_LAST; | 6052 | out[cur_ext].bmv_oflags |= BMV_OF_LAST; |
6044 | goto unlock_and_return; | 6053 | goto out_free_map; |
6045 | } else { | ||
6046 | int full = 0; /* user array is full */ | ||
6047 | |||
6048 | if (!xfs_getbmapx_fix_eof_hole(ip, &out, | ||
6049 | prealloced, bmvend, | ||
6050 | map[i].br_startblock)) { | ||
6051 | goto unlock_and_return; | ||
6052 | } | ||
6053 | |||
6054 | /* format results & advance arg */ | ||
6055 | error = formatter(&arg, &out, &full); | ||
6056 | if (error || full) | ||
6057 | goto unlock_and_return; | ||
6058 | nexleft--; | ||
6059 | bmv->bmv_offset = | ||
6060 | out.bmv_offset + out.bmv_length; | ||
6061 | bmv->bmv_length = MAX((__int64_t)0, | ||
6062 | (__int64_t)(bmvend - bmv->bmv_offset)); | ||
6063 | bmv->bmv_entries++; | ||
6064 | } | 6054 | } |
6055 | |||
6056 | if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext], | ||
6057 | prealloced, bmvend, | ||
6058 | map[i].br_startblock)) | ||
6059 | goto out_free_map; | ||
6060 | |||
6061 | nexleft--; | ||
6062 | bmv->bmv_offset = | ||
6063 | out[cur_ext].bmv_offset + | ||
6064 | out[cur_ext].bmv_length; | ||
6065 | bmv->bmv_length = | ||
6066 | max_t(__int64_t, 0, bmvend - bmv->bmv_offset); | ||
6067 | bmv->bmv_entries++; | ||
6068 | cur_ext++; | ||
6065 | } | 6069 | } |
6066 | } while (nmap && nexleft && bmv->bmv_length); | 6070 | } while (nmap && nexleft && bmv->bmv_length); |
6067 | 6071 | ||
6068 | unlock_and_return: | 6072 | out_free_map: |
6073 | kmem_free(map); | ||
6074 | out_unlock_ilock: | ||
6069 | xfs_iunlock_map_shared(ip, lock); | 6075 | xfs_iunlock_map_shared(ip, lock); |
6076 | out_unlock_iolock: | ||
6070 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 6077 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); |
6071 | 6078 | ||
6072 | kmem_free(map); | 6079 | for (i = 0; i < cur_ext; i++) { |
6080 | int full = 0; /* user array is full */ | ||
6081 | |||
6082 | /* format results & advance arg */ | ||
6083 | error = formatter(&arg, &out[i], &full); | ||
6084 | if (error || full) | ||
6085 | break; | ||
6086 | } | ||
6073 | 6087 | ||
6074 | return error; | 6088 | return error; |
6075 | } | 6089 | } |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 478e587087fe..89b81eedce6a 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -69,15 +69,6 @@ xfs_inode_alloc( | |||
69 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 69 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
70 | ASSERT(completion_done(&ip->i_flush)); | 70 | ASSERT(completion_done(&ip->i_flush)); |
71 | 71 | ||
72 | /* | ||
73 | * initialise the VFS inode here to get failures | ||
74 | * out of the way early. | ||
75 | */ | ||
76 | if (!inode_init_always(mp->m_super, VFS_I(ip))) { | ||
77 | kmem_zone_free(xfs_inode_zone, ip); | ||
78 | return NULL; | ||
79 | } | ||
80 | |||
81 | /* initialise the xfs inode */ | 72 | /* initialise the xfs inode */ |
82 | ip->i_ino = ino; | 73 | ip->i_ino = ino; |
83 | ip->i_mount = mp; | 74 | ip->i_mount = mp; |
@@ -113,6 +104,20 @@ xfs_inode_alloc( | |||
113 | #ifdef XFS_DIR2_TRACE | 104 | #ifdef XFS_DIR2_TRACE |
114 | ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); | 105 | ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); |
115 | #endif | 106 | #endif |
107 | /* | ||
108 | * Now initialise the VFS inode. We do this after the xfs_inode | ||
109 | * initialisation as internal failures will result in ->destroy_inode | ||
110 | * being called and that will pass down through the reclaim path and | ||
111 | * free the XFS inode. This path requires the XFS inode to already be | ||
112 | * initialised. Hence if this call fails, the xfs_inode has already | ||
113 | * been freed and we should not reference it at all in the error | ||
114 | * handling. | ||
115 | */ | ||
116 | if (!inode_init_always(mp->m_super, VFS_I(ip))) | ||
117 | return NULL; | ||
118 | |||
119 | /* prevent anyone from using this yet */ | ||
120 | VFS_I(ip)->i_state = I_NEW|I_LOCK; | ||
116 | 121 | ||
117 | return ip; | 122 | return ip; |
118 | } | 123 | } |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index e7ae08d1df48..123b20c8cbf2 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -1258,8 +1258,10 @@ xfs_file_last_byte( | |||
1258 | * necessary. | 1258 | * necessary. |
1259 | */ | 1259 | */ |
1260 | if (ip->i_df.if_flags & XFS_IFEXTENTS) { | 1260 | if (ip->i_df.if_flags & XFS_IFEXTENTS) { |
1261 | xfs_ilock(ip, XFS_ILOCK_SHARED); | ||
1261 | error = xfs_bmap_last_offset(NULL, ip, &last_block, | 1262 | error = xfs_bmap_last_offset(NULL, ip, &last_block, |
1262 | XFS_DATA_FORK); | 1263 | XFS_DATA_FORK); |
1264 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | ||
1263 | if (error) { | 1265 | if (error) { |
1264 | last_block = 0; | 1266 | last_block = 0; |
1265 | } | 1267 | } |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 08ce72316bfe..5aaa2d7ec155 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -338,38 +338,6 @@ xfs_iomap_eof_align_last_fsb( | |||
338 | } | 338 | } |
339 | 339 | ||
340 | STATIC int | 340 | STATIC int |
341 | xfs_flush_space( | ||
342 | xfs_inode_t *ip, | ||
343 | int *fsynced, | ||
344 | int *ioflags) | ||
345 | { | ||
346 | switch (*fsynced) { | ||
347 | case 0: | ||
348 | if (ip->i_delayed_blks) { | ||
349 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
350 | xfs_flush_inode(ip); | ||
351 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
352 | *fsynced = 1; | ||
353 | } else { | ||
354 | *ioflags |= BMAPI_SYNC; | ||
355 | *fsynced = 2; | ||
356 | } | ||
357 | return 0; | ||
358 | case 1: | ||
359 | *fsynced = 2; | ||
360 | *ioflags |= BMAPI_SYNC; | ||
361 | return 0; | ||
362 | case 2: | ||
363 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
364 | xfs_flush_device(ip); | ||
365 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
366 | *fsynced = 3; | ||
367 | return 0; | ||
368 | } | ||
369 | return 1; | ||
370 | } | ||
371 | |||
372 | STATIC int | ||
373 | xfs_cmn_err_fsblock_zero( | 341 | xfs_cmn_err_fsblock_zero( |
374 | xfs_inode_t *ip, | 342 | xfs_inode_t *ip, |
375 | xfs_bmbt_irec_t *imap) | 343 | xfs_bmbt_irec_t *imap) |
@@ -538,15 +506,9 @@ error_out: | |||
538 | } | 506 | } |
539 | 507 | ||
540 | /* | 508 | /* |
541 | * If the caller is doing a write at the end of the file, | 509 | * If the caller is doing a write at the end of the file, then extend the |
542 | * then extend the allocation out to the file system's write | 510 | * allocation out to the file system's write iosize. We clean up any extra |
543 | * iosize. We clean up any extra space left over when the | 511 | * space left over when the file is closed in xfs_inactive(). |
544 | * file is closed in xfs_inactive(). | ||
545 | * | ||
546 | * For sync writes, we are flushing delayed allocate space to | ||
547 | * try to make additional space available for allocation near | ||
548 | * the filesystem full boundary - preallocation hurts in that | ||
549 | * situation, of course. | ||
550 | */ | 512 | */ |
551 | STATIC int | 513 | STATIC int |
552 | xfs_iomap_eof_want_preallocate( | 514 | xfs_iomap_eof_want_preallocate( |
@@ -565,7 +527,7 @@ xfs_iomap_eof_want_preallocate( | |||
565 | int n, error, imaps; | 527 | int n, error, imaps; |
566 | 528 | ||
567 | *prealloc = 0; | 529 | *prealloc = 0; |
568 | if ((ioflag & BMAPI_SYNC) || (offset + count) <= ip->i_size) | 530 | if ((offset + count) <= ip->i_size) |
569 | return 0; | 531 | return 0; |
570 | 532 | ||
571 | /* | 533 | /* |
@@ -611,7 +573,7 @@ xfs_iomap_write_delay( | |||
611 | xfs_extlen_t extsz; | 573 | xfs_extlen_t extsz; |
612 | int nimaps; | 574 | int nimaps; |
613 | xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; | 575 | xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; |
614 | int prealloc, fsynced = 0; | 576 | int prealloc, flushed = 0; |
615 | int error; | 577 | int error; |
616 | 578 | ||
617 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | 579 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); |
@@ -627,12 +589,12 @@ xfs_iomap_write_delay( | |||
627 | extsz = xfs_get_extsz_hint(ip); | 589 | extsz = xfs_get_extsz_hint(ip); |
628 | offset_fsb = XFS_B_TO_FSBT(mp, offset); | 590 | offset_fsb = XFS_B_TO_FSBT(mp, offset); |
629 | 591 | ||
630 | retry: | ||
631 | error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, | 592 | error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, |
632 | ioflag, imap, XFS_WRITE_IMAPS, &prealloc); | 593 | ioflag, imap, XFS_WRITE_IMAPS, &prealloc); |
633 | if (error) | 594 | if (error) |
634 | return error; | 595 | return error; |
635 | 596 | ||
597 | retry: | ||
636 | if (prealloc) { | 598 | if (prealloc) { |
637 | aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); | 599 | aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); |
638 | ioalign = XFS_B_TO_FSBT(mp, aligned_offset); | 600 | ioalign = XFS_B_TO_FSBT(mp, aligned_offset); |
@@ -659,15 +621,22 @@ retry: | |||
659 | 621 | ||
660 | /* | 622 | /* |
661 | * If bmapi returned us nothing, and if we didn't get back EDQUOT, | 623 | * If bmapi returned us nothing, and if we didn't get back EDQUOT, |
662 | * then we must have run out of space - flush delalloc, and retry.. | 624 | * then we must have run out of space - flush all other inodes with |
625 | * delalloc blocks and retry without EOF preallocation. | ||
663 | */ | 626 | */ |
664 | if (nimaps == 0) { | 627 | if (nimaps == 0) { |
665 | xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE, | 628 | xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE, |
666 | ip, offset, count); | 629 | ip, offset, count); |
667 | if (xfs_flush_space(ip, &fsynced, &ioflag)) | 630 | if (flushed) |
668 | return XFS_ERROR(ENOSPC); | 631 | return XFS_ERROR(ENOSPC); |
669 | 632 | ||
633 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
634 | xfs_flush_inodes(ip); | ||
635 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
636 | |||
637 | flushed = 1; | ||
670 | error = 0; | 638 | error = 0; |
639 | prealloc = 0; | ||
671 | goto retry; | 640 | goto retry; |
672 | } | 641 | } |
673 | 642 | ||
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index a1cc1322fc0f..fdcf7b82747f 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h | |||
@@ -40,8 +40,7 @@ typedef enum { | |||
40 | BMAPI_IGNSTATE = (1 << 4), /* ignore unwritten state on read */ | 40 | BMAPI_IGNSTATE = (1 << 4), /* ignore unwritten state on read */ |
41 | BMAPI_DIRECT = (1 << 5), /* direct instead of buffered write */ | 41 | BMAPI_DIRECT = (1 << 5), /* direct instead of buffered write */ |
42 | BMAPI_MMAP = (1 << 6), /* allocate for mmap write */ | 42 | BMAPI_MMAP = (1 << 6), /* allocate for mmap write */ |
43 | BMAPI_SYNC = (1 << 7), /* sync write to flush delalloc space */ | 43 | BMAPI_TRYLOCK = (1 << 7), /* non-blocking request */ |
44 | BMAPI_TRYLOCK = (1 << 8), /* non-blocking request */ | ||
45 | } bmapi_flags_t; | 44 | } bmapi_flags_t; |
46 | 45 | ||
47 | 46 | ||
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index f76c6d7cea21..3750f04ede0b 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -562,9 +562,8 @@ xfs_log_mount( | |||
562 | } | 562 | } |
563 | 563 | ||
564 | mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); | 564 | mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); |
565 | if (!mp->m_log) { | 565 | if (IS_ERR(mp->m_log)) { |
566 | cmn_err(CE_WARN, "XFS: Log allocation failed: No memory!"); | 566 | error = -PTR_ERR(mp->m_log); |
567 | error = ENOMEM; | ||
568 | goto out; | 567 | goto out; |
569 | } | 568 | } |
570 | 569 | ||
@@ -1180,10 +1179,13 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1180 | xfs_buf_t *bp; | 1179 | xfs_buf_t *bp; |
1181 | int i; | 1180 | int i; |
1182 | int iclogsize; | 1181 | int iclogsize; |
1182 | int error = ENOMEM; | ||
1183 | 1183 | ||
1184 | log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); | 1184 | log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); |
1185 | if (!log) | 1185 | if (!log) { |
1186 | return NULL; | 1186 | xlog_warn("XFS: Log allocation failed: No memory!"); |
1187 | goto out; | ||
1188 | } | ||
1187 | 1189 | ||
1188 | log->l_mp = mp; | 1190 | log->l_mp = mp; |
1189 | log->l_targ = log_target; | 1191 | log->l_targ = log_target; |
@@ -1201,19 +1203,35 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1201 | log->l_grant_reserve_cycle = 1; | 1203 | log->l_grant_reserve_cycle = 1; |
1202 | log->l_grant_write_cycle = 1; | 1204 | log->l_grant_write_cycle = 1; |
1203 | 1205 | ||
1206 | error = EFSCORRUPTED; | ||
1204 | if (xfs_sb_version_hassector(&mp->m_sb)) { | 1207 | if (xfs_sb_version_hassector(&mp->m_sb)) { |
1205 | log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT; | 1208 | log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT; |
1206 | ASSERT(log->l_sectbb_log <= mp->m_sectbb_log); | 1209 | if (log->l_sectbb_log < 0 || |
1210 | log->l_sectbb_log > mp->m_sectbb_log) { | ||
1211 | xlog_warn("XFS: Log sector size (0x%x) out of range.", | ||
1212 | log->l_sectbb_log); | ||
1213 | goto out_free_log; | ||
1214 | } | ||
1215 | |||
1207 | /* for larger sector sizes, must have v2 or external log */ | 1216 | /* for larger sector sizes, must have v2 or external log */ |
1208 | ASSERT(log->l_sectbb_log == 0 || | 1217 | if (log->l_sectbb_log != 0 && |
1209 | log->l_logBBstart == 0 || | 1218 | (log->l_logBBstart != 0 && |
1210 | xfs_sb_version_haslogv2(&mp->m_sb)); | 1219 | !xfs_sb_version_haslogv2(&mp->m_sb))) { |
1211 | ASSERT(mp->m_sb.sb_logsectlog >= BBSHIFT); | 1220 | xlog_warn("XFS: log sector size (0x%x) invalid " |
1221 | "for configuration.", log->l_sectbb_log); | ||
1222 | goto out_free_log; | ||
1223 | } | ||
1224 | if (mp->m_sb.sb_logsectlog < BBSHIFT) { | ||
1225 | xlog_warn("XFS: Log sector log (0x%x) too small.", | ||
1226 | mp->m_sb.sb_logsectlog); | ||
1227 | goto out_free_log; | ||
1228 | } | ||
1212 | } | 1229 | } |
1213 | log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1; | 1230 | log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1; |
1214 | 1231 | ||
1215 | xlog_get_iclog_buffer_size(mp, log); | 1232 | xlog_get_iclog_buffer_size(mp, log); |
1216 | 1233 | ||
1234 | error = ENOMEM; | ||
1217 | bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); | 1235 | bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); |
1218 | if (!bp) | 1236 | if (!bp) |
1219 | goto out_free_log; | 1237 | goto out_free_log; |
@@ -1313,7 +1331,8 @@ out_free_iclog: | |||
1313 | xfs_buf_free(log->l_xbuf); | 1331 | xfs_buf_free(log->l_xbuf); |
1314 | out_free_log: | 1332 | out_free_log: |
1315 | kmem_free(log); | 1333 | kmem_free(log); |
1316 | return NULL; | 1334 | out: |
1335 | return ERR_PTR(-error); | ||
1317 | } /* xlog_alloc_log */ | 1336 | } /* xlog_alloc_log */ |
1318 | 1337 | ||
1319 | 1338 | ||
@@ -2541,18 +2560,19 @@ redo: | |||
2541 | xlog_ins_ticketq(&log->l_reserve_headq, tic); | 2560 | xlog_ins_ticketq(&log->l_reserve_headq, tic); |
2542 | xlog_trace_loggrant(log, tic, | 2561 | xlog_trace_loggrant(log, tic, |
2543 | "xlog_grant_log_space: sleep 2"); | 2562 | "xlog_grant_log_space: sleep 2"); |
2563 | spin_unlock(&log->l_grant_lock); | ||
2564 | xlog_grant_push_ail(log->l_mp, need_bytes); | ||
2565 | spin_lock(&log->l_grant_lock); | ||
2566 | |||
2544 | XFS_STATS_INC(xs_sleep_logspace); | 2567 | XFS_STATS_INC(xs_sleep_logspace); |
2545 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); | 2568 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); |
2546 | 2569 | ||
2547 | if (XLOG_FORCED_SHUTDOWN(log)) { | 2570 | spin_lock(&log->l_grant_lock); |
2548 | spin_lock(&log->l_grant_lock); | 2571 | if (XLOG_FORCED_SHUTDOWN(log)) |
2549 | goto error_return; | 2572 | goto error_return; |
2550 | } | ||
2551 | 2573 | ||
2552 | xlog_trace_loggrant(log, tic, | 2574 | xlog_trace_loggrant(log, tic, |
2553 | "xlog_grant_log_space: wake 2"); | 2575 | "xlog_grant_log_space: wake 2"); |
2554 | xlog_grant_push_ail(log->l_mp, need_bytes); | ||
2555 | spin_lock(&log->l_grant_lock); | ||
2556 | goto redo; | 2576 | goto redo; |
2557 | } else if (tic->t_flags & XLOG_TIC_IN_Q) | 2577 | } else if (tic->t_flags & XLOG_TIC_IN_Q) |
2558 | xlog_del_ticketq(&log->l_reserve_headq, tic); | 2578 | xlog_del_ticketq(&log->l_reserve_headq, tic); |
@@ -2631,7 +2651,7 @@ xlog_regrant_write_log_space(xlog_t *log, | |||
2631 | * for more free space, otherwise try to get some space for | 2651 | * for more free space, otherwise try to get some space for |
2632 | * this transaction. | 2652 | * this transaction. |
2633 | */ | 2653 | */ |
2634 | 2654 | need_bytes = tic->t_unit_res; | |
2635 | if ((ntic = log->l_write_headq)) { | 2655 | if ((ntic = log->l_write_headq)) { |
2636 | free_bytes = xlog_space_left(log, log->l_grant_write_cycle, | 2656 | free_bytes = xlog_space_left(log, log->l_grant_write_cycle, |
2637 | log->l_grant_write_bytes); | 2657 | log->l_grant_write_bytes); |
@@ -2651,26 +2671,25 @@ xlog_regrant_write_log_space(xlog_t *log, | |||
2651 | 2671 | ||
2652 | xlog_trace_loggrant(log, tic, | 2672 | xlog_trace_loggrant(log, tic, |
2653 | "xlog_regrant_write_log_space: sleep 1"); | 2673 | "xlog_regrant_write_log_space: sleep 1"); |
2674 | spin_unlock(&log->l_grant_lock); | ||
2675 | xlog_grant_push_ail(log->l_mp, need_bytes); | ||
2676 | spin_lock(&log->l_grant_lock); | ||
2677 | |||
2654 | XFS_STATS_INC(xs_sleep_logspace); | 2678 | XFS_STATS_INC(xs_sleep_logspace); |
2655 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, | 2679 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, |
2656 | &log->l_grant_lock, s); | 2680 | &log->l_grant_lock, s); |
2657 | 2681 | ||
2658 | /* If we're shutting down, this tic is already | 2682 | /* If we're shutting down, this tic is already |
2659 | * off the queue */ | 2683 | * off the queue */ |
2660 | if (XLOG_FORCED_SHUTDOWN(log)) { | 2684 | spin_lock(&log->l_grant_lock); |
2661 | spin_lock(&log->l_grant_lock); | 2685 | if (XLOG_FORCED_SHUTDOWN(log)) |
2662 | goto error_return; | 2686 | goto error_return; |
2663 | } | ||
2664 | 2687 | ||
2665 | xlog_trace_loggrant(log, tic, | 2688 | xlog_trace_loggrant(log, tic, |
2666 | "xlog_regrant_write_log_space: wake 1"); | 2689 | "xlog_regrant_write_log_space: wake 1"); |
2667 | xlog_grant_push_ail(log->l_mp, tic->t_unit_res); | ||
2668 | spin_lock(&log->l_grant_lock); | ||
2669 | } | 2690 | } |
2670 | } | 2691 | } |
2671 | 2692 | ||
2672 | need_bytes = tic->t_unit_res; | ||
2673 | |||
2674 | redo: | 2693 | redo: |
2675 | if (XLOG_FORCED_SHUTDOWN(log)) | 2694 | if (XLOG_FORCED_SHUTDOWN(log)) |
2676 | goto error_return; | 2695 | goto error_return; |
@@ -2680,19 +2699,20 @@ redo: | |||
2680 | if (free_bytes < need_bytes) { | 2699 | if (free_bytes < need_bytes) { |
2681 | if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) | 2700 | if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) |
2682 | xlog_ins_ticketq(&log->l_write_headq, tic); | 2701 | xlog_ins_ticketq(&log->l_write_headq, tic); |
2702 | spin_unlock(&log->l_grant_lock); | ||
2703 | xlog_grant_push_ail(log->l_mp, need_bytes); | ||
2704 | spin_lock(&log->l_grant_lock); | ||
2705 | |||
2683 | XFS_STATS_INC(xs_sleep_logspace); | 2706 | XFS_STATS_INC(xs_sleep_logspace); |
2684 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); | 2707 | sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); |
2685 | 2708 | ||
2686 | /* If we're shutting down, this tic is already off the queue */ | 2709 | /* If we're shutting down, this tic is already off the queue */ |
2687 | if (XLOG_FORCED_SHUTDOWN(log)) { | 2710 | spin_lock(&log->l_grant_lock); |
2688 | spin_lock(&log->l_grant_lock); | 2711 | if (XLOG_FORCED_SHUTDOWN(log)) |
2689 | goto error_return; | 2712 | goto error_return; |
2690 | } | ||
2691 | 2713 | ||
2692 | xlog_trace_loggrant(log, tic, | 2714 | xlog_trace_loggrant(log, tic, |
2693 | "xlog_regrant_write_log_space: wake 2"); | 2715 | "xlog_regrant_write_log_space: wake 2"); |
2694 | xlog_grant_push_ail(log->l_mp, need_bytes); | ||
2695 | spin_lock(&log->l_grant_lock); | ||
2696 | goto redo; | 2716 | goto redo; |
2697 | } else if (tic->t_flags & XLOG_TIC_IN_Q) | 2717 | } else if (tic->t_flags & XLOG_TIC_IN_Q) |
2698 | xlog_del_ticketq(&log->l_write_headq, tic); | 2718 | xlog_del_ticketq(&log->l_write_headq, tic); |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index b101990df027..65a99725d0cc 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -291,14 +291,17 @@ xfs_mount_validate_sb( | |||
291 | sbp->sb_sectsize > XFS_MAX_SECTORSIZE || | 291 | sbp->sb_sectsize > XFS_MAX_SECTORSIZE || |
292 | sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG || | 292 | sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG || |
293 | sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG || | 293 | sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG || |
294 | sbp->sb_sectsize != (1 << sbp->sb_sectlog) || | ||
294 | sbp->sb_blocksize < XFS_MIN_BLOCKSIZE || | 295 | sbp->sb_blocksize < XFS_MIN_BLOCKSIZE || |
295 | sbp->sb_blocksize > XFS_MAX_BLOCKSIZE || | 296 | sbp->sb_blocksize > XFS_MAX_BLOCKSIZE || |
296 | sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || | 297 | sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || |
297 | sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || | 298 | sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || |
299 | sbp->sb_blocksize != (1 << sbp->sb_blocklog) || | ||
298 | sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || | 300 | sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || |
299 | sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || | 301 | sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || |
300 | sbp->sb_inodelog < XFS_DINODE_MIN_LOG || | 302 | sbp->sb_inodelog < XFS_DINODE_MIN_LOG || |
301 | sbp->sb_inodelog > XFS_DINODE_MAX_LOG || | 303 | sbp->sb_inodelog > XFS_DINODE_MAX_LOG || |
304 | sbp->sb_inodesize != (1 << sbp->sb_inodelog) || | ||
302 | (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || | 305 | (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || |
303 | (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || | 306 | (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || |
304 | (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || | 307 | (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 7af44adffc8f..d6a64392f983 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -313,7 +313,7 @@ typedef struct xfs_mount { | |||
313 | #endif | 313 | #endif |
314 | struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ | 314 | struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ |
315 | struct task_struct *m_sync_task; /* generalised sync thread */ | 315 | struct task_struct *m_sync_task; /* generalised sync thread */ |
316 | bhv_vfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ | 316 | xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ |
317 | struct list_head m_sync_list; /* sync thread work item list */ | 317 | struct list_head m_sync_list; /* sync thread work item list */ |
318 | spinlock_t m_sync_lock; /* work item list lock */ | 318 | spinlock_t m_sync_lock; /* work item list lock */ |
319 | int m_sync_seq; /* sync thread generation no. */ | 319 | int m_sync_seq; /* sync thread generation no. */ |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 7394c7af5de5..19cf90a9c762 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -1457,6 +1457,13 @@ xfs_create( | |||
1457 | error = xfs_trans_reserve(tp, resblks, log_res, 0, | 1457 | error = xfs_trans_reserve(tp, resblks, log_res, 0, |
1458 | XFS_TRANS_PERM_LOG_RES, log_count); | 1458 | XFS_TRANS_PERM_LOG_RES, log_count); |
1459 | if (error == ENOSPC) { | 1459 | if (error == ENOSPC) { |
1460 | /* flush outstanding delalloc blocks and retry */ | ||
1461 | xfs_flush_inodes(dp); | ||
1462 | error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0, | ||
1463 | XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT); | ||
1464 | } | ||
1465 | if (error == ENOSPC) { | ||
1466 | /* No space at all so try a "no-allocation" reservation */ | ||
1460 | resblks = 0; | 1467 | resblks = 0; |
1461 | error = xfs_trans_reserve(tp, 0, log_res, 0, | 1468 | error = xfs_trans_reserve(tp, 0, log_res, 0, |
1462 | XFS_TRANS_PERM_LOG_RES, log_count); | 1469 | XFS_TRANS_PERM_LOG_RES, log_count); |