aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2013-07-01 05:16:54 -0400
committerIngo Molnar <mingo@kernel.org>2013-07-01 05:18:53 -0400
commit2fd1b487884310d0aa0c0640179dc7490ad86313 (patch)
tree1083dce15bd7dc0858c3883b8a361242046c5e09 /fs
parent333bb864f192015a53b5060b829089decd0220ef (diff)
parent8bb495e3f02401ee6f76d1b1d77f3ac9f079e376 (diff)
Merge tag 'v3.10' into sched/core
Merge in a recent upstream commit: c2853c8df57f include/linux/math64.h: add div64_ul() because: 72a4cf20cb71 sched: Change cfs_rq load avg to unsigned long relies on it. [ We don't rebase sched/core for this, because the handful of followup commits after the broken commit are not behavioral changes so are unlikely to be needed during bisection. ] Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c36
-rw-r--r--fs/befs/linuxvfs.c4
-rw-r--r--fs/btrfs/disk-io.c10
-rw-r--r--fs/btrfs/inode.c3
-rw-r--r--fs/btrfs/relocation.c9
-rw-r--r--fs/ceph/locks.c73
-rw-r--r--fs/ceph/mds_client.c65
-rw-r--r--fs/ceph/super.h9
-rw-r--r--fs/cifs/connect.c4
-rw-r--r--fs/ecryptfs/file.c6
-rw-r--r--fs/efivarfs/file.c14
-rw-r--r--fs/exec.c16
-rw-r--r--fs/file_table.c19
-rw-r--r--fs/fuse/dir.c12
-rw-r--r--fs/fuse/file.c62
-rw-r--r--fs/fuse/inode.c7
-rw-r--r--fs/gfs2/bmap.c17
-rw-r--r--fs/gfs2/dir.c43
-rw-r--r--fs/gfs2/file.c19
-rw-r--r--fs/gfs2/inode.c1
-rw-r--r--fs/gfs2/lops.c4
-rw-r--r--fs/gfs2/rgrp.c4
-rw-r--r--fs/gfs2/super.c6
-rw-r--r--fs/hpfs/dir.c10
-rw-r--r--fs/hpfs/file.c4
-rw-r--r--fs/internal.h6
-rw-r--r--fs/jfs/jfs_logmgr.c8
-rw-r--r--fs/jfs/super.c38
-rw-r--r--fs/namei.c4
-rw-r--r--fs/ncpfs/dir.c9
-rw-r--r--fs/nfs/nfs4proc.c2
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c1
-rw-r--r--fs/ocfs2/namei.c4
-rw-r--r--fs/pnode.c3
-rw-r--r--fs/proc/base.c1
-rw-r--r--fs/proc/kmsg.c10
-rw-r--r--fs/qnx6/dir.c2
-rw-r--r--fs/read_write.c24
-rw-r--r--fs/reiserfs/dir.c2
-rw-r--r--fs/reiserfs/inode.c9
-rw-r--r--fs/reiserfs/xattr.c14
-rw-r--r--fs/reiserfs/xattr_acl.c3
-rw-r--r--fs/splice.c32
-rw-r--r--fs/ubifs/dir.c54
-rw-r--r--fs/xfs/xfs_acl.c31
-rw-r--r--fs/xfs/xfs_acl.h31
-rw-r--r--fs/xfs/xfs_attr_leaf.c73
-rw-r--r--fs/xfs/xfs_attr_leaf.h1
-rw-r--r--fs/xfs/xfs_attr_remote.c408
-rw-r--r--fs/xfs/xfs_attr_remote.h10
-rw-r--r--fs/xfs/xfs_btree.c10
-rw-r--r--fs/xfs/xfs_buf.c1
-rw-r--r--fs/xfs/xfs_buf_item.c7
-rw-r--r--fs/xfs/xfs_dfrag.c8
-rw-r--r--fs/xfs/xfs_dir2_format.h4
-rw-r--r--fs/xfs/xfs_dir2_node.c13
-rw-r--r--fs/xfs/xfs_dquot.c37
-rw-r--r--fs/xfs/xfs_fs.h1
-rw-r--r--fs/xfs/xfs_fsops.c4
-rw-r--r--fs/xfs/xfs_inode.c16
-rw-r--r--fs/xfs/xfs_iops.c47
-rw-r--r--fs/xfs/xfs_log_recover.c114
-rw-r--r--fs/xfs/xfs_mount.c18
-rw-r--r--fs/xfs/xfs_qm.c40
-rw-r--r--fs/xfs/xfs_qm_syscalls.c40
-rw-r--r--fs/xfs/xfs_quota.h2
-rw-r--r--fs/xfs/xfs_super.c11
-rw-r--r--fs/xfs/xfs_symlink.c20
69 files changed, 1100 insertions, 532 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 7fe5bdee1630..2bbcacf74d0c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -141,9 +141,6 @@ static void aio_free_ring(struct kioctx *ctx)
141 for (i = 0; i < ctx->nr_pages; i++) 141 for (i = 0; i < ctx->nr_pages; i++)
142 put_page(ctx->ring_pages[i]); 142 put_page(ctx->ring_pages[i]);
143 143
144 if (ctx->mmap_size)
145 vm_munmap(ctx->mmap_base, ctx->mmap_size);
146
147 if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) 144 if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages)
148 kfree(ctx->ring_pages); 145 kfree(ctx->ring_pages);
149} 146}
@@ -322,11 +319,6 @@ static void free_ioctx(struct kioctx *ctx)
322 319
323 aio_free_ring(ctx); 320 aio_free_ring(ctx);
324 321
325 spin_lock(&aio_nr_lock);
326 BUG_ON(aio_nr - ctx->max_reqs > aio_nr);
327 aio_nr -= ctx->max_reqs;
328 spin_unlock(&aio_nr_lock);
329
330 pr_debug("freeing %p\n", ctx); 322 pr_debug("freeing %p\n", ctx);
331 323
332 /* 324 /*
@@ -435,17 +427,24 @@ static void kill_ioctx(struct kioctx *ctx)
435{ 427{
436 if (!atomic_xchg(&ctx->dead, 1)) { 428 if (!atomic_xchg(&ctx->dead, 1)) {
437 hlist_del_rcu(&ctx->list); 429 hlist_del_rcu(&ctx->list);
438 /* Between hlist_del_rcu() and dropping the initial ref */
439 synchronize_rcu();
440 430
441 /* 431 /*
442 * We can't punt to workqueue here because put_ioctx() -> 432 * It'd be more correct to do this in free_ioctx(), after all
443 * free_ioctx() will unmap the ringbuffer, and that has to be 433 * the outstanding kiocbs have finished - but by then io_destroy
444 * done in the original process's context. kill_ioctx_rcu/work() 434 * has already returned, so io_setup() could potentially return
445 * exist for exit_aio(), as in that path free_ioctx() won't do 435 * -EAGAIN with no ioctxs actually in use (as far as userspace
446 * the unmap. 436 * could tell).
447 */ 437 */
448 kill_ioctx_work(&ctx->rcu_work); 438 spin_lock(&aio_nr_lock);
439 BUG_ON(aio_nr - ctx->max_reqs > aio_nr);
440 aio_nr -= ctx->max_reqs;
441 spin_unlock(&aio_nr_lock);
442
443 if (ctx->mmap_size)
444 vm_munmap(ctx->mmap_base, ctx->mmap_size);
445
446 /* Between hlist_del_rcu() and dropping the initial ref */
447 call_rcu(&ctx->rcu_head, kill_ioctx_rcu);
449 } 448 }
450} 449}
451 450
@@ -495,10 +494,7 @@ void exit_aio(struct mm_struct *mm)
495 */ 494 */
496 ctx->mmap_size = 0; 495 ctx->mmap_size = 0;
497 496
498 if (!atomic_xchg(&ctx->dead, 1)) { 497 kill_ioctx(ctx);
499 hlist_del_rcu(&ctx->list);
500 call_rcu(&ctx->rcu_head, kill_ioctx_rcu);
501 }
502 } 498 }
503} 499}
504 500
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 8615ee89ab55..f95dddced968 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -265,8 +265,8 @@ befs_readdir(struct file *filp, void *dirent, filldir_t filldir)
265 result = filldir(dirent, keybuf, keysize, filp->f_pos, 265 result = filldir(dirent, keybuf, keysize, filp->f_pos,
266 (ino_t) value, d_type); 266 (ino_t) value, d_type);
267 } 267 }
268 268 if (!result)
269 filp->f_pos++; 269 filp->f_pos++;
270 270
271 befs_debug(sb, "<--- befs_readdir() filp->f_pos %Ld", filp->f_pos); 271 befs_debug(sb, "<--- befs_readdir() filp->f_pos %Ld", filp->f_pos);
272 272
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e7b3cb5286a5..b8b60b660c8f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2859,8 +2859,8 @@ fail_qgroup:
2859 btrfs_free_qgroup_config(fs_info); 2859 btrfs_free_qgroup_config(fs_info);
2860fail_trans_kthread: 2860fail_trans_kthread:
2861 kthread_stop(fs_info->transaction_kthread); 2861 kthread_stop(fs_info->transaction_kthread);
2862 del_fs_roots(fs_info);
2863 btrfs_cleanup_transaction(fs_info->tree_root); 2862 btrfs_cleanup_transaction(fs_info->tree_root);
2863 del_fs_roots(fs_info);
2864fail_cleaner: 2864fail_cleaner:
2865 kthread_stop(fs_info->cleaner_kthread); 2865 kthread_stop(fs_info->cleaner_kthread);
2866 2866
@@ -3512,15 +3512,15 @@ int close_ctree(struct btrfs_root *root)
3512 percpu_counter_sum(&fs_info->delalloc_bytes)); 3512 percpu_counter_sum(&fs_info->delalloc_bytes));
3513 } 3513 }
3514 3514
3515 free_root_pointers(fs_info, 1);
3516
3517 btrfs_free_block_groups(fs_info); 3515 btrfs_free_block_groups(fs_info);
3518 3516
3517 btrfs_stop_all_workers(fs_info);
3518
3519 del_fs_roots(fs_info); 3519 del_fs_roots(fs_info);
3520 3520
3521 iput(fs_info->btree_inode); 3521 free_root_pointers(fs_info, 1);
3522 3522
3523 btrfs_stop_all_workers(fs_info); 3523 iput(fs_info->btree_inode);
3524 3524
3525#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 3525#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
3526 if (btrfs_test_opt(root, CHECK_INTEGRITY)) 3526 if (btrfs_test_opt(root, CHECK_INTEGRITY))
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index af978f7682b3..17f3064b4a3e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8012,6 +8012,9 @@ int btrfs_drop_inode(struct inode *inode)
8012{ 8012{
8013 struct btrfs_root *root = BTRFS_I(inode)->root; 8013 struct btrfs_root *root = BTRFS_I(inode)->root;
8014 8014
8015 if (root == NULL)
8016 return 1;
8017
8015 /* the snap/subvol tree is on deleting */ 8018 /* the snap/subvol tree is on deleting */
8016 if (btrfs_root_refs(&root->root_item) == 0 && 8019 if (btrfs_root_refs(&root->root_item) == 0 &&
8017 root != root->fs_info->tree_root) 8020 root != root->fs_info->tree_root)
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 395b82031a42..4febca4fc2de 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4082,7 +4082,7 @@ out:
4082 return inode; 4082 return inode;
4083} 4083}
4084 4084
4085static struct reloc_control *alloc_reloc_control(void) 4085static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
4086{ 4086{
4087 struct reloc_control *rc; 4087 struct reloc_control *rc;
4088 4088
@@ -4093,7 +4093,8 @@ static struct reloc_control *alloc_reloc_control(void)
4093 INIT_LIST_HEAD(&rc->reloc_roots); 4093 INIT_LIST_HEAD(&rc->reloc_roots);
4094 backref_cache_init(&rc->backref_cache); 4094 backref_cache_init(&rc->backref_cache);
4095 mapping_tree_init(&rc->reloc_root_tree); 4095 mapping_tree_init(&rc->reloc_root_tree);
4096 extent_io_tree_init(&rc->processed_blocks, NULL); 4096 extent_io_tree_init(&rc->processed_blocks,
4097 fs_info->btree_inode->i_mapping);
4097 return rc; 4098 return rc;
4098} 4099}
4099 4100
@@ -4110,7 +4111,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
4110 int rw = 0; 4111 int rw = 0;
4111 int err = 0; 4112 int err = 0;
4112 4113
4113 rc = alloc_reloc_control(); 4114 rc = alloc_reloc_control(fs_info);
4114 if (!rc) 4115 if (!rc)
4115 return -ENOMEM; 4116 return -ENOMEM;
4116 4117
@@ -4311,7 +4312,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
4311 if (list_empty(&reloc_roots)) 4312 if (list_empty(&reloc_roots))
4312 goto out; 4313 goto out;
4313 4314
4314 rc = alloc_reloc_control(); 4315 rc = alloc_reloc_control(root->fs_info);
4315 if (!rc) { 4316 if (!rc) {
4316 err = -ENOMEM; 4317 err = -ENOMEM;
4317 goto out; 4318 goto out;
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 202dd3d68be0..ebbf680378e2 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -191,27 +191,23 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
191} 191}
192 192
193/** 193/**
194 * Encode the flock and fcntl locks for the given inode into the pagelist. 194 * Encode the flock and fcntl locks for the given inode into the ceph_filelock
195 * Format is: #fcntl locks, sequential fcntl locks, #flock locks, 195 * array. Must be called with lock_flocks() already held.
196 * sequential flock locks. 196 * If we encounter more of a specific lock type than expected, return -ENOSPC.
197 * Must be called with lock_flocks() already held.
198 * If we encounter more of a specific lock type than expected,
199 * we return the value 1.
200 */ 197 */
201int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, 198int ceph_encode_locks_to_buffer(struct inode *inode,
202 int num_fcntl_locks, int num_flock_locks) 199 struct ceph_filelock *flocks,
200 int num_fcntl_locks, int num_flock_locks)
203{ 201{
204 struct file_lock *lock; 202 struct file_lock *lock;
205 struct ceph_filelock cephlock;
206 int err = 0; 203 int err = 0;
207 int seen_fcntl = 0; 204 int seen_fcntl = 0;
208 int seen_flock = 0; 205 int seen_flock = 0;
206 int l = 0;
209 207
210 dout("encoding %d flock and %d fcntl locks", num_flock_locks, 208 dout("encoding %d flock and %d fcntl locks", num_flock_locks,
211 num_fcntl_locks); 209 num_fcntl_locks);
212 err = ceph_pagelist_append(pagelist, &num_fcntl_locks, sizeof(u32)); 210
213 if (err)
214 goto fail;
215 for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { 211 for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
216 if (lock->fl_flags & FL_POSIX) { 212 if (lock->fl_flags & FL_POSIX) {
217 ++seen_fcntl; 213 ++seen_fcntl;
@@ -219,19 +215,12 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist,
219 err = -ENOSPC; 215 err = -ENOSPC;
220 goto fail; 216 goto fail;
221 } 217 }
222 err = lock_to_ceph_filelock(lock, &cephlock); 218 err = lock_to_ceph_filelock(lock, &flocks[l]);
223 if (err) 219 if (err)
224 goto fail; 220 goto fail;
225 err = ceph_pagelist_append(pagelist, &cephlock, 221 ++l;
226 sizeof(struct ceph_filelock));
227 } 222 }
228 if (err)
229 goto fail;
230 } 223 }
231
232 err = ceph_pagelist_append(pagelist, &num_flock_locks, sizeof(u32));
233 if (err)
234 goto fail;
235 for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { 224 for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) {
236 if (lock->fl_flags & FL_FLOCK) { 225 if (lock->fl_flags & FL_FLOCK) {
237 ++seen_flock; 226 ++seen_flock;
@@ -239,19 +228,51 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist,
239 err = -ENOSPC; 228 err = -ENOSPC;
240 goto fail; 229 goto fail;
241 } 230 }
242 err = lock_to_ceph_filelock(lock, &cephlock); 231 err = lock_to_ceph_filelock(lock, &flocks[l]);
243 if (err) 232 if (err)
244 goto fail; 233 goto fail;
245 err = ceph_pagelist_append(pagelist, &cephlock, 234 ++l;
246 sizeof(struct ceph_filelock));
247 } 235 }
248 if (err)
249 goto fail;
250 } 236 }
251fail: 237fail:
252 return err; 238 return err;
253} 239}
254 240
241/**
242 * Copy the encoded flock and fcntl locks into the pagelist.
243 * Format is: #fcntl locks, sequential fcntl locks, #flock locks,
244 * sequential flock locks.
245 * Returns zero on success.
246 */
247int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
248 struct ceph_pagelist *pagelist,
249 int num_fcntl_locks, int num_flock_locks)
250{
251 int err = 0;
252 __le32 nlocks;
253
254 nlocks = cpu_to_le32(num_fcntl_locks);
255 err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
256 if (err)
257 goto out_fail;
258
259 err = ceph_pagelist_append(pagelist, flocks,
260 num_fcntl_locks * sizeof(*flocks));
261 if (err)
262 goto out_fail;
263
264 nlocks = cpu_to_le32(num_flock_locks);
265 err = ceph_pagelist_append(pagelist, &nlocks, sizeof(nlocks));
266 if (err)
267 goto out_fail;
268
269 err = ceph_pagelist_append(pagelist,
270 &flocks[num_fcntl_locks],
271 num_flock_locks * sizeof(*flocks));
272out_fail:
273 return err;
274}
275
255/* 276/*
256 * Given a pointer to a lock, convert it to a ceph filelock 277 * Given a pointer to a lock, convert it to a ceph filelock
257 */ 278 */
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 4f22671a5bd4..4d2920304be8 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2478,39 +2478,44 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2478 2478
2479 if (recon_state->flock) { 2479 if (recon_state->flock) {
2480 int num_fcntl_locks, num_flock_locks; 2480 int num_fcntl_locks, num_flock_locks;
2481 struct ceph_pagelist_cursor trunc_point; 2481 struct ceph_filelock *flocks;
2482 2482
2483 ceph_pagelist_set_cursor(pagelist, &trunc_point); 2483encode_again:
2484 do { 2484 lock_flocks();
2485 lock_flocks(); 2485 ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
2486 ceph_count_locks(inode, &num_fcntl_locks, 2486 unlock_flocks();
2487 &num_flock_locks); 2487 flocks = kmalloc((num_fcntl_locks+num_flock_locks) *
2488 rec.v2.flock_len = (2*sizeof(u32) + 2488 sizeof(struct ceph_filelock), GFP_NOFS);
2489 (num_fcntl_locks+num_flock_locks) * 2489 if (!flocks) {
2490 sizeof(struct ceph_filelock)); 2490 err = -ENOMEM;
2491 unlock_flocks(); 2491 goto out_free;
2492 2492 }
2493 /* pre-alloc pagelist */ 2493 lock_flocks();
2494 ceph_pagelist_truncate(pagelist, &trunc_point); 2494 err = ceph_encode_locks_to_buffer(inode, flocks,
2495 err = ceph_pagelist_append(pagelist, &rec, reclen); 2495 num_fcntl_locks,
2496 if (!err) 2496 num_flock_locks);
2497 err = ceph_pagelist_reserve(pagelist, 2497 unlock_flocks();
2498 rec.v2.flock_len); 2498 if (err) {
2499 2499 kfree(flocks);
2500 /* encode locks */ 2500 if (err == -ENOSPC)
2501 if (!err) { 2501 goto encode_again;
2502 lock_flocks(); 2502 goto out_free;
2503 err = ceph_encode_locks(inode, 2503 }
2504 pagelist, 2504 /*
2505 num_fcntl_locks, 2505 * number of encoded locks is stable, so copy to pagelist
2506 num_flock_locks); 2506 */
2507 unlock_flocks(); 2507 rec.v2.flock_len = cpu_to_le32(2*sizeof(u32) +
2508 } 2508 (num_fcntl_locks+num_flock_locks) *
2509 } while (err == -ENOSPC); 2509 sizeof(struct ceph_filelock));
2510 err = ceph_pagelist_append(pagelist, &rec, reclen);
2511 if (!err)
2512 err = ceph_locks_to_pagelist(flocks, pagelist,
2513 num_fcntl_locks,
2514 num_flock_locks);
2515 kfree(flocks);
2510 } else { 2516 } else {
2511 err = ceph_pagelist_append(pagelist, &rec, reclen); 2517 err = ceph_pagelist_append(pagelist, &rec, reclen);
2512 } 2518 }
2513
2514out_free: 2519out_free:
2515 kfree(path); 2520 kfree(path);
2516out_dput: 2521out_dput:
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 8696be2ff679..7ccfdb4aea2e 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -822,8 +822,13 @@ extern const struct export_operations ceph_export_ops;
822extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); 822extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl);
823extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); 823extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl);
824extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num); 824extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num);
825extern int ceph_encode_locks(struct inode *i, struct ceph_pagelist *p, 825extern int ceph_encode_locks_to_buffer(struct inode *inode,
826 int p_locks, int f_locks); 826 struct ceph_filelock *flocks,
827 int num_fcntl_locks,
828 int num_flock_locks);
829extern int ceph_locks_to_pagelist(struct ceph_filelock *flocks,
830 struct ceph_pagelist *pagelist,
831 int num_fcntl_locks, int num_flock_locks);
827extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c); 832extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c);
828 833
829/* debugfs.c */ 834/* debugfs.c */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5b97e56ddbca..e3bc39bb9d12 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3279,8 +3279,8 @@ build_unc_path_to_root(const struct smb_vol *vol,
3279 pos = full_path + unc_len; 3279 pos = full_path + unc_len;
3280 3280
3281 if (pplen) { 3281 if (pplen) {
3282 *pos++ = CIFS_DIR_SEP(cifs_sb); 3282 *pos = CIFS_DIR_SEP(cifs_sb);
3283 strncpy(pos, vol->prepath, pplen); 3283 strncpy(pos + 1, vol->prepath, pplen);
3284 pos += pplen; 3284 pos += pplen;
3285 } 3285 }
3286 3286
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 201f0a0d6b0a..a7abbea2c096 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -295,6 +295,12 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
295static int 295static int
296ecryptfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) 296ecryptfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
297{ 297{
298 int rc;
299
300 rc = filemap_write_and_wait(file->f_mapping);
301 if (rc)
302 return rc;
303
298 return vfs_fsync(ecryptfs_file_to_lower(file), datasync); 304 return vfs_fsync(ecryptfs_file_to_lower(file), datasync);
299} 305}
300 306
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index bfb531564319..8dd524f32284 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -44,8 +44,11 @@ static ssize_t efivarfs_file_write(struct file *file,
44 44
45 bytes = efivar_entry_set_get_size(var, attributes, &datasize, 45 bytes = efivar_entry_set_get_size(var, attributes, &datasize,
46 data, &set); 46 data, &set);
47 if (!set && bytes) 47 if (!set && bytes) {
48 if (bytes == -ENOENT)
49 bytes = -EIO;
48 goto out; 50 goto out;
51 }
49 52
50 if (bytes == -ENOENT) { 53 if (bytes == -ENOENT) {
51 drop_nlink(inode); 54 drop_nlink(inode);
@@ -76,7 +79,14 @@ static ssize_t efivarfs_file_read(struct file *file, char __user *userbuf,
76 int err; 79 int err;
77 80
78 err = efivar_entry_size(var, &datasize); 81 err = efivar_entry_size(var, &datasize);
79 if (err) 82
83 /*
84 * efivarfs represents uncommitted variables with
85 * zero-length files. Reading them should return EOF.
86 */
87 if (err == -ENOENT)
88 return 0;
89 else if (err)
80 return err; 90 return err;
81 91
82 data = kmalloc(datasize + sizeof(attributes), GFP_KERNEL); 92 data = kmalloc(datasize + sizeof(attributes), GFP_KERNEL);
diff --git a/fs/exec.c b/fs/exec.c
index 643019585574..ffd7a813ad3d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1135,13 +1135,6 @@ void setup_new_exec(struct linux_binprm * bprm)
1135 set_dumpable(current->mm, suid_dumpable); 1135 set_dumpable(current->mm, suid_dumpable);
1136 } 1136 }
1137 1137
1138 /*
1139 * Flush performance counters when crossing a
1140 * security domain:
1141 */
1142 if (!get_dumpable(current->mm))
1143 perf_event_exit_task(current);
1144
1145 /* An exec changes our domain. We are no longer part of the thread 1138 /* An exec changes our domain. We are no longer part of the thread
1146 group */ 1139 group */
1147 1140
@@ -1205,6 +1198,15 @@ void install_exec_creds(struct linux_binprm *bprm)
1205 1198
1206 commit_creds(bprm->cred); 1199 commit_creds(bprm->cred);
1207 bprm->cred = NULL; 1200 bprm->cred = NULL;
1201
1202 /*
1203 * Disable monitoring for regular users
1204 * when executing setuid binaries. Must
1205 * wait until new credentials are committed
1206 * by commit_creds() above
1207 */
1208 if (get_dumpable(current->mm) != SUID_DUMP_USER)
1209 perf_event_exit_task(current);
1208 /* 1210 /*
1209 * cred_guard_mutex must be held at least to this point to prevent 1211 * cred_guard_mutex must be held at least to this point to prevent
1210 * ptrace_attach() from altering our determination of the task's 1212 * ptrace_attach() from altering our determination of the task's
diff --git a/fs/file_table.c b/fs/file_table.c
index cd4d87a82951..485dc0eddd67 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -306,17 +306,18 @@ void fput(struct file *file)
306{ 306{
307 if (atomic_long_dec_and_test(&file->f_count)) { 307 if (atomic_long_dec_and_test(&file->f_count)) {
308 struct task_struct *task = current; 308 struct task_struct *task = current;
309 unsigned long flags;
310
309 file_sb_list_del(file); 311 file_sb_list_del(file);
310 if (unlikely(in_interrupt() || task->flags & PF_KTHREAD)) { 312 if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
311 unsigned long flags; 313 init_task_work(&file->f_u.fu_rcuhead, ____fput);
312 spin_lock_irqsave(&delayed_fput_lock, flags); 314 if (!task_work_add(task, &file->f_u.fu_rcuhead, true))
313 list_add(&file->f_u.fu_list, &delayed_fput_list); 315 return;
314 schedule_work(&delayed_fput_work);
315 spin_unlock_irqrestore(&delayed_fput_lock, flags);
316 return;
317 } 316 }
318 init_task_work(&file->f_u.fu_rcuhead, ____fput); 317 spin_lock_irqsave(&delayed_fput_lock, flags);
319 task_work_add(task, &file->f_u.fu_rcuhead, true); 318 list_add(&file->f_u.fu_list, &delayed_fput_list);
319 schedule_work(&delayed_fput_work);
320 spin_unlock_irqrestore(&delayed_fput_lock, flags);
320 } 321 }
321} 322}
322 323
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 254df56b847b..f3f783dc4f75 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -180,6 +180,8 @@ u64 fuse_get_attr_version(struct fuse_conn *fc)
180static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) 180static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
181{ 181{
182 struct inode *inode; 182 struct inode *inode;
183 struct dentry *parent;
184 struct fuse_conn *fc;
183 185
184 inode = ACCESS_ONCE(entry->d_inode); 186 inode = ACCESS_ONCE(entry->d_inode);
185 if (inode && is_bad_inode(inode)) 187 if (inode && is_bad_inode(inode))
@@ -187,10 +189,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
187 else if (fuse_dentry_time(entry) < get_jiffies_64()) { 189 else if (fuse_dentry_time(entry) < get_jiffies_64()) {
188 int err; 190 int err;
189 struct fuse_entry_out outarg; 191 struct fuse_entry_out outarg;
190 struct fuse_conn *fc;
191 struct fuse_req *req; 192 struct fuse_req *req;
192 struct fuse_forget_link *forget; 193 struct fuse_forget_link *forget;
193 struct dentry *parent;
194 u64 attr_version; 194 u64 attr_version;
195 195
196 /* For negative dentries, always do a fresh lookup */ 196 /* For negative dentries, always do a fresh lookup */
@@ -241,8 +241,14 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
241 entry_attr_timeout(&outarg), 241 entry_attr_timeout(&outarg),
242 attr_version); 242 attr_version);
243 fuse_change_entry_timeout(entry, &outarg); 243 fuse_change_entry_timeout(entry, &outarg);
244 } else if (inode) {
245 fc = get_fuse_conn(inode);
246 if (fc->readdirplus_auto) {
247 parent = dget_parent(entry);
248 fuse_advise_use_readdirplus(parent->d_inode);
249 dput(parent);
250 }
244 } 251 }
245 fuse_advise_use_readdirplus(inode);
246 return 1; 252 return 1;
247} 253}
248 254
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index d1c9b85b3f58..35f281033142 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -16,6 +16,7 @@
16#include <linux/compat.h> 16#include <linux/compat.h>
17#include <linux/swap.h> 17#include <linux/swap.h>
18#include <linux/aio.h> 18#include <linux/aio.h>
19#include <linux/falloc.h>
19 20
20static const struct file_operations fuse_direct_io_file_operations; 21static const struct file_operations fuse_direct_io_file_operations;
21 22
@@ -1278,7 +1279,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
1278 1279
1279 iov_iter_init(&ii, iov, nr_segs, count, 0); 1280 iov_iter_init(&ii, iov, nr_segs, count, 0);
1280 1281
1281 req = fuse_get_req(fc, fuse_iter_npages(&ii)); 1282 if (io->async)
1283 req = fuse_get_req_for_background(fc, fuse_iter_npages(&ii));
1284 else
1285 req = fuse_get_req(fc, fuse_iter_npages(&ii));
1282 if (IS_ERR(req)) 1286 if (IS_ERR(req))
1283 return PTR_ERR(req); 1287 return PTR_ERR(req);
1284 1288
@@ -1314,7 +1318,11 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
1314 break; 1318 break;
1315 if (count) { 1319 if (count) {
1316 fuse_put_request(fc, req); 1320 fuse_put_request(fc, req);
1317 req = fuse_get_req(fc, fuse_iter_npages(&ii)); 1321 if (io->async)
1322 req = fuse_get_req_for_background(fc,
1323 fuse_iter_npages(&ii));
1324 else
1325 req = fuse_get_req(fc, fuse_iter_npages(&ii));
1318 if (IS_ERR(req)) 1326 if (IS_ERR(req))
1319 break; 1327 break;
1320 } 1328 }
@@ -2365,6 +2373,11 @@ static void fuse_do_truncate(struct file *file)
2365 fuse_do_setattr(inode, &attr, file); 2373 fuse_do_setattr(inode, &attr, file);
2366} 2374}
2367 2375
2376static inline loff_t fuse_round_up(loff_t off)
2377{
2378 return round_up(off, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
2379}
2380
2368static ssize_t 2381static ssize_t
2369fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, 2382fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2370 loff_t offset, unsigned long nr_segs) 2383 loff_t offset, unsigned long nr_segs)
@@ -2372,6 +2385,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2372 ssize_t ret = 0; 2385 ssize_t ret = 0;
2373 struct file *file = iocb->ki_filp; 2386 struct file *file = iocb->ki_filp;
2374 struct fuse_file *ff = file->private_data; 2387 struct fuse_file *ff = file->private_data;
2388 bool async_dio = ff->fc->async_dio;
2375 loff_t pos = 0; 2389 loff_t pos = 0;
2376 struct inode *inode; 2390 struct inode *inode;
2377 loff_t i_size; 2391 loff_t i_size;
@@ -2383,10 +2397,10 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2383 i_size = i_size_read(inode); 2397 i_size = i_size_read(inode);
2384 2398
2385 /* optimization for short read */ 2399 /* optimization for short read */
2386 if (rw != WRITE && offset + count > i_size) { 2400 if (async_dio && rw != WRITE && offset + count > i_size) {
2387 if (offset >= i_size) 2401 if (offset >= i_size)
2388 return 0; 2402 return 0;
2389 count = i_size - offset; 2403 count = min_t(loff_t, count, fuse_round_up(i_size - offset));
2390 } 2404 }
2391 2405
2392 io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); 2406 io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL);
@@ -2404,7 +2418,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2404 * By default, we want to optimize all I/Os with async request 2418 * By default, we want to optimize all I/Os with async request
2405 * submission to the client filesystem if supported. 2419 * submission to the client filesystem if supported.
2406 */ 2420 */
2407 io->async = ff->fc->async_dio; 2421 io->async = async_dio;
2408 io->iocb = iocb; 2422 io->iocb = iocb;
2409 2423
2410 /* 2424 /*
@@ -2412,7 +2426,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2412 * to wait on real async I/O requests, so we must submit this request 2426 * to wait on real async I/O requests, so we must submit this request
2413 * synchronously. 2427 * synchronously.
2414 */ 2428 */
2415 if (!is_sync_kiocb(iocb) && (offset + count > i_size)) 2429 if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE)
2416 io->async = false; 2430 io->async = false;
2417 2431
2418 if (rw == WRITE) 2432 if (rw == WRITE)
@@ -2424,7 +2438,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2424 fuse_aio_complete(io, ret < 0 ? ret : 0, -1); 2438 fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
2425 2439
2426 /* we have a non-extending, async request, so return */ 2440 /* we have a non-extending, async request, so return */
2427 if (ret > 0 && !is_sync_kiocb(iocb)) 2441 if (!is_sync_kiocb(iocb))
2428 return -EIOCBQUEUED; 2442 return -EIOCBQUEUED;
2429 2443
2430 ret = wait_on_sync_kiocb(iocb); 2444 ret = wait_on_sync_kiocb(iocb);
@@ -2446,6 +2460,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2446 loff_t length) 2460 loff_t length)
2447{ 2461{
2448 struct fuse_file *ff = file->private_data; 2462 struct fuse_file *ff = file->private_data;
2463 struct inode *inode = file->f_inode;
2449 struct fuse_conn *fc = ff->fc; 2464 struct fuse_conn *fc = ff->fc;
2450 struct fuse_req *req; 2465 struct fuse_req *req;
2451 struct fuse_fallocate_in inarg = { 2466 struct fuse_fallocate_in inarg = {
@@ -2455,13 +2470,23 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2455 .mode = mode 2470 .mode = mode
2456 }; 2471 };
2457 int err; 2472 int err;
2473 bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) ||
2474 (mode & FALLOC_FL_PUNCH_HOLE);
2458 2475
2459 if (fc->no_fallocate) 2476 if (fc->no_fallocate)
2460 return -EOPNOTSUPP; 2477 return -EOPNOTSUPP;
2461 2478
2479 if (lock_inode) {
2480 mutex_lock(&inode->i_mutex);
2481 if (mode & FALLOC_FL_PUNCH_HOLE)
2482 fuse_set_nowrite(inode);
2483 }
2484
2462 req = fuse_get_req_nopages(fc); 2485 req = fuse_get_req_nopages(fc);
2463 if (IS_ERR(req)) 2486 if (IS_ERR(req)) {
2464 return PTR_ERR(req); 2487 err = PTR_ERR(req);
2488 goto out;
2489 }
2465 2490
2466 req->in.h.opcode = FUSE_FALLOCATE; 2491 req->in.h.opcode = FUSE_FALLOCATE;
2467 req->in.h.nodeid = ff->nodeid; 2492 req->in.h.nodeid = ff->nodeid;
@@ -2476,6 +2501,25 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
2476 } 2501 }
2477 fuse_put_request(fc, req); 2502 fuse_put_request(fc, req);
2478 2503
2504 if (err)
2505 goto out;
2506
2507 /* we could have extended the file */
2508 if (!(mode & FALLOC_FL_KEEP_SIZE))
2509 fuse_write_update_size(inode, offset + length);
2510
2511 if (mode & FALLOC_FL_PUNCH_HOLE)
2512 truncate_pagecache_range(inode, offset, offset + length - 1);
2513
2514 fuse_invalidate_attr(inode);
2515
2516out:
2517 if (lock_inode) {
2518 if (mode & FALLOC_FL_PUNCH_HOLE)
2519 fuse_release_nowrite(inode);
2520 mutex_unlock(&inode->i_mutex);
2521 }
2522
2479 return err; 2523 return err;
2480} 2524}
2481 2525
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6201f81e4d3a..9a0cdde14a08 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -867,10 +867,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
867 fc->dont_mask = 1; 867 fc->dont_mask = 1;
868 if (arg->flags & FUSE_AUTO_INVAL_DATA) 868 if (arg->flags & FUSE_AUTO_INVAL_DATA)
869 fc->auto_inval_data = 1; 869 fc->auto_inval_data = 1;
870 if (arg->flags & FUSE_DO_READDIRPLUS) 870 if (arg->flags & FUSE_DO_READDIRPLUS) {
871 fc->do_readdirplus = 1; 871 fc->do_readdirplus = 1;
872 if (arg->flags & FUSE_READDIRPLUS_AUTO) 872 if (arg->flags & FUSE_READDIRPLUS_AUTO)
873 fc->readdirplus_auto = 1; 873 fc->readdirplus_auto = 1;
874 }
874 if (arg->flags & FUSE_ASYNC_DIO) 875 if (arg->flags & FUSE_ASYNC_DIO)
875 fc->async_dio = 1; 876 fc->async_dio = 1;
876 } else { 877 } else {
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 1dc9a13ce6bb..93b5809c20bb 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1286,17 +1286,26 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
1286 if (ret) 1286 if (ret)
1287 return ret; 1287 return ret;
1288 1288
1289 ret = get_write_access(inode);
1290 if (ret)
1291 return ret;
1292
1289 inode_dio_wait(inode); 1293 inode_dio_wait(inode);
1290 1294
1291 ret = gfs2_rs_alloc(GFS2_I(inode)); 1295 ret = gfs2_rs_alloc(GFS2_I(inode));
1292 if (ret) 1296 if (ret)
1293 return ret; 1297 goto out;
1294 1298
1295 oldsize = inode->i_size; 1299 oldsize = inode->i_size;
1296 if (newsize >= oldsize) 1300 if (newsize >= oldsize) {
1297 return do_grow(inode, newsize); 1301 ret = do_grow(inode, newsize);
1302 goto out;
1303 }
1298 1304
1299 return do_shrink(inode, oldsize, newsize); 1305 ret = do_shrink(inode, oldsize, newsize);
1306out:
1307 put_write_access(inode);
1308 return ret;
1300} 1309}
1301 1310
1302int gfs2_truncatei_resume(struct gfs2_inode *ip) 1311int gfs2_truncatei_resume(struct gfs2_inode *ip)
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index c3e82bd23179..b631c9043460 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -354,22 +354,31 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
354 return ERR_PTR(-EIO); 354 return ERR_PTR(-EIO);
355 } 355 }
356 356
357 hc = kmalloc(hsize, GFP_NOFS); 357 hc = kmalloc(hsize, GFP_NOFS | __GFP_NOWARN);
358 ret = -ENOMEM; 358 if (hc == NULL)
359 hc = __vmalloc(hsize, GFP_NOFS, PAGE_KERNEL);
360
359 if (hc == NULL) 361 if (hc == NULL)
360 return ERR_PTR(-ENOMEM); 362 return ERR_PTR(-ENOMEM);
361 363
362 ret = gfs2_dir_read_data(ip, hc, hsize); 364 ret = gfs2_dir_read_data(ip, hc, hsize);
363 if (ret < 0) { 365 if (ret < 0) {
364 kfree(hc); 366 if (is_vmalloc_addr(hc))
367 vfree(hc);
368 else
369 kfree(hc);
365 return ERR_PTR(ret); 370 return ERR_PTR(ret);
366 } 371 }
367 372
368 spin_lock(&inode->i_lock); 373 spin_lock(&inode->i_lock);
369 if (ip->i_hash_cache) 374 if (ip->i_hash_cache) {
370 kfree(hc); 375 if (is_vmalloc_addr(hc))
371 else 376 vfree(hc);
377 else
378 kfree(hc);
379 } else {
372 ip->i_hash_cache = hc; 380 ip->i_hash_cache = hc;
381 }
373 spin_unlock(&inode->i_lock); 382 spin_unlock(&inode->i_lock);
374 383
375 return ip->i_hash_cache; 384 return ip->i_hash_cache;
@@ -385,7 +394,10 @@ void gfs2_dir_hash_inval(struct gfs2_inode *ip)
385{ 394{
386 __be64 *hc = ip->i_hash_cache; 395 __be64 *hc = ip->i_hash_cache;
387 ip->i_hash_cache = NULL; 396 ip->i_hash_cache = NULL;
388 kfree(hc); 397 if (is_vmalloc_addr(hc))
398 vfree(hc);
399 else
400 kfree(hc);
389} 401}
390 402
391static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent) 403static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent)
@@ -1113,7 +1125,10 @@ static int dir_double_exhash(struct gfs2_inode *dip)
1113 if (IS_ERR(hc)) 1125 if (IS_ERR(hc))
1114 return PTR_ERR(hc); 1126 return PTR_ERR(hc);
1115 1127
1116 h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS); 1128 h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS | __GFP_NOWARN);
1129 if (hc2 == NULL)
1130 hc2 = __vmalloc(hsize_bytes * 2, GFP_NOFS, PAGE_KERNEL);
1131
1117 if (!hc2) 1132 if (!hc2)
1118 return -ENOMEM; 1133 return -ENOMEM;
1119 1134
@@ -1145,7 +1160,10 @@ fail:
1145 gfs2_dinode_out(dip, dibh->b_data); 1160 gfs2_dinode_out(dip, dibh->b_data);
1146 brelse(dibh); 1161 brelse(dibh);
1147out_kfree: 1162out_kfree:
1148 kfree(hc2); 1163 if (is_vmalloc_addr(hc2))
1164 vfree(hc2);
1165 else
1166 kfree(hc2);
1149 return error; 1167 return error;
1150} 1168}
1151 1169
@@ -1846,6 +1864,8 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1846 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); 1864 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
1847 1865
1848 ht = kzalloc(size, GFP_NOFS); 1866 ht = kzalloc(size, GFP_NOFS);
1867 if (ht == NULL)
1868 ht = vzalloc(size);
1849 if (!ht) 1869 if (!ht)
1850 return -ENOMEM; 1870 return -ENOMEM;
1851 1871
@@ -1933,7 +1953,10 @@ out_rlist:
1933 gfs2_rlist_free(&rlist); 1953 gfs2_rlist_free(&rlist);
1934 gfs2_quota_unhold(dip); 1954 gfs2_quota_unhold(dip);
1935out: 1955out:
1936 kfree(ht); 1956 if (is_vmalloc_addr(ht))
1957 vfree(ht);
1958 else
1959 kfree(ht);
1937 return error; 1960 return error;
1938} 1961}
1939 1962
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index acd16764b133..ad0dc38d87ab 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -402,16 +402,20 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
402 /* Update file times before taking page lock */ 402 /* Update file times before taking page lock */
403 file_update_time(vma->vm_file); 403 file_update_time(vma->vm_file);
404 404
405 ret = get_write_access(inode);
406 if (ret)
407 goto out;
408
405 ret = gfs2_rs_alloc(ip); 409 ret = gfs2_rs_alloc(ip);
406 if (ret) 410 if (ret)
407 return ret; 411 goto out_write_access;
408 412
409 gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE); 413 gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE);
410 414
411 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 415 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
412 ret = gfs2_glock_nq(&gh); 416 ret = gfs2_glock_nq(&gh);
413 if (ret) 417 if (ret)
414 goto out; 418 goto out_uninit;
415 419
416 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); 420 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
417 set_bit(GIF_SW_PAGED, &ip->i_flags); 421 set_bit(GIF_SW_PAGED, &ip->i_flags);
@@ -480,12 +484,15 @@ out_quota_unlock:
480 gfs2_quota_unlock(ip); 484 gfs2_quota_unlock(ip);
481out_unlock: 485out_unlock:
482 gfs2_glock_dq(&gh); 486 gfs2_glock_dq(&gh);
483out: 487out_uninit:
484 gfs2_holder_uninit(&gh); 488 gfs2_holder_uninit(&gh);
485 if (ret == 0) { 489 if (ret == 0) {
486 set_page_dirty(page); 490 set_page_dirty(page);
487 wait_for_stable_page(page); 491 wait_for_stable_page(page);
488 } 492 }
493out_write_access:
494 put_write_access(inode);
495out:
489 sb_end_pagefault(inode->i_sb); 496 sb_end_pagefault(inode->i_sb);
490 return block_page_mkwrite_return(ret); 497 return block_page_mkwrite_return(ret);
491} 498}
@@ -594,10 +601,10 @@ static int gfs2_release(struct inode *inode, struct file *file)
594 kfree(file->private_data); 601 kfree(file->private_data);
595 file->private_data = NULL; 602 file->private_data = NULL;
596 603
597 if ((file->f_mode & FMODE_WRITE) && 604 if (!(file->f_mode & FMODE_WRITE))
598 (atomic_read(&inode->i_writecount) == 1)) 605 return 0;
599 gfs2_rs_delete(ip);
600 606
607 gfs2_rs_delete(ip);
601 return 0; 608 return 0;
602} 609}
603 610
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 8833a4f264e3..62b484e4a9e4 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -189,6 +189,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
189 return inode; 189 return inode;
190 190
191fail_refresh: 191fail_refresh:
192 ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
192 ip->i_iopen_gh.gh_gl->gl_object = NULL; 193 ip->i_iopen_gh.gh_gl->gl_object = NULL;
193 gfs2_glock_dq_uninit(&ip->i_iopen_gh); 194 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
194fail_iopen: 195fail_iopen:
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 68b4c8f1fce8..6c33d7b6e0c4 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -419,7 +419,9 @@ static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit,
419 if (total > limit) 419 if (total > limit)
420 num = limit; 420 num = limit;
421 gfs2_log_unlock(sdp); 421 gfs2_log_unlock(sdp);
422 page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA, num + 1, num); 422 page = gfs2_get_log_desc(sdp,
423 is_databuf ? GFS2_LOG_DESC_JDATA :
424 GFS2_LOG_DESC_METADATA, num + 1, num);
423 ld = page_address(page); 425 ld = page_address(page);
424 gfs2_log_lock(sdp); 426 gfs2_log_lock(sdp);
425 ptr = (__be64 *)(ld + 1); 427 ptr = (__be64 *)(ld + 1);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 5232525934ae..9809156e3d04 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -638,8 +638,10 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
638 */ 638 */
639void gfs2_rs_delete(struct gfs2_inode *ip) 639void gfs2_rs_delete(struct gfs2_inode *ip)
640{ 640{
641 struct inode *inode = &ip->i_inode;
642
641 down_write(&ip->i_rw_mutex); 643 down_write(&ip->i_rw_mutex);
642 if (ip->i_res) { 644 if (ip->i_res && atomic_read(&inode->i_writecount) <= 1) {
643 gfs2_rs_deltree(ip->i_res); 645 gfs2_rs_deltree(ip->i_res);
644 BUG_ON(ip->i_res->rs_free); 646 BUG_ON(ip->i_res->rs_free);
645 kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); 647 kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 917c8e1eb4ae..e5639dec66c4 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1444,6 +1444,7 @@ static void gfs2_evict_inode(struct inode *inode)
1444 /* Must not read inode block until block type has been verified */ 1444 /* Must not read inode block until block type has been verified */
1445 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh); 1445 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
1446 if (unlikely(error)) { 1446 if (unlikely(error)) {
1447 ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
1447 gfs2_glock_dq_uninit(&ip->i_iopen_gh); 1448 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
1448 goto out; 1449 goto out;
1449 } 1450 }
@@ -1514,8 +1515,10 @@ out_unlock:
1514 if (gfs2_rs_active(ip->i_res)) 1515 if (gfs2_rs_active(ip->i_res))
1515 gfs2_rs_deltree(ip->i_res); 1516 gfs2_rs_deltree(ip->i_res);
1516 1517
1517 if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) 1518 if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
1519 ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
1518 gfs2_glock_dq(&ip->i_iopen_gh); 1520 gfs2_glock_dq(&ip->i_iopen_gh);
1521 }
1519 gfs2_holder_uninit(&ip->i_iopen_gh); 1522 gfs2_holder_uninit(&ip->i_iopen_gh);
1520 gfs2_glock_dq_uninit(&gh); 1523 gfs2_glock_dq_uninit(&gh);
1521 if (error && error != GLR_TRYFAILED && error != -EROFS) 1524 if (error && error != GLR_TRYFAILED && error != -EROFS)
@@ -1534,6 +1537,7 @@ out:
1534 ip->i_gl = NULL; 1537 ip->i_gl = NULL;
1535 if (ip->i_iopen_gh.gh_gl) { 1538 if (ip->i_iopen_gh.gh_gl) {
1536 ip->i_iopen_gh.gh_gl->gl_object = NULL; 1539 ip->i_iopen_gh.gh_gl->gl_object = NULL;
1540 ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
1537 gfs2_glock_dq_uninit(&ip->i_iopen_gh); 1541 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
1538 } 1542 }
1539} 1543}
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 546f6d39713a..834ac13c04b7 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -33,25 +33,27 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
33 if (whence == SEEK_DATA || whence == SEEK_HOLE) 33 if (whence == SEEK_DATA || whence == SEEK_HOLE)
34 return -EINVAL; 34 return -EINVAL;
35 35
36 mutex_lock(&i->i_mutex);
36 hpfs_lock(s); 37 hpfs_lock(s);
37 38
38 /*printk("dir lseek\n");*/ 39 /*printk("dir lseek\n");*/
39 if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok; 40 if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok;
40 mutex_lock(&i->i_mutex);
41 pos = ((loff_t) hpfs_de_as_down_as_possible(s, hpfs_inode->i_dno) << 4) + 1; 41 pos = ((loff_t) hpfs_de_as_down_as_possible(s, hpfs_inode->i_dno) << 4) + 1;
42 while (pos != new_off) { 42 while (pos != new_off) {
43 if (map_pos_dirent(i, &pos, &qbh)) hpfs_brelse4(&qbh); 43 if (map_pos_dirent(i, &pos, &qbh)) hpfs_brelse4(&qbh);
44 else goto fail; 44 else goto fail;
45 if (pos == 12) goto fail; 45 if (pos == 12) goto fail;
46 } 46 }
47 mutex_unlock(&i->i_mutex); 47 hpfs_add_pos(i, &filp->f_pos);
48ok: 48ok:
49 filp->f_pos = new_off;
49 hpfs_unlock(s); 50 hpfs_unlock(s);
50 return filp->f_pos = new_off;
51fail:
52 mutex_unlock(&i->i_mutex); 51 mutex_unlock(&i->i_mutex);
52 return new_off;
53fail:
53 /*printk("illegal lseek: %016llx\n", new_off);*/ 54 /*printk("illegal lseek: %016llx\n", new_off);*/
54 hpfs_unlock(s); 55 hpfs_unlock(s);
56 mutex_unlock(&i->i_mutex);
55 return -ESPIPE; 57 return -ESPIPE;
56} 58}
57 59
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 3027f4dbbab5..e4ba5fe4c3b5 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -109,10 +109,14 @@ static void hpfs_write_failed(struct address_space *mapping, loff_t to)
109{ 109{
110 struct inode *inode = mapping->host; 110 struct inode *inode = mapping->host;
111 111
112 hpfs_lock(inode->i_sb);
113
112 if (to > inode->i_size) { 114 if (to > inode->i_size) {
113 truncate_pagecache(inode, to, inode->i_size); 115 truncate_pagecache(inode, to, inode->i_size);
114 hpfs_truncate(inode); 116 hpfs_truncate(inode);
115 } 117 }
118
119 hpfs_unlock(inode->i_sb);
116} 120}
117 121
118static int hpfs_write_begin(struct file *file, struct address_space *mapping, 122static int hpfs_write_begin(struct file *file, struct address_space *mapping,
diff --git a/fs/internal.h b/fs/internal.h
index eaa75f75b625..68121584ae37 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -132,6 +132,12 @@ extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
132extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); 132extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *);
133 133
134/* 134/*
135 * splice.c
136 */
137extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
138 loff_t *opos, size_t len, unsigned int flags);
139
140/*
135 * pipe.c 141 * pipe.c
136 */ 142 */
137extern const struct file_operations pipefifo_fops; 143extern const struct file_operations pipefifo_fops;
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index c57499dca89c..360d27c48887 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2009,7 +2009,13 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
2009 2009
2010 bio->bi_end_io = lbmIODone; 2010 bio->bi_end_io = lbmIODone;
2011 bio->bi_private = bp; 2011 bio->bi_private = bp;
2012 submit_bio(READ_SYNC, bio); 2012 /*check if journaling to disk has been disabled*/
2013 if (log->no_integrity) {
2014 bio->bi_size = 0;
2015 lbmIODone(bio, 0);
2016 } else {
2017 submit_bio(READ_SYNC, bio);
2018 }
2013 2019
2014 wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD)); 2020 wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
2015 2021
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 2003e830ed1c..788e0a9c1fb0 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -611,11 +611,28 @@ static int jfs_freeze(struct super_block *sb)
611{ 611{
612 struct jfs_sb_info *sbi = JFS_SBI(sb); 612 struct jfs_sb_info *sbi = JFS_SBI(sb);
613 struct jfs_log *log = sbi->log; 613 struct jfs_log *log = sbi->log;
614 int rc = 0;
614 615
615 if (!(sb->s_flags & MS_RDONLY)) { 616 if (!(sb->s_flags & MS_RDONLY)) {
616 txQuiesce(sb); 617 txQuiesce(sb);
617 lmLogShutdown(log); 618 rc = lmLogShutdown(log);
618 updateSuper(sb, FM_CLEAN); 619 if (rc) {
620 jfs_error(sb, "jfs_freeze: lmLogShutdown failed");
621
622 /* let operations fail rather than hang */
623 txResume(sb);
624
625 return rc;
626 }
627 rc = updateSuper(sb, FM_CLEAN);
628 if (rc) {
629 jfs_err("jfs_freeze: updateSuper failed\n");
630 /*
631 * Don't fail here. Everything succeeded except
632 * marking the superblock clean, so there's really
633 * no harm in leaving it frozen for now.
634 */
635 }
619 } 636 }
620 return 0; 637 return 0;
621} 638}
@@ -627,13 +644,18 @@ static int jfs_unfreeze(struct super_block *sb)
627 int rc = 0; 644 int rc = 0;
628 645
629 if (!(sb->s_flags & MS_RDONLY)) { 646 if (!(sb->s_flags & MS_RDONLY)) {
630 updateSuper(sb, FM_MOUNT); 647 rc = updateSuper(sb, FM_MOUNT);
631 if ((rc = lmLogInit(log))) 648 if (rc) {
632 jfs_err("jfs_unlock failed with return code %d", rc); 649 jfs_error(sb, "jfs_unfreeze: updateSuper failed");
633 else 650 goto out;
634 txResume(sb); 651 }
652 rc = lmLogInit(log);
653 if (rc)
654 jfs_error(sb, "jfs_unfreeze: lmLogInit failed");
655out:
656 txResume(sb);
635 } 657 }
636 return 0; 658 return rc;
637} 659}
638 660
639static struct dentry *jfs_do_mount(struct file_system_type *fs_type, 661static struct dentry *jfs_do_mount(struct file_system_type *fs_type,
diff --git a/fs/namei.c b/fs/namei.c
index 85e40d1c0a8f..9ed9361223c0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1976,7 +1976,7 @@ static int path_lookupat(int dfd, const char *name,
1976 err = complete_walk(nd); 1976 err = complete_walk(nd);
1977 1977
1978 if (!err && nd->flags & LOOKUP_DIRECTORY) { 1978 if (!err && nd->flags & LOOKUP_DIRECTORY) {
1979 if (!nd->inode->i_op->lookup) { 1979 if (!can_lookup(nd->inode)) {
1980 path_put(&nd->path); 1980 path_put(&nd->path);
1981 err = -ENOTDIR; 1981 err = -ENOTDIR;
1982 } 1982 }
@@ -2850,7 +2850,7 @@ finish_lookup:
2850 if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) 2850 if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode))
2851 goto out; 2851 goto out;
2852 error = -ENOTDIR; 2852 error = -ENOTDIR;
2853 if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup) 2853 if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(nd->inode))
2854 goto out; 2854 goto out;
2855 audit_inode(name, nd->path.dentry, 0); 2855 audit_inode(name, nd->path.dentry, 0);
2856finish_open: 2856finish_open:
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 816326093656..6792ce11f2bf 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -1029,15 +1029,6 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry)
1029 DPRINTK("ncp_rmdir: removing %s/%s\n", 1029 DPRINTK("ncp_rmdir: removing %s/%s\n",
1030 dentry->d_parent->d_name.name, dentry->d_name.name); 1030 dentry->d_parent->d_name.name, dentry->d_name.name);
1031 1031
1032 /*
1033 * fail with EBUSY if there are still references to this
1034 * directory.
1035 */
1036 dentry_unhash(dentry);
1037 error = -EBUSY;
1038 if (!d_unhashed(dentry))
1039 goto out;
1040
1041 len = sizeof(__name); 1032 len = sizeof(__name);
1042 error = ncp_io2vol(server, __name, &len, dentry->d_name.name, 1033 error = ncp_io2vol(server, __name, &len, dentry->d_name.name,
1043 dentry->d_name.len, !ncp_preserve_case(dir)); 1034 dentry->d_name.len, !ncp_preserve_case(dir));
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4e2fe714d5c2..d7ba5616989c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1078,7 +1078,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
1078 struct nfs4_state *state = opendata->state; 1078 struct nfs4_state *state = opendata->state;
1079 struct nfs_inode *nfsi = NFS_I(state->inode); 1079 struct nfs_inode *nfsi = NFS_I(state->inode);
1080 struct nfs_delegation *delegation; 1080 struct nfs_delegation *delegation;
1081 int open_mode = opendata->o_arg.open_flags & (O_EXCL|O_TRUNC); 1081 int open_mode = opendata->o_arg.open_flags;
1082 fmode_t fmode = opendata->o_arg.fmode; 1082 fmode_t fmode = opendata->o_arg.fmode;
1083 nfs4_stateid stateid; 1083 nfs4_stateid stateid;
1084 int ret = -EAGAIN; 1084 int ret = -EAGAIN;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index a366107a7331..2d7525fbcf25 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1942,6 +1942,7 @@ static int nfs23_validate_mount_data(void *options,
1942 args->namlen = data->namlen; 1942 args->namlen = data->namlen;
1943 args->bsize = data->bsize; 1943 args->bsize = data->bsize;
1944 1944
1945 args->auth_flavors[0] = RPC_AUTH_UNIX;
1945 if (data->flags & NFS_MOUNT_SECFLAVOUR) 1946 if (data->flags & NFS_MOUNT_SECFLAVOUR)
1946 args->auth_flavors[0] = data->pseudoflavor; 1947 args->auth_flavors[0] = data->pseudoflavor;
1947 if (!args->nfs_server.hostname) 1948 if (!args->nfs_server.hostname)
@@ -2637,6 +2638,7 @@ static int nfs4_validate_mount_data(void *options,
2637 goto out_no_address; 2638 goto out_no_address;
2638 args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); 2639 args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port);
2639 2640
2641 args->auth_flavors[0] = RPC_AUTH_UNIX;
2640 if (data->auth_flavourlen) { 2642 if (data->auth_flavourlen) {
2641 if (data->auth_flavourlen > 1) 2643 if (data->auth_flavourlen > 1)
2642 goto out_inval_auth; 2644 goto out_inval_auth;
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index b3fdd1a323d6..e68588e6b1e8 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1408,6 +1408,7 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
1408 mres->lockname_len, mres->lockname); 1408 mres->lockname_len, mres->lockname);
1409 ret = -EFAULT; 1409 ret = -EFAULT;
1410 spin_unlock(&res->spinlock); 1410 spin_unlock(&res->spinlock);
1411 dlm_lockres_put(res);
1411 goto leave; 1412 goto leave;
1412 } 1413 }
1413 res->state |= DLM_LOCK_RES_MIGRATING; 1414 res->state |= DLM_LOCK_RES_MIGRATING;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 04ee1b57c243..b4a5cdf9dbc5 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -947,7 +947,7 @@ leave:
947 ocfs2_free_dir_lookup_result(&orphan_insert); 947 ocfs2_free_dir_lookup_result(&orphan_insert);
948 ocfs2_free_dir_lookup_result(&lookup); 948 ocfs2_free_dir_lookup_result(&lookup);
949 949
950 if (status) 950 if (status && (status != -ENOTEMPTY))
951 mlog_errno(status); 951 mlog_errno(status);
952 952
953 return status; 953 return status;
@@ -2216,7 +2216,7 @@ out:
2216 2216
2217 brelse(orphan_dir_bh); 2217 brelse(orphan_dir_bh);
2218 2218
2219 return 0; 2219 return ret;
2220} 2220}
2221 2221
2222int ocfs2_create_inode_in_orphan(struct inode *dir, 2222int ocfs2_create_inode_in_orphan(struct inode *dir,
diff --git a/fs/pnode.c b/fs/pnode.c
index 3d2a7141b87a..9af0df15256e 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -83,7 +83,8 @@ static int do_make_slave(struct mount *mnt)
83 if (peer_mnt == mnt) 83 if (peer_mnt == mnt)
84 peer_mnt = NULL; 84 peer_mnt = NULL;
85 } 85 }
86 if (IS_MNT_SHARED(mnt) && list_empty(&mnt->mnt_share)) 86 if (mnt->mnt_group_id && IS_MNT_SHARED(mnt) &&
87 list_empty(&mnt->mnt_share))
87 mnt_release_group_id(mnt); 88 mnt_release_group_id(mnt);
88 89
89 list_del_init(&mnt->mnt_share); 90 list_del_init(&mnt->mnt_share);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dd51e50001fe..c3834dad09b3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2118,6 +2118,7 @@ static int show_timer(struct seq_file *m, void *v)
2118 nstr[notify & ~SIGEV_THREAD_ID], 2118 nstr[notify & ~SIGEV_THREAD_ID],
2119 (notify & SIGEV_THREAD_ID) ? "tid" : "pid", 2119 (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
2120 pid_nr_ns(timer->it_pid, tp->ns)); 2120 pid_nr_ns(timer->it_pid, tp->ns));
2121 seq_printf(m, "ClockID: %d\n", timer->it_clock);
2121 2122
2122 return 0; 2123 return 0;
2123} 2124}
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index bd4b5a740ff1..bdfabdaefdce 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -21,12 +21,12 @@ extern wait_queue_head_t log_wait;
21 21
22static int kmsg_open(struct inode * inode, struct file * file) 22static int kmsg_open(struct inode * inode, struct file * file)
23{ 23{
24 return do_syslog(SYSLOG_ACTION_OPEN, NULL, 0, SYSLOG_FROM_FILE); 24 return do_syslog(SYSLOG_ACTION_OPEN, NULL, 0, SYSLOG_FROM_PROC);
25} 25}
26 26
27static int kmsg_release(struct inode * inode, struct file * file) 27static int kmsg_release(struct inode * inode, struct file * file)
28{ 28{
29 (void) do_syslog(SYSLOG_ACTION_CLOSE, NULL, 0, SYSLOG_FROM_FILE); 29 (void) do_syslog(SYSLOG_ACTION_CLOSE, NULL, 0, SYSLOG_FROM_PROC);
30 return 0; 30 return 0;
31} 31}
32 32
@@ -34,15 +34,15 @@ static ssize_t kmsg_read(struct file *file, char __user *buf,
34 size_t count, loff_t *ppos) 34 size_t count, loff_t *ppos)
35{ 35{
36 if ((file->f_flags & O_NONBLOCK) && 36 if ((file->f_flags & O_NONBLOCK) &&
37 !do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_FILE)) 37 !do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_PROC))
38 return -EAGAIN; 38 return -EAGAIN;
39 return do_syslog(SYSLOG_ACTION_READ, buf, count, SYSLOG_FROM_FILE); 39 return do_syslog(SYSLOG_ACTION_READ, buf, count, SYSLOG_FROM_PROC);
40} 40}
41 41
42static unsigned int kmsg_poll(struct file *file, poll_table *wait) 42static unsigned int kmsg_poll(struct file *file, poll_table *wait)
43{ 43{
44 poll_wait(file, &log_wait, wait); 44 poll_wait(file, &log_wait, wait);
45 if (do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_FILE)) 45 if (do_syslog(SYSLOG_ACTION_SIZE_UNREAD, NULL, 0, SYSLOG_FROM_PROC))
46 return POLLIN | POLLRDNORM; 46 return POLLIN | POLLRDNORM;
47 return 0; 47 return 0;
48} 48}
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c
index 8798d065e400..afa6be6fc397 100644
--- a/fs/qnx6/dir.c
+++ b/fs/qnx6/dir.c
@@ -120,7 +120,7 @@ static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir)
120 struct inode *inode = file_inode(filp); 120 struct inode *inode = file_inode(filp);
121 struct super_block *s = inode->i_sb; 121 struct super_block *s = inode->i_sb;
122 struct qnx6_sb_info *sbi = QNX6_SB(s); 122 struct qnx6_sb_info *sbi = QNX6_SB(s);
123 loff_t pos = filp->f_pos & (QNX6_DIR_ENTRY_SIZE - 1); 123 loff_t pos = filp->f_pos & ~(QNX6_DIR_ENTRY_SIZE - 1);
124 unsigned long npages = dir_pages(inode); 124 unsigned long npages = dir_pages(inode);
125 unsigned long n = pos >> PAGE_CACHE_SHIFT; 125 unsigned long n = pos >> PAGE_CACHE_SHIFT;
126 unsigned start = (pos & ~PAGE_CACHE_MASK) / QNX6_DIR_ENTRY_SIZE; 126 unsigned start = (pos & ~PAGE_CACHE_MASK) / QNX6_DIR_ENTRY_SIZE;
diff --git a/fs/read_write.c b/fs/read_write.c
index 03430008704e..2cefa417be34 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1064,6 +1064,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
1064 struct fd in, out; 1064 struct fd in, out;
1065 struct inode *in_inode, *out_inode; 1065 struct inode *in_inode, *out_inode;
1066 loff_t pos; 1066 loff_t pos;
1067 loff_t out_pos;
1067 ssize_t retval; 1068 ssize_t retval;
1068 int fl; 1069 int fl;
1069 1070
@@ -1077,12 +1078,14 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
1077 if (!(in.file->f_mode & FMODE_READ)) 1078 if (!(in.file->f_mode & FMODE_READ))
1078 goto fput_in; 1079 goto fput_in;
1079 retval = -ESPIPE; 1080 retval = -ESPIPE;
1080 if (!ppos) 1081 if (!ppos) {
1081 ppos = &in.file->f_pos; 1082 pos = in.file->f_pos;
1082 else 1083 } else {
1084 pos = *ppos;
1083 if (!(in.file->f_mode & FMODE_PREAD)) 1085 if (!(in.file->f_mode & FMODE_PREAD))
1084 goto fput_in; 1086 goto fput_in;
1085 retval = rw_verify_area(READ, in.file, ppos, count); 1087 }
1088 retval = rw_verify_area(READ, in.file, &pos, count);
1086 if (retval < 0) 1089 if (retval < 0)
1087 goto fput_in; 1090 goto fput_in;
1088 count = retval; 1091 count = retval;
@@ -1099,7 +1102,8 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
1099 retval = -EINVAL; 1102 retval = -EINVAL;
1100 in_inode = file_inode(in.file); 1103 in_inode = file_inode(in.file);
1101 out_inode = file_inode(out.file); 1104 out_inode = file_inode(out.file);
1102 retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count); 1105 out_pos = out.file->f_pos;
1106 retval = rw_verify_area(WRITE, out.file, &out_pos, count);
1103 if (retval < 0) 1107 if (retval < 0)
1104 goto fput_out; 1108 goto fput_out;
1105 count = retval; 1109 count = retval;
@@ -1107,7 +1111,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
1107 if (!max) 1111 if (!max)
1108 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 1112 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
1109 1113
1110 pos = *ppos;
1111 if (unlikely(pos + count > max)) { 1114 if (unlikely(pos + count > max)) {
1112 retval = -EOVERFLOW; 1115 retval = -EOVERFLOW;
1113 if (pos >= max) 1116 if (pos >= max)
@@ -1126,18 +1129,23 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
1126 if (in.file->f_flags & O_NONBLOCK) 1129 if (in.file->f_flags & O_NONBLOCK)
1127 fl = SPLICE_F_NONBLOCK; 1130 fl = SPLICE_F_NONBLOCK;
1128#endif 1131#endif
1129 retval = do_splice_direct(in.file, ppos, out.file, count, fl); 1132 retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl);
1130 1133
1131 if (retval > 0) { 1134 if (retval > 0) {
1132 add_rchar(current, retval); 1135 add_rchar(current, retval);
1133 add_wchar(current, retval); 1136 add_wchar(current, retval);
1134 fsnotify_access(in.file); 1137 fsnotify_access(in.file);
1135 fsnotify_modify(out.file); 1138 fsnotify_modify(out.file);
1139 out.file->f_pos = out_pos;
1140 if (ppos)
1141 *ppos = pos;
1142 else
1143 in.file->f_pos = pos;
1136 } 1144 }
1137 1145
1138 inc_syscr(current); 1146 inc_syscr(current);
1139 inc_syscw(current); 1147 inc_syscw(current);
1140 if (*ppos > max) 1148 if (pos > max)
1141 retval = -EOVERFLOW; 1149 retval = -EOVERFLOW;
1142 1150
1143fput_out: 1151fput_out:
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 66c53b642a88..6c2d136561cb 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -204,6 +204,8 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
204 next_pos = deh_offset(deh) + 1; 204 next_pos = deh_offset(deh) + 1;
205 205
206 if (item_moved(&tmp_ih, &path_to_entry)) { 206 if (item_moved(&tmp_ih, &path_to_entry)) {
207 set_cpu_key_k_offset(&pos_key,
208 next_pos);
207 goto research; 209 goto research;
208 } 210 }
209 } /* for */ 211 } /* for */
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 77d6d47abc83..f844533792ee 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1811,11 +1811,16 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1811 TYPE_STAT_DATA, SD_SIZE, MAX_US_INT); 1811 TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
1812 memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE); 1812 memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE);
1813 args.dirid = le32_to_cpu(ih.ih_key.k_dir_id); 1813 args.dirid = le32_to_cpu(ih.ih_key.k_dir_id);
1814 if (insert_inode_locked4(inode, args.objectid, 1814
1815 reiserfs_find_actor, &args) < 0) { 1815 reiserfs_write_unlock(inode->i_sb);
1816 err = insert_inode_locked4(inode, args.objectid,
1817 reiserfs_find_actor, &args);
1818 reiserfs_write_lock(inode->i_sb);
1819 if (err) {
1816 err = -EINVAL; 1820 err = -EINVAL;
1817 goto out_bad_inode; 1821 goto out_bad_inode;
1818 } 1822 }
1823
1819 if (old_format_only(sb)) 1824 if (old_format_only(sb))
1820 /* not a perfect generation count, as object ids can be reused, but 1825 /* not a perfect generation count, as object ids can be reused, but
1821 ** this is as good as reiserfs can do right now. 1826 ** this is as good as reiserfs can do right now.
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 4cce1d9552fb..821bcf70e467 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -318,7 +318,19 @@ static int delete_one_xattr(struct dentry *dentry, void *data)
318static int chown_one_xattr(struct dentry *dentry, void *data) 318static int chown_one_xattr(struct dentry *dentry, void *data)
319{ 319{
320 struct iattr *attrs = data; 320 struct iattr *attrs = data;
321 return reiserfs_setattr(dentry, attrs); 321 int ia_valid = attrs->ia_valid;
322 int err;
323
324 /*
325 * We only want the ownership bits. Otherwise, we'll do
326 * things like change a directory to a regular file if
327 * ATTR_MODE is set.
328 */
329 attrs->ia_valid &= (ATTR_UID|ATTR_GID);
330 err = reiserfs_setattr(dentry, attrs);
331 attrs->ia_valid = ia_valid;
332
333 return err;
322} 334}
323 335
324/* No i_mutex, but the inode is unconnected. */ 336/* No i_mutex, but the inode is unconnected. */
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index d7c01ef64eda..6c8767fdfc6a 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -443,6 +443,9 @@ int reiserfs_acl_chmod(struct inode *inode)
443 int depth; 443 int depth;
444 int error; 444 int error;
445 445
446 if (IS_PRIVATE(inode))
447 return 0;
448
446 if (S_ISLNK(inode->i_mode)) 449 if (S_ISLNK(inode->i_mode))
447 return -EOPNOTSUPP; 450 return -EOPNOTSUPP;
448 451
diff --git a/fs/splice.c b/fs/splice.c
index e6b25598c8c4..d37431dd60a1 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1274,7 +1274,7 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,
1274{ 1274{
1275 struct file *file = sd->u.file; 1275 struct file *file = sd->u.file;
1276 1276
1277 return do_splice_from(pipe, file, &file->f_pos, sd->total_len, 1277 return do_splice_from(pipe, file, sd->opos, sd->total_len,
1278 sd->flags); 1278 sd->flags);
1279} 1279}
1280 1280
@@ -1283,6 +1283,7 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,
1283 * @in: file to splice from 1283 * @in: file to splice from
1284 * @ppos: input file offset 1284 * @ppos: input file offset
1285 * @out: file to splice to 1285 * @out: file to splice to
1286 * @opos: output file offset
1286 * @len: number of bytes to splice 1287 * @len: number of bytes to splice
1287 * @flags: splice modifier flags 1288 * @flags: splice modifier flags
1288 * 1289 *
@@ -1294,7 +1295,7 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,
1294 * 1295 *
1295 */ 1296 */
1296long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, 1297long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
1297 size_t len, unsigned int flags) 1298 loff_t *opos, size_t len, unsigned int flags)
1298{ 1299{
1299 struct splice_desc sd = { 1300 struct splice_desc sd = {
1300 .len = len, 1301 .len = len,
@@ -1302,6 +1303,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
1302 .flags = flags, 1303 .flags = flags,
1303 .pos = *ppos, 1304 .pos = *ppos,
1304 .u.file = out, 1305 .u.file = out,
1306 .opos = opos,
1305 }; 1307 };
1306 long ret; 1308 long ret;
1307 1309
@@ -1325,7 +1327,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1325{ 1327{
1326 struct pipe_inode_info *ipipe; 1328 struct pipe_inode_info *ipipe;
1327 struct pipe_inode_info *opipe; 1329 struct pipe_inode_info *opipe;
1328 loff_t offset, *off; 1330 loff_t offset;
1329 long ret; 1331 long ret;
1330 1332
1331 ipipe = get_pipe_info(in); 1333 ipipe = get_pipe_info(in);
@@ -1356,13 +1358,15 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1356 return -EINVAL; 1358 return -EINVAL;
1357 if (copy_from_user(&offset, off_out, sizeof(loff_t))) 1359 if (copy_from_user(&offset, off_out, sizeof(loff_t)))
1358 return -EFAULT; 1360 return -EFAULT;
1359 off = &offset; 1361 } else {
1360 } else 1362 offset = out->f_pos;
1361 off = &out->f_pos; 1363 }
1362 1364
1363 ret = do_splice_from(ipipe, out, off, len, flags); 1365 ret = do_splice_from(ipipe, out, &offset, len, flags);
1364 1366
1365 if (off_out && copy_to_user(off_out, off, sizeof(loff_t))) 1367 if (!off_out)
1368 out->f_pos = offset;
1369 else if (copy_to_user(off_out, &offset, sizeof(loff_t)))
1366 ret = -EFAULT; 1370 ret = -EFAULT;
1367 1371
1368 return ret; 1372 return ret;
@@ -1376,13 +1380,15 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1376 return -EINVAL; 1380 return -EINVAL;
1377 if (copy_from_user(&offset, off_in, sizeof(loff_t))) 1381 if (copy_from_user(&offset, off_in, sizeof(loff_t)))
1378 return -EFAULT; 1382 return -EFAULT;
1379 off = &offset; 1383 } else {
1380 } else 1384 offset = in->f_pos;
1381 off = &in->f_pos; 1385 }
1382 1386
1383 ret = do_splice_to(in, off, opipe, len, flags); 1387 ret = do_splice_to(in, &offset, opipe, len, flags);
1384 1388
1385 if (off_in && copy_to_user(off_in, off, sizeof(loff_t))) 1389 if (!off_in)
1390 in->f_pos = offset;
1391 else if (copy_to_user(off_in, &offset, sizeof(loff_t)))
1386 ret = -EFAULT; 1392 ret = -EFAULT;
1387 1393
1388 return ret; 1394 return ret;
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index de08c92f2e23..605af512aec2 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -349,31 +349,50 @@ static unsigned int vfs_dent_type(uint8_t type)
349static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) 349static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
350{ 350{
351 int err, over = 0; 351 int err, over = 0;
352 loff_t pos = file->f_pos;
352 struct qstr nm; 353 struct qstr nm;
353 union ubifs_key key; 354 union ubifs_key key;
354 struct ubifs_dent_node *dent; 355 struct ubifs_dent_node *dent;
355 struct inode *dir = file_inode(file); 356 struct inode *dir = file_inode(file);
356 struct ubifs_info *c = dir->i_sb->s_fs_info; 357 struct ubifs_info *c = dir->i_sb->s_fs_info;
357 358
358 dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos); 359 dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, pos);
359 360
360 if (file->f_pos > UBIFS_S_KEY_HASH_MASK || file->f_pos == 2) 361 if (pos > UBIFS_S_KEY_HASH_MASK || pos == 2)
361 /* 362 /*
362 * The directory was seek'ed to a senseless position or there 363 * The directory was seek'ed to a senseless position or there
363 * are no more entries. 364 * are no more entries.
364 */ 365 */
365 return 0; 366 return 0;
366 367
368 if (file->f_version == 0) {
369 /*
370 * The file was seek'ed, which means that @file->private_data
371 * is now invalid. This may also be just the first
372 * 'ubifs_readdir()' invocation, in which case
373 * @file->private_data is NULL, and the below code is
374 * basically a no-op.
375 */
376 kfree(file->private_data);
377 file->private_data = NULL;
378 }
379
380 /*
381 * 'generic_file_llseek()' unconditionally sets @file->f_version to
382 * zero, and we use this for detecting whether the file was seek'ed.
383 */
384 file->f_version = 1;
385
367 /* File positions 0 and 1 correspond to "." and ".." */ 386 /* File positions 0 and 1 correspond to "." and ".." */
368 if (file->f_pos == 0) { 387 if (pos == 0) {
369 ubifs_assert(!file->private_data); 388 ubifs_assert(!file->private_data);
370 over = filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR); 389 over = filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR);
371 if (over) 390 if (over)
372 return 0; 391 return 0;
373 file->f_pos = 1; 392 file->f_pos = pos = 1;
374 } 393 }
375 394
376 if (file->f_pos == 1) { 395 if (pos == 1) {
377 ubifs_assert(!file->private_data); 396 ubifs_assert(!file->private_data);
378 over = filldir(dirent, "..", 2, 1, 397 over = filldir(dirent, "..", 2, 1,
379 parent_ino(file->f_path.dentry), DT_DIR); 398 parent_ino(file->f_path.dentry), DT_DIR);
@@ -389,7 +408,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
389 goto out; 408 goto out;
390 } 409 }
391 410
392 file->f_pos = key_hash_flash(c, &dent->key); 411 file->f_pos = pos = key_hash_flash(c, &dent->key);
393 file->private_data = dent; 412 file->private_data = dent;
394 } 413 }
395 414
@@ -397,17 +416,16 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
397 if (!dent) { 416 if (!dent) {
398 /* 417 /*
399 * The directory was seek'ed to and is now readdir'ed. 418 * The directory was seek'ed to and is now readdir'ed.
400 * Find the entry corresponding to @file->f_pos or the 419 * Find the entry corresponding to @pos or the closest one.
401 * closest one.
402 */ 420 */
403 dent_key_init_hash(c, &key, dir->i_ino, file->f_pos); 421 dent_key_init_hash(c, &key, dir->i_ino, pos);
404 nm.name = NULL; 422 nm.name = NULL;
405 dent = ubifs_tnc_next_ent(c, &key, &nm); 423 dent = ubifs_tnc_next_ent(c, &key, &nm);
406 if (IS_ERR(dent)) { 424 if (IS_ERR(dent)) {
407 err = PTR_ERR(dent); 425 err = PTR_ERR(dent);
408 goto out; 426 goto out;
409 } 427 }
410 file->f_pos = key_hash_flash(c, &dent->key); 428 file->f_pos = pos = key_hash_flash(c, &dent->key);
411 file->private_data = dent; 429 file->private_data = dent;
412 } 430 }
413 431
@@ -419,7 +437,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
419 ubifs_inode(dir)->creat_sqnum); 437 ubifs_inode(dir)->creat_sqnum);
420 438
421 nm.len = le16_to_cpu(dent->nlen); 439 nm.len = le16_to_cpu(dent->nlen);
422 over = filldir(dirent, dent->name, nm.len, file->f_pos, 440 over = filldir(dirent, dent->name, nm.len, pos,
423 le64_to_cpu(dent->inum), 441 le64_to_cpu(dent->inum),
424 vfs_dent_type(dent->type)); 442 vfs_dent_type(dent->type));
425 if (over) 443 if (over)
@@ -435,9 +453,17 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
435 } 453 }
436 454
437 kfree(file->private_data); 455 kfree(file->private_data);
438 file->f_pos = key_hash_flash(c, &dent->key); 456 file->f_pos = pos = key_hash_flash(c, &dent->key);
439 file->private_data = dent; 457 file->private_data = dent;
440 cond_resched(); 458 cond_resched();
459
460 if (file->f_version == 0)
461 /*
462 * The file was seek'ed meanwhile, lets return and start
463 * reading direntries from the new position on the next
464 * invocation.
465 */
466 return 0;
441 } 467 }
442 468
443out: 469out:
@@ -448,15 +474,13 @@ out:
448 474
449 kfree(file->private_data); 475 kfree(file->private_data);
450 file->private_data = NULL; 476 file->private_data = NULL;
477 /* 2 is a special value indicating that there are no more direntries */
451 file->f_pos = 2; 478 file->f_pos = 2;
452 return 0; 479 return 0;
453} 480}
454 481
455/* If a directory is seeked, we have to free saved readdir() state */
456static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int whence) 482static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int whence)
457{ 483{
458 kfree(file->private_data);
459 file->private_data = NULL;
460 return generic_file_llseek(file, offset, whence); 484 return generic_file_llseek(file, offset, whence);
461} 485}
462 486
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 1d32f1d52763..306d883d89bc 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -21,6 +21,8 @@
21#include "xfs_bmap_btree.h" 21#include "xfs_bmap_btree.h"
22#include "xfs_inode.h" 22#include "xfs_inode.h"
23#include "xfs_vnodeops.h" 23#include "xfs_vnodeops.h"
24#include "xfs_sb.h"
25#include "xfs_mount.h"
24#include "xfs_trace.h" 26#include "xfs_trace.h"
25#include <linux/slab.h> 27#include <linux/slab.h>
26#include <linux/xattr.h> 28#include <linux/xattr.h>
@@ -34,7 +36,9 @@
34 */ 36 */
35 37
36STATIC struct posix_acl * 38STATIC struct posix_acl *
37xfs_acl_from_disk(struct xfs_acl *aclp) 39xfs_acl_from_disk(
40 struct xfs_acl *aclp,
41 int max_entries)
38{ 42{
39 struct posix_acl_entry *acl_e; 43 struct posix_acl_entry *acl_e;
40 struct posix_acl *acl; 44 struct posix_acl *acl;
@@ -42,7 +46,7 @@ xfs_acl_from_disk(struct xfs_acl *aclp)
42 unsigned int count, i; 46 unsigned int count, i;
43 47
44 count = be32_to_cpu(aclp->acl_cnt); 48 count = be32_to_cpu(aclp->acl_cnt);
45 if (count > XFS_ACL_MAX_ENTRIES) 49 if (count > max_entries)
46 return ERR_PTR(-EFSCORRUPTED); 50 return ERR_PTR(-EFSCORRUPTED);
47 51
48 acl = posix_acl_alloc(count, GFP_KERNEL); 52 acl = posix_acl_alloc(count, GFP_KERNEL);
@@ -108,9 +112,9 @@ xfs_get_acl(struct inode *inode, int type)
108 struct xfs_inode *ip = XFS_I(inode); 112 struct xfs_inode *ip = XFS_I(inode);
109 struct posix_acl *acl; 113 struct posix_acl *acl;
110 struct xfs_acl *xfs_acl; 114 struct xfs_acl *xfs_acl;
111 int len = sizeof(struct xfs_acl);
112 unsigned char *ea_name; 115 unsigned char *ea_name;
113 int error; 116 int error;
117 int len;
114 118
115 acl = get_cached_acl(inode, type); 119 acl = get_cached_acl(inode, type);
116 if (acl != ACL_NOT_CACHED) 120 if (acl != ACL_NOT_CACHED)
@@ -133,8 +137,8 @@ xfs_get_acl(struct inode *inode, int type)
133 * If we have a cached ACLs value just return it, not need to 137 * If we have a cached ACLs value just return it, not need to
134 * go out to the disk. 138 * go out to the disk.
135 */ 139 */
136 140 len = XFS_ACL_MAX_SIZE(ip->i_mount);
137 xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); 141 xfs_acl = kzalloc(len, GFP_KERNEL);
138 if (!xfs_acl) 142 if (!xfs_acl)
139 return ERR_PTR(-ENOMEM); 143 return ERR_PTR(-ENOMEM);
140 144
@@ -153,7 +157,7 @@ xfs_get_acl(struct inode *inode, int type)
153 goto out; 157 goto out;
154 } 158 }
155 159
156 acl = xfs_acl_from_disk(xfs_acl); 160 acl = xfs_acl_from_disk(xfs_acl, XFS_ACL_MAX_ENTRIES(ip->i_mount));
157 if (IS_ERR(acl)) 161 if (IS_ERR(acl))
158 goto out; 162 goto out;
159 163
@@ -189,16 +193,17 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
189 193
190 if (acl) { 194 if (acl) {
191 struct xfs_acl *xfs_acl; 195 struct xfs_acl *xfs_acl;
192 int len; 196 int len = XFS_ACL_MAX_SIZE(ip->i_mount);
193 197
194 xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); 198 xfs_acl = kzalloc(len, GFP_KERNEL);
195 if (!xfs_acl) 199 if (!xfs_acl)
196 return -ENOMEM; 200 return -ENOMEM;
197 201
198 xfs_acl_to_disk(xfs_acl, acl); 202 xfs_acl_to_disk(xfs_acl, acl);
199 len = sizeof(struct xfs_acl) - 203
200 (sizeof(struct xfs_acl_entry) * 204 /* subtract away the unused acl entries */
201 (XFS_ACL_MAX_ENTRIES - acl->a_count)); 205 len -= sizeof(struct xfs_acl_entry) *
206 (XFS_ACL_MAX_ENTRIES(ip->i_mount) - acl->a_count);
202 207
203 error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl, 208 error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
204 len, ATTR_ROOT); 209 len, ATTR_ROOT);
@@ -243,7 +248,7 @@ xfs_set_mode(struct inode *inode, umode_t mode)
243static int 248static int
244xfs_acl_exists(struct inode *inode, unsigned char *name) 249xfs_acl_exists(struct inode *inode, unsigned char *name)
245{ 250{
246 int len = sizeof(struct xfs_acl); 251 int len = XFS_ACL_MAX_SIZE(XFS_M(inode->i_sb));
247 252
248 return (xfs_attr_get(XFS_I(inode), name, NULL, &len, 253 return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
249 ATTR_ROOT|ATTR_KERNOVAL) == 0); 254 ATTR_ROOT|ATTR_KERNOVAL) == 0);
@@ -379,7 +384,7 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name,
379 goto out_release; 384 goto out_release;
380 385
381 error = -EINVAL; 386 error = -EINVAL;
382 if (acl->a_count > XFS_ACL_MAX_ENTRIES) 387 if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb)))
383 goto out_release; 388 goto out_release;
384 389
385 if (type == ACL_TYPE_ACCESS) { 390 if (type == ACL_TYPE_ACCESS) {
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 39632d941354..4016a567b83c 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -22,19 +22,36 @@ struct inode;
22struct posix_acl; 22struct posix_acl;
23struct xfs_inode; 23struct xfs_inode;
24 24
25#define XFS_ACL_MAX_ENTRIES 25
26#define XFS_ACL_NOT_PRESENT (-1) 25#define XFS_ACL_NOT_PRESENT (-1)
27 26
28/* On-disk XFS access control list structure */ 27/* On-disk XFS access control list structure */
28struct xfs_acl_entry {
29 __be32 ae_tag;
30 __be32 ae_id;
31 __be16 ae_perm;
32 __be16 ae_pad; /* fill the implicit hole in the structure */
33};
34
29struct xfs_acl { 35struct xfs_acl {
30 __be32 acl_cnt; 36 __be32 acl_cnt;
31 struct xfs_acl_entry { 37 struct xfs_acl_entry acl_entry[0];
32 __be32 ae_tag;
33 __be32 ae_id;
34 __be16 ae_perm;
35 } acl_entry[XFS_ACL_MAX_ENTRIES];
36}; 38};
37 39
40/*
41 * The number of ACL entries allowed is defined by the on-disk format.
42 * For v4 superblocks, that is limited to 25 entries. For v5 superblocks, it is
43 * limited only by the maximum size of the xattr that stores the information.
44 */
45#define XFS_ACL_MAX_ENTRIES(mp) \
46 (xfs_sb_version_hascrc(&mp->m_sb) \
47 ? (XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \
48 sizeof(struct xfs_acl_entry) \
49 : 25)
50
51#define XFS_ACL_MAX_SIZE(mp) \
52 (sizeof(struct xfs_acl) + \
53 sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp)))
54
38/* On-disk XFS extended attribute names */ 55/* On-disk XFS extended attribute names */
39#define SGI_ACL_FILE (unsigned char *)"SGI_ACL_FILE" 56#define SGI_ACL_FILE (unsigned char *)"SGI_ACL_FILE"
40#define SGI_ACL_DEFAULT (unsigned char *)"SGI_ACL_DEFAULT" 57#define SGI_ACL_DEFAULT (unsigned char *)"SGI_ACL_DEFAULT"
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 0bce1b348580..31d3cd129269 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -1412,7 +1412,7 @@ xfs_attr3_leaf_add_work(
1412 name_rmt->valuelen = 0; 1412 name_rmt->valuelen = 0;
1413 name_rmt->valueblk = 0; 1413 name_rmt->valueblk = 0;
1414 args->rmtblkno = 1; 1414 args->rmtblkno = 1;
1415 args->rmtblkcnt = XFS_B_TO_FSB(mp, args->valuelen); 1415 args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
1416 } 1416 }
1417 xfs_trans_log_buf(args->trans, bp, 1417 xfs_trans_log_buf(args->trans, bp,
1418 XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index), 1418 XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index),
@@ -1445,11 +1445,12 @@ xfs_attr3_leaf_add_work(
1445STATIC void 1445STATIC void
1446xfs_attr3_leaf_compact( 1446xfs_attr3_leaf_compact(
1447 struct xfs_da_args *args, 1447 struct xfs_da_args *args,
1448 struct xfs_attr3_icleaf_hdr *ichdr_d, 1448 struct xfs_attr3_icleaf_hdr *ichdr_dst,
1449 struct xfs_buf *bp) 1449 struct xfs_buf *bp)
1450{ 1450{
1451 xfs_attr_leafblock_t *leaf_s, *leaf_d; 1451 struct xfs_attr_leafblock *leaf_src;
1452 struct xfs_attr3_icleaf_hdr ichdr_s; 1452 struct xfs_attr_leafblock *leaf_dst;
1453 struct xfs_attr3_icleaf_hdr ichdr_src;
1453 struct xfs_trans *trans = args->trans; 1454 struct xfs_trans *trans = args->trans;
1454 struct xfs_mount *mp = trans->t_mountp; 1455 struct xfs_mount *mp = trans->t_mountp;
1455 char *tmpbuffer; 1456 char *tmpbuffer;
@@ -1457,29 +1458,38 @@ xfs_attr3_leaf_compact(
1457 trace_xfs_attr_leaf_compact(args); 1458 trace_xfs_attr_leaf_compact(args);
1458 1459
1459 tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP); 1460 tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP);
1460 ASSERT(tmpbuffer != NULL);
1461 memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(mp)); 1461 memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(mp));
1462 memset(bp->b_addr, 0, XFS_LBSIZE(mp)); 1462 memset(bp->b_addr, 0, XFS_LBSIZE(mp));
1463 leaf_src = (xfs_attr_leafblock_t *)tmpbuffer;
1464 leaf_dst = bp->b_addr;
1463 1465
1464 /* 1466 /*
1465 * Copy basic information 1467 * Copy the on-disk header back into the destination buffer to ensure
1468 * all the information in the header that is not part of the incore
1469 * header structure is preserved.
1466 */ 1470 */
1467 leaf_s = (xfs_attr_leafblock_t *)tmpbuffer; 1471 memcpy(bp->b_addr, tmpbuffer, xfs_attr3_leaf_hdr_size(leaf_src));
1468 leaf_d = bp->b_addr; 1472
1469 ichdr_s = *ichdr_d; /* struct copy */ 1473 /* Initialise the incore headers */
1470 ichdr_d->firstused = XFS_LBSIZE(mp); 1474 ichdr_src = *ichdr_dst; /* struct copy */
1471 ichdr_d->usedbytes = 0; 1475 ichdr_dst->firstused = XFS_LBSIZE(mp);
1472 ichdr_d->count = 0; 1476 ichdr_dst->usedbytes = 0;
1473 ichdr_d->holes = 0; 1477 ichdr_dst->count = 0;
1474 ichdr_d->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_s); 1478 ichdr_dst->holes = 0;
1475 ichdr_d->freemap[0].size = ichdr_d->firstused - ichdr_d->freemap[0].base; 1479 ichdr_dst->freemap[0].base = xfs_attr3_leaf_hdr_size(leaf_src);
1480 ichdr_dst->freemap[0].size = ichdr_dst->firstused -
1481 ichdr_dst->freemap[0].base;
1482
1483
1484 /* write the header back to initialise the underlying buffer */
1485 xfs_attr3_leaf_hdr_to_disk(leaf_dst, ichdr_dst);
1476 1486
1477 /* 1487 /*
1478 * Copy all entry's in the same (sorted) order, 1488 * Copy all entry's in the same (sorted) order,
1479 * but allocate name/value pairs packed and in sequence. 1489 * but allocate name/value pairs packed and in sequence.
1480 */ 1490 */
1481 xfs_attr3_leaf_moveents(leaf_s, &ichdr_s, 0, leaf_d, ichdr_d, 0, 1491 xfs_attr3_leaf_moveents(leaf_src, &ichdr_src, 0, leaf_dst, ichdr_dst, 0,
1482 ichdr_s.count, mp); 1492 ichdr_src.count, mp);
1483 /* 1493 /*
1484 * this logs the entire buffer, but the caller must write the header 1494 * this logs the entire buffer, but the caller must write the header
1485 * back to the buffer when it is finished modifying it. 1495 * back to the buffer when it is finished modifying it.
@@ -2181,14 +2191,24 @@ xfs_attr3_leaf_unbalance(
2181 struct xfs_attr_leafblock *tmp_leaf; 2191 struct xfs_attr_leafblock *tmp_leaf;
2182 struct xfs_attr3_icleaf_hdr tmphdr; 2192 struct xfs_attr3_icleaf_hdr tmphdr;
2183 2193
2184 tmp_leaf = kmem_alloc(state->blocksize, KM_SLEEP); 2194 tmp_leaf = kmem_zalloc(state->blocksize, KM_SLEEP);
2185 memset(tmp_leaf, 0, state->blocksize);
2186 memset(&tmphdr, 0, sizeof(tmphdr));
2187 2195
2196 /*
2197 * Copy the header into the temp leaf so that all the stuff
2198 * not in the incore header is present and gets copied back in
2199 * once we've moved all the entries.
2200 */
2201 memcpy(tmp_leaf, save_leaf, xfs_attr3_leaf_hdr_size(save_leaf));
2202
2203 memset(&tmphdr, 0, sizeof(tmphdr));
2188 tmphdr.magic = savehdr.magic; 2204 tmphdr.magic = savehdr.magic;
2189 tmphdr.forw = savehdr.forw; 2205 tmphdr.forw = savehdr.forw;
2190 tmphdr.back = savehdr.back; 2206 tmphdr.back = savehdr.back;
2191 tmphdr.firstused = state->blocksize; 2207 tmphdr.firstused = state->blocksize;
2208
2209 /* write the header to the temp buffer to initialise it */
2210 xfs_attr3_leaf_hdr_to_disk(tmp_leaf, &tmphdr);
2211
2192 if (xfs_attr3_leaf_order(save_blk->bp, &savehdr, 2212 if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,
2193 drop_blk->bp, &drophdr)) { 2213 drop_blk->bp, &drophdr)) {
2194 xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0, 2214 xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0,
@@ -2334,8 +2354,9 @@ xfs_attr3_leaf_lookup_int(
2334 args->index = probe; 2354 args->index = probe;
2335 args->valuelen = be32_to_cpu(name_rmt->valuelen); 2355 args->valuelen = be32_to_cpu(name_rmt->valuelen);
2336 args->rmtblkno = be32_to_cpu(name_rmt->valueblk); 2356 args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
2337 args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount, 2357 args->rmtblkcnt = xfs_attr3_rmt_blocks(
2338 args->valuelen); 2358 args->dp->i_mount,
2359 args->valuelen);
2339 return XFS_ERROR(EEXIST); 2360 return XFS_ERROR(EEXIST);
2340 } 2361 }
2341 } 2362 }
@@ -2386,7 +2407,8 @@ xfs_attr3_leaf_getvalue(
2386 ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0); 2407 ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0);
2387 valuelen = be32_to_cpu(name_rmt->valuelen); 2408 valuelen = be32_to_cpu(name_rmt->valuelen);
2388 args->rmtblkno = be32_to_cpu(name_rmt->valueblk); 2409 args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
2389 args->rmtblkcnt = XFS_B_TO_FSB(args->dp->i_mount, valuelen); 2410 args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount,
2411 valuelen);
2390 if (args->flags & ATTR_KERNOVAL) { 2412 if (args->flags & ATTR_KERNOVAL) {
2391 args->valuelen = valuelen; 2413 args->valuelen = valuelen;
2392 return 0; 2414 return 0;
@@ -2712,7 +2734,8 @@ xfs_attr3_leaf_list_int(
2712 args.valuelen = valuelen; 2734 args.valuelen = valuelen;
2713 args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS); 2735 args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);
2714 args.rmtblkno = be32_to_cpu(name_rmt->valueblk); 2736 args.rmtblkno = be32_to_cpu(name_rmt->valueblk);
2715 args.rmtblkcnt = XFS_B_TO_FSB(args.dp->i_mount, valuelen); 2737 args.rmtblkcnt = xfs_attr3_rmt_blocks(
2738 args.dp->i_mount, valuelen);
2716 retval = xfs_attr_rmtval_get(&args); 2739 retval = xfs_attr_rmtval_get(&args);
2717 if (retval) 2740 if (retval)
2718 return retval; 2741 return retval;
@@ -3235,7 +3258,7 @@ xfs_attr3_leaf_inactive(
3235 name_rmt = xfs_attr3_leaf_name_remote(leaf, i); 3258 name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
3236 if (name_rmt->valueblk) { 3259 if (name_rmt->valueblk) {
3237 lp->valueblk = be32_to_cpu(name_rmt->valueblk); 3260 lp->valueblk = be32_to_cpu(name_rmt->valueblk);
3238 lp->valuelen = XFS_B_TO_FSB(dp->i_mount, 3261 lp->valuelen = xfs_attr3_rmt_blocks(dp->i_mount,
3239 be32_to_cpu(name_rmt->valuelen)); 3262 be32_to_cpu(name_rmt->valuelen));
3240 lp++; 3263 lp++;
3241 } 3264 }
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index f9d7846097e2..444a7704596c 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -128,6 +128,7 @@ struct xfs_attr3_leaf_hdr {
128 __u8 holes; 128 __u8 holes;
129 __u8 pad1; 129 __u8 pad1;
130 struct xfs_attr_leaf_map freemap[XFS_ATTR_LEAF_MAPSIZE]; 130 struct xfs_attr_leaf_map freemap[XFS_ATTR_LEAF_MAPSIZE];
131 __be32 pad2; /* 64 bit alignment */
131}; 132};
132 133
133#define XFS_ATTR3_LEAF_CRC_OFF (offsetof(struct xfs_attr3_leaf_hdr, info.crc)) 134#define XFS_ATTR3_LEAF_CRC_OFF (offsetof(struct xfs_attr3_leaf_hdr, info.crc))
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
index dee84466dcc9..ef6b0c124528 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c
@@ -47,22 +47,55 @@
47 * Each contiguous block has a header, so it is not just a simple attribute 47 * Each contiguous block has a header, so it is not just a simple attribute
48 * length to FSB conversion. 48 * length to FSB conversion.
49 */ 49 */
50static int 50int
51xfs_attr3_rmt_blocks( 51xfs_attr3_rmt_blocks(
52 struct xfs_mount *mp, 52 struct xfs_mount *mp,
53 int attrlen) 53 int attrlen)
54{ 54{
55 int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, 55 if (xfs_sb_version_hascrc(&mp->m_sb)) {
56 mp->m_sb.sb_blocksize); 56 int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
57 return (attrlen + buflen - 1) / buflen; 57 return (attrlen + buflen - 1) / buflen;
58 }
59 return XFS_B_TO_FSB(mp, attrlen);
60}
61
62/*
63 * Checking of the remote attribute header is split into two parts. The verifier
64 * does CRC, location and bounds checking, the unpacking function checks the
65 * attribute parameters and owner.
66 */
67static bool
68xfs_attr3_rmt_hdr_ok(
69 struct xfs_mount *mp,
70 void *ptr,
71 xfs_ino_t ino,
72 uint32_t offset,
73 uint32_t size,
74 xfs_daddr_t bno)
75{
76 struct xfs_attr3_rmt_hdr *rmt = ptr;
77
78 if (bno != be64_to_cpu(rmt->rm_blkno))
79 return false;
80 if (offset != be32_to_cpu(rmt->rm_offset))
81 return false;
82 if (size != be32_to_cpu(rmt->rm_bytes))
83 return false;
84 if (ino != be64_to_cpu(rmt->rm_owner))
85 return false;
86
87 /* ok */
88 return true;
58} 89}
59 90
60static bool 91static bool
61xfs_attr3_rmt_verify( 92xfs_attr3_rmt_verify(
62 struct xfs_buf *bp) 93 struct xfs_mount *mp,
94 void *ptr,
95 int fsbsize,
96 xfs_daddr_t bno)
63{ 97{
64 struct xfs_mount *mp = bp->b_target->bt_mount; 98 struct xfs_attr3_rmt_hdr *rmt = ptr;
65 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr;
66 99
67 if (!xfs_sb_version_hascrc(&mp->m_sb)) 100 if (!xfs_sb_version_hascrc(&mp->m_sb))
68 return false; 101 return false;
@@ -70,7 +103,9 @@ xfs_attr3_rmt_verify(
70 return false; 103 return false;
71 if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid)) 104 if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid))
72 return false; 105 return false;
73 if (bp->b_bn != be64_to_cpu(rmt->rm_blkno)) 106 if (be64_to_cpu(rmt->rm_blkno) != bno)
107 return false;
108 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
74 return false; 109 return false;
75 if (be32_to_cpu(rmt->rm_offset) + 110 if (be32_to_cpu(rmt->rm_offset) +
76 be32_to_cpu(rmt->rm_bytes) >= XATTR_SIZE_MAX) 111 be32_to_cpu(rmt->rm_bytes) >= XATTR_SIZE_MAX)
@@ -86,17 +121,40 @@ xfs_attr3_rmt_read_verify(
86 struct xfs_buf *bp) 121 struct xfs_buf *bp)
87{ 122{
88 struct xfs_mount *mp = bp->b_target->bt_mount; 123 struct xfs_mount *mp = bp->b_target->bt_mount;
124 char *ptr;
125 int len;
126 bool corrupt = false;
127 xfs_daddr_t bno;
89 128
90 /* no verification of non-crc buffers */ 129 /* no verification of non-crc buffers */
91 if (!xfs_sb_version_hascrc(&mp->m_sb)) 130 if (!xfs_sb_version_hascrc(&mp->m_sb))
92 return; 131 return;
93 132
94 if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 133 ptr = bp->b_addr;
95 XFS_ATTR3_RMT_CRC_OFF) || 134 bno = bp->b_bn;
96 !xfs_attr3_rmt_verify(bp)) { 135 len = BBTOB(bp->b_length);
136 ASSERT(len >= XFS_LBSIZE(mp));
137
138 while (len > 0) {
139 if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp),
140 XFS_ATTR3_RMT_CRC_OFF)) {
141 corrupt = true;
142 break;
143 }
144 if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
145 corrupt = true;
146 break;
147 }
148 len -= XFS_LBSIZE(mp);
149 ptr += XFS_LBSIZE(mp);
150 bno += mp->m_bsize;
151 }
152
153 if (corrupt) {
97 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 154 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
98 xfs_buf_ioerror(bp, EFSCORRUPTED); 155 xfs_buf_ioerror(bp, EFSCORRUPTED);
99 } 156 } else
157 ASSERT(len == 0);
100} 158}
101 159
102static void 160static void
@@ -105,23 +163,39 @@ xfs_attr3_rmt_write_verify(
105{ 163{
106 struct xfs_mount *mp = bp->b_target->bt_mount; 164 struct xfs_mount *mp = bp->b_target->bt_mount;
107 struct xfs_buf_log_item *bip = bp->b_fspriv; 165 struct xfs_buf_log_item *bip = bp->b_fspriv;
166 char *ptr;
167 int len;
168 xfs_daddr_t bno;
108 169
109 /* no verification of non-crc buffers */ 170 /* no verification of non-crc buffers */
110 if (!xfs_sb_version_hascrc(&mp->m_sb)) 171 if (!xfs_sb_version_hascrc(&mp->m_sb))
111 return; 172 return;
112 173
113 if (!xfs_attr3_rmt_verify(bp)) { 174 ptr = bp->b_addr;
114 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 175 bno = bp->b_bn;
115 xfs_buf_ioerror(bp, EFSCORRUPTED); 176 len = BBTOB(bp->b_length);
116 return; 177 ASSERT(len >= XFS_LBSIZE(mp));
117 } 178
179 while (len > 0) {
180 if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
181 XFS_CORRUPTION_ERROR(__func__,
182 XFS_ERRLEVEL_LOW, mp, bp->b_addr);
183 xfs_buf_ioerror(bp, EFSCORRUPTED);
184 return;
185 }
186 if (bip) {
187 struct xfs_attr3_rmt_hdr *rmt;
188
189 rmt = (struct xfs_attr3_rmt_hdr *)ptr;
190 rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
191 }
192 xfs_update_cksum(ptr, XFS_LBSIZE(mp), XFS_ATTR3_RMT_CRC_OFF);
118 193
119 if (bip) { 194 len -= XFS_LBSIZE(mp);
120 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr; 195 ptr += XFS_LBSIZE(mp);
121 rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn); 196 bno += mp->m_bsize;
122 } 197 }
123 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 198 ASSERT(len == 0);
124 XFS_ATTR3_RMT_CRC_OFF);
125} 199}
126 200
127const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { 201const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
@@ -129,15 +203,16 @@ const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
129 .verify_write = xfs_attr3_rmt_write_verify, 203 .verify_write = xfs_attr3_rmt_write_verify,
130}; 204};
131 205
132static int 206STATIC int
133xfs_attr3_rmt_hdr_set( 207xfs_attr3_rmt_hdr_set(
134 struct xfs_mount *mp, 208 struct xfs_mount *mp,
209 void *ptr,
135 xfs_ino_t ino, 210 xfs_ino_t ino,
136 uint32_t offset, 211 uint32_t offset,
137 uint32_t size, 212 uint32_t size,
138 struct xfs_buf *bp) 213 xfs_daddr_t bno)
139{ 214{
140 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr; 215 struct xfs_attr3_rmt_hdr *rmt = ptr;
141 216
142 if (!xfs_sb_version_hascrc(&mp->m_sb)) 217 if (!xfs_sb_version_hascrc(&mp->m_sb))
143 return 0; 218 return 0;
@@ -147,36 +222,107 @@ xfs_attr3_rmt_hdr_set(
147 rmt->rm_bytes = cpu_to_be32(size); 222 rmt->rm_bytes = cpu_to_be32(size);
148 uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid); 223 uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid);
149 rmt->rm_owner = cpu_to_be64(ino); 224 rmt->rm_owner = cpu_to_be64(ino);
150 rmt->rm_blkno = cpu_to_be64(bp->b_bn); 225 rmt->rm_blkno = cpu_to_be64(bno);
151 bp->b_ops = &xfs_attr3_rmt_buf_ops;
152 226
153 return sizeof(struct xfs_attr3_rmt_hdr); 227 return sizeof(struct xfs_attr3_rmt_hdr);
154} 228}
155 229
156/* 230/*
157 * Checking of the remote attribute header is split into two parts. the verifier 231 * Helper functions to copy attribute data in and out of the one disk extents
158 * does CRC, location and bounds checking, the unpacking function checks the
159 * attribute parameters and owner.
160 */ 232 */
161static bool 233STATIC int
162xfs_attr3_rmt_hdr_ok( 234xfs_attr_rmtval_copyout(
163 struct xfs_mount *mp, 235 struct xfs_mount *mp,
164 xfs_ino_t ino, 236 struct xfs_buf *bp,
165 uint32_t offset, 237 xfs_ino_t ino,
166 uint32_t size, 238 int *offset,
167 struct xfs_buf *bp) 239 int *valuelen,
240 char **dst)
168{ 241{
169 struct xfs_attr3_rmt_hdr *rmt = bp->b_addr; 242 char *src = bp->b_addr;
243 xfs_daddr_t bno = bp->b_bn;
244 int len = BBTOB(bp->b_length);
170 245
171 if (offset != be32_to_cpu(rmt->rm_offset)) 246 ASSERT(len >= XFS_LBSIZE(mp));
172 return false;
173 if (size != be32_to_cpu(rmt->rm_bytes))
174 return false;
175 if (ino != be64_to_cpu(rmt->rm_owner))
176 return false;
177 247
178 /* ok */ 248 while (len > 0 && *valuelen > 0) {
179 return true; 249 int hdr_size = 0;
250 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp));
251
252 byte_cnt = min_t(int, *valuelen, byte_cnt);
253
254 if (xfs_sb_version_hascrc(&mp->m_sb)) {
255 if (!xfs_attr3_rmt_hdr_ok(mp, src, ino, *offset,
256 byte_cnt, bno)) {
257 xfs_alert(mp,
258"remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)",
259 bno, *offset, byte_cnt, ino);
260 return EFSCORRUPTED;
261 }
262 hdr_size = sizeof(struct xfs_attr3_rmt_hdr);
263 }
264
265 memcpy(*dst, src + hdr_size, byte_cnt);
266
267 /* roll buffer forwards */
268 len -= XFS_LBSIZE(mp);
269 src += XFS_LBSIZE(mp);
270 bno += mp->m_bsize;
271
272 /* roll attribute data forwards */
273 *valuelen -= byte_cnt;
274 *dst += byte_cnt;
275 *offset += byte_cnt;
276 }
277 return 0;
278}
279
280STATIC void
281xfs_attr_rmtval_copyin(
282 struct xfs_mount *mp,
283 struct xfs_buf *bp,
284 xfs_ino_t ino,
285 int *offset,
286 int *valuelen,
287 char **src)
288{
289 char *dst = bp->b_addr;
290 xfs_daddr_t bno = bp->b_bn;
291 int len = BBTOB(bp->b_length);
292
293 ASSERT(len >= XFS_LBSIZE(mp));
294
295 while (len > 0 && *valuelen > 0) {
296 int hdr_size;
297 int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp));
298
299 byte_cnt = min(*valuelen, byte_cnt);
300 hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
301 byte_cnt, bno);
302
303 memcpy(dst + hdr_size, *src, byte_cnt);
304
305 /*
306 * If this is the last block, zero the remainder of it.
307 * Check that we are actually the last block, too.
308 */
309 if (byte_cnt + hdr_size < XFS_LBSIZE(mp)) {
310 ASSERT(*valuelen - byte_cnt == 0);
311 ASSERT(len == XFS_LBSIZE(mp));
312 memset(dst + hdr_size + byte_cnt, 0,
313 XFS_LBSIZE(mp) - hdr_size - byte_cnt);
314 }
315
316 /* roll buffer forwards */
317 len -= XFS_LBSIZE(mp);
318 dst += XFS_LBSIZE(mp);
319 bno += mp->m_bsize;
320
321 /* roll attribute data forwards */
322 *valuelen -= byte_cnt;
323 *src += byte_cnt;
324 *offset += byte_cnt;
325 }
180} 326}
181 327
182/* 328/*
@@ -190,13 +336,12 @@ xfs_attr_rmtval_get(
190 struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE]; 336 struct xfs_bmbt_irec map[ATTR_RMTVALUE_MAPSIZE];
191 struct xfs_mount *mp = args->dp->i_mount; 337 struct xfs_mount *mp = args->dp->i_mount;
192 struct xfs_buf *bp; 338 struct xfs_buf *bp;
193 xfs_daddr_t dblkno;
194 xfs_dablk_t lblkno = args->rmtblkno; 339 xfs_dablk_t lblkno = args->rmtblkno;
195 void *dst = args->value; 340 char *dst = args->value;
196 int valuelen = args->valuelen; 341 int valuelen = args->valuelen;
197 int nmap; 342 int nmap;
198 int error; 343 int error;
199 int blkcnt; 344 int blkcnt = args->rmtblkcnt;
200 int i; 345 int i;
201 int offset = 0; 346 int offset = 0;
202 347
@@ -207,52 +352,36 @@ xfs_attr_rmtval_get(
207 while (valuelen > 0) { 352 while (valuelen > 0) {
208 nmap = ATTR_RMTVALUE_MAPSIZE; 353 nmap = ATTR_RMTVALUE_MAPSIZE;
209 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, 354 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
210 args->rmtblkcnt, map, &nmap, 355 blkcnt, map, &nmap,
211 XFS_BMAPI_ATTRFORK); 356 XFS_BMAPI_ATTRFORK);
212 if (error) 357 if (error)
213 return error; 358 return error;
214 ASSERT(nmap >= 1); 359 ASSERT(nmap >= 1);
215 360
216 for (i = 0; (i < nmap) && (valuelen > 0); i++) { 361 for (i = 0; (i < nmap) && (valuelen > 0); i++) {
217 int byte_cnt; 362 xfs_daddr_t dblkno;
218 char *src; 363 int dblkcnt;
219 364
220 ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) && 365 ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
221 (map[i].br_startblock != HOLESTARTBLOCK)); 366 (map[i].br_startblock != HOLESTARTBLOCK));
222 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); 367 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
223 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); 368 dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
224 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, 369 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
225 dblkno, blkcnt, 0, &bp, 370 dblkno, dblkcnt, 0, &bp,
226 &xfs_attr3_rmt_buf_ops); 371 &xfs_attr3_rmt_buf_ops);
227 if (error) 372 if (error)
228 return error; 373 return error;
229 374
230 byte_cnt = min_t(int, valuelen, BBTOB(bp->b_length)); 375 error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
231 byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, byte_cnt); 376 &offset, &valuelen,
232 377 &dst);
233 src = bp->b_addr;
234 if (xfs_sb_version_hascrc(&mp->m_sb)) {
235 if (!xfs_attr3_rmt_hdr_ok(mp, args->dp->i_ino,
236 offset, byte_cnt, bp)) {
237 xfs_alert(mp,
238"remote attribute header does not match required off/len/owner (0x%x/Ox%x,0x%llx)",
239 offset, byte_cnt, args->dp->i_ino);
240 xfs_buf_relse(bp);
241 return EFSCORRUPTED;
242
243 }
244
245 src += sizeof(struct xfs_attr3_rmt_hdr);
246 }
247
248 memcpy(dst, src, byte_cnt);
249 xfs_buf_relse(bp); 378 xfs_buf_relse(bp);
379 if (error)
380 return error;
250 381
251 offset += byte_cnt; 382 /* roll attribute extent map forwards */
252 dst += byte_cnt;
253 valuelen -= byte_cnt;
254
255 lblkno += map[i].br_blockcount; 383 lblkno += map[i].br_blockcount;
384 blkcnt -= map[i].br_blockcount;
256 } 385 }
257 } 386 }
258 ASSERT(valuelen == 0); 387 ASSERT(valuelen == 0);
@@ -270,17 +399,13 @@ xfs_attr_rmtval_set(
270 struct xfs_inode *dp = args->dp; 399 struct xfs_inode *dp = args->dp;
271 struct xfs_mount *mp = dp->i_mount; 400 struct xfs_mount *mp = dp->i_mount;
272 struct xfs_bmbt_irec map; 401 struct xfs_bmbt_irec map;
273 struct xfs_buf *bp;
274 xfs_daddr_t dblkno;
275 xfs_dablk_t lblkno; 402 xfs_dablk_t lblkno;
276 xfs_fileoff_t lfileoff = 0; 403 xfs_fileoff_t lfileoff = 0;
277 void *src = args->value; 404 char *src = args->value;
278 int blkcnt; 405 int blkcnt;
279 int valuelen; 406 int valuelen;
280 int nmap; 407 int nmap;
281 int error; 408 int error;
282 int hdrcnt = 0;
283 bool crcs = xfs_sb_version_hascrc(&mp->m_sb);
284 int offset = 0; 409 int offset = 0;
285 410
286 trace_xfs_attr_rmtval_set(args); 411 trace_xfs_attr_rmtval_set(args);
@@ -289,24 +414,14 @@ xfs_attr_rmtval_set(
289 * Find a "hole" in the attribute address space large enough for 414 * Find a "hole" in the attribute address space large enough for
290 * us to drop the new attribute's value into. Because CRC enable 415 * us to drop the new attribute's value into. Because CRC enable
291 * attributes have headers, we can't just do a straight byte to FSB 416 * attributes have headers, we can't just do a straight byte to FSB
292 * conversion. We calculate the worst case block count in this case 417 * conversion and have to take the header space into account.
293 * and we may not need that many, so we have to handle this when
294 * allocating the blocks below.
295 */ 418 */
296 if (!crcs) 419 blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
297 blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
298 else
299 blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
300
301 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff, 420 error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
302 XFS_ATTR_FORK); 421 XFS_ATTR_FORK);
303 if (error) 422 if (error)
304 return error; 423 return error;
305 424
306 /* Start with the attribute data. We'll allocate the rest afterwards. */
307 if (crcs)
308 blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
309
310 args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff; 425 args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
311 args->rmtblkcnt = blkcnt; 426 args->rmtblkcnt = blkcnt;
312 427
@@ -349,26 +464,6 @@ xfs_attr_rmtval_set(
349 (map.br_startblock != HOLESTARTBLOCK)); 464 (map.br_startblock != HOLESTARTBLOCK));
350 lblkno += map.br_blockcount; 465 lblkno += map.br_blockcount;
351 blkcnt -= map.br_blockcount; 466 blkcnt -= map.br_blockcount;
352 hdrcnt++;
353
354 /*
355 * If we have enough blocks for the attribute data, calculate
356 * how many extra blocks we need for headers. We might run
357 * through this multiple times in the case that the additional
358 * headers in the blocks needed for the data fragments spills
359 * into requiring more blocks. e.g. for 512 byte blocks, we'll
360 * spill for another block every 9 headers we require in this
361 * loop.
362 */
363 if (crcs && blkcnt == 0) {
364 int total_len;
365
366 total_len = args->valuelen +
367 hdrcnt * sizeof(struct xfs_attr3_rmt_hdr);
368 blkcnt = XFS_B_TO_FSB(mp, total_len);
369 blkcnt -= args->rmtblkcnt;
370 args->rmtblkcnt += blkcnt;
371 }
372 467
373 /* 468 /*
374 * Start the next trans in the chain. 469 * Start the next trans in the chain.
@@ -385,18 +480,19 @@ xfs_attr_rmtval_set(
385 * the INCOMPLETE flag. 480 * the INCOMPLETE flag.
386 */ 481 */
387 lblkno = args->rmtblkno; 482 lblkno = args->rmtblkno;
483 blkcnt = args->rmtblkcnt;
388 valuelen = args->valuelen; 484 valuelen = args->valuelen;
389 while (valuelen > 0) { 485 while (valuelen > 0) {
390 int byte_cnt; 486 struct xfs_buf *bp;
391 char *buf; 487 xfs_daddr_t dblkno;
488 int dblkcnt;
489
490 ASSERT(blkcnt > 0);
392 491
393 /*
394 * Try to remember where we decided to put the value.
395 */
396 xfs_bmap_init(args->flist, args->firstblock); 492 xfs_bmap_init(args->flist, args->firstblock);
397 nmap = 1; 493 nmap = 1;
398 error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno, 494 error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
399 args->rmtblkcnt, &map, &nmap, 495 blkcnt, &map, &nmap,
400 XFS_BMAPI_ATTRFORK); 496 XFS_BMAPI_ATTRFORK);
401 if (error) 497 if (error)
402 return(error); 498 return(error);
@@ -405,41 +501,27 @@ xfs_attr_rmtval_set(
405 (map.br_startblock != HOLESTARTBLOCK)); 501 (map.br_startblock != HOLESTARTBLOCK));
406 502
407 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 503 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
408 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 504 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
409 505
410 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt, 0); 506 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0);
411 if (!bp) 507 if (!bp)
412 return ENOMEM; 508 return ENOMEM;
413 bp->b_ops = &xfs_attr3_rmt_buf_ops; 509 bp->b_ops = &xfs_attr3_rmt_buf_ops;
414 510
415 byte_cnt = BBTOB(bp->b_length); 511 xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
416 byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, byte_cnt); 512 &valuelen, &src);
417 if (valuelen < byte_cnt)
418 byte_cnt = valuelen;
419
420 buf = bp->b_addr;
421 buf += xfs_attr3_rmt_hdr_set(mp, dp->i_ino, offset,
422 byte_cnt, bp);
423 memcpy(buf, src, byte_cnt);
424
425 if (byte_cnt < BBTOB(bp->b_length))
426 xfs_buf_zero(bp, byte_cnt,
427 BBTOB(bp->b_length) - byte_cnt);
428 513
429 error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */ 514 error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
430 xfs_buf_relse(bp); 515 xfs_buf_relse(bp);
431 if (error) 516 if (error)
432 return error; 517 return error;
433 518
434 src += byte_cnt;
435 valuelen -= byte_cnt;
436 offset += byte_cnt;
437 hdrcnt--;
438 519
520 /* roll attribute extent map forwards */
439 lblkno += map.br_blockcount; 521 lblkno += map.br_blockcount;
522 blkcnt -= map.br_blockcount;
440 } 523 }
441 ASSERT(valuelen == 0); 524 ASSERT(valuelen == 0);
442 ASSERT(hdrcnt == 0);
443 return 0; 525 return 0;
444} 526}
445 527
@@ -448,33 +530,40 @@ xfs_attr_rmtval_set(
448 * out-of-line buffer that it is stored on. 530 * out-of-line buffer that it is stored on.
449 */ 531 */
450int 532int
451xfs_attr_rmtval_remove(xfs_da_args_t *args) 533xfs_attr_rmtval_remove(
534 struct xfs_da_args *args)
452{ 535{
453 xfs_mount_t *mp; 536 struct xfs_mount *mp = args->dp->i_mount;
454 xfs_bmbt_irec_t map; 537 xfs_dablk_t lblkno;
455 xfs_buf_t *bp; 538 int blkcnt;
456 xfs_daddr_t dblkno; 539 int error;
457 xfs_dablk_t lblkno; 540 int done;
458 int valuelen, blkcnt, nmap, error, done, committed;
459 541
460 trace_xfs_attr_rmtval_remove(args); 542 trace_xfs_attr_rmtval_remove(args);
461 543
462 mp = args->dp->i_mount;
463
464 /* 544 /*
465 * Roll through the "value", invalidating the attribute value's 545 * Roll through the "value", invalidating the attribute value's blocks.
466 * blocks. 546 * Note that args->rmtblkcnt is the minimum number of data blocks we'll
547 * see for a CRC enabled remote attribute. Each extent will have a
548 * header, and so we may have more blocks than we realise here. If we
549 * fail to map the blocks correctly, we'll have problems with the buffer
550 * lookups.
467 */ 551 */
468 lblkno = args->rmtblkno; 552 lblkno = args->rmtblkno;
469 valuelen = args->rmtblkcnt; 553 blkcnt = args->rmtblkcnt;
470 while (valuelen > 0) { 554 while (blkcnt > 0) {
555 struct xfs_bmbt_irec map;
556 struct xfs_buf *bp;
557 xfs_daddr_t dblkno;
558 int dblkcnt;
559 int nmap;
560
471 /* 561 /*
472 * Try to remember where we decided to put the value. 562 * Try to remember where we decided to put the value.
473 */ 563 */
474 nmap = 1; 564 nmap = 1;
475 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, 565 error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
476 args->rmtblkcnt, &map, &nmap, 566 blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK);
477 XFS_BMAPI_ATTRFORK);
478 if (error) 567 if (error)
479 return(error); 568 return(error);
480 ASSERT(nmap == 1); 569 ASSERT(nmap == 1);
@@ -482,21 +571,20 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
482 (map.br_startblock != HOLESTARTBLOCK)); 571 (map.br_startblock != HOLESTARTBLOCK));
483 572
484 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 573 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
485 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 574 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
486 575
487 /* 576 /*
488 * If the "remote" value is in the cache, remove it. 577 * If the "remote" value is in the cache, remove it.
489 */ 578 */
490 bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK); 579 bp = xfs_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK);
491 if (bp) { 580 if (bp) {
492 xfs_buf_stale(bp); 581 xfs_buf_stale(bp);
493 xfs_buf_relse(bp); 582 xfs_buf_relse(bp);
494 bp = NULL; 583 bp = NULL;
495 } 584 }
496 585
497 valuelen -= map.br_blockcount;
498
499 lblkno += map.br_blockcount; 586 lblkno += map.br_blockcount;
587 blkcnt -= map.br_blockcount;
500 } 588 }
501 589
502 /* 590 /*
@@ -506,6 +594,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
506 blkcnt = args->rmtblkcnt; 594 blkcnt = args->rmtblkcnt;
507 done = 0; 595 done = 0;
508 while (!done) { 596 while (!done) {
597 int committed;
598
509 xfs_bmap_init(args->flist, args->firstblock); 599 xfs_bmap_init(args->flist, args->firstblock);
510 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, 600 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
511 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA, 601 XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
diff --git a/fs/xfs/xfs_attr_remote.h b/fs/xfs/xfs_attr_remote.h
index c7cca60a062a..92a8fd7977cc 100644
--- a/fs/xfs/xfs_attr_remote.h
+++ b/fs/xfs/xfs_attr_remote.h
@@ -20,6 +20,14 @@
20 20
21#define XFS_ATTR3_RMT_MAGIC 0x5841524d /* XARM */ 21#define XFS_ATTR3_RMT_MAGIC 0x5841524d /* XARM */
22 22
23/*
24 * There is one of these headers per filesystem block in a remote attribute.
25 * This is done to ensure there is a 1:1 mapping between the attribute value
26 * length and the number of blocks needed to store the attribute. This makes the
27 * verification of a buffer a little more complex, but greatly simplifies the
28 * allocation, reading and writing of these attributes as we don't have to guess
29 * the number of blocks needed to store the attribute data.
30 */
23struct xfs_attr3_rmt_hdr { 31struct xfs_attr3_rmt_hdr {
24 __be32 rm_magic; 32 __be32 rm_magic;
25 __be32 rm_offset; 33 __be32 rm_offset;
@@ -39,6 +47,8 @@ struct xfs_attr3_rmt_hdr {
39 47
40extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops; 48extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
41 49
50int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen);
51
42int xfs_attr_rmtval_get(struct xfs_da_args *args); 52int xfs_attr_rmtval_get(struct xfs_da_args *args);
43int xfs_attr_rmtval_set(struct xfs_da_args *args); 53int xfs_attr_rmtval_set(struct xfs_da_args *args);
44int xfs_attr_rmtval_remove(struct xfs_da_args *args); 54int xfs_attr_rmtval_remove(struct xfs_da_args *args);
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 8804b8a3c310..0903960410a2 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -2544,7 +2544,17 @@ xfs_btree_new_iroot(
2544 if (error) 2544 if (error)
2545 goto error0; 2545 goto error0;
2546 2546
2547 /*
2548 * we can't just memcpy() the root in for CRC enabled btree blocks.
2549 * In that case have to also ensure the blkno remains correct
2550 */
2547 memcpy(cblock, block, xfs_btree_block_len(cur)); 2551 memcpy(cblock, block, xfs_btree_block_len(cur));
2552 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
2553 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
2554 cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn);
2555 else
2556 cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn);
2557 }
2548 2558
2549 be16_add_cpu(&block->bb_level, 1); 2559 be16_add_cpu(&block->bb_level, 1);
2550 xfs_btree_set_numrecs(block, 1); 2560 xfs_btree_set_numrecs(block, 1);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 0d2554299688..1b2472a46e46 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -513,6 +513,7 @@ _xfs_buf_find(
513 xfs_alert(btp->bt_mount, 513 xfs_alert(btp->bt_mount,
514 "%s: Block out of range: block 0x%llx, EOFS 0x%llx ", 514 "%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
515 __func__, blkno, eofs); 515 __func__, blkno, eofs);
516 WARN_ON(1);
516 return NULL; 517 return NULL;
517 } 518 }
518 519
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index cf263476d6b4..4ec431777048 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -262,12 +262,7 @@ xfs_buf_item_format_segment(
262 vecp->i_addr = xfs_buf_offset(bp, buffer_offset); 262 vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
263 vecp->i_len = nbits * XFS_BLF_CHUNK; 263 vecp->i_len = nbits * XFS_BLF_CHUNK;
264 vecp->i_type = XLOG_REG_TYPE_BCHUNK; 264 vecp->i_type = XLOG_REG_TYPE_BCHUNK;
265/* 265 nvecs++;
266 * You would think we need to bump the nvecs here too, but we do not
267 * this number is used by recovery, and it gets confused by the boundary
268 * split here
269 * nvecs++;
270 */
271 vecp++; 266 vecp++;
272 first_bit = next_bit; 267 first_bit = next_bit;
273 last_bit = next_bit; 268 last_bit = next_bit;
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index f852b082a084..c407e1ccff43 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -219,6 +219,14 @@ xfs_swap_extents(
219 int taforkblks = 0; 219 int taforkblks = 0;
220 __uint64_t tmp; 220 __uint64_t tmp;
221 221
222 /*
223 * We have no way of updating owner information in the BMBT blocks for
224 * each inode on CRC enabled filesystems, so to avoid corrupting the
225 * this metadata we simply don't allow extent swaps to occur.
226 */
227 if (xfs_sb_version_hascrc(&mp->m_sb))
228 return XFS_ERROR(EINVAL);
229
222 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); 230 tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
223 if (!tempifp) { 231 if (!tempifp) {
224 error = XFS_ERROR(ENOMEM); 232 error = XFS_ERROR(ENOMEM);
diff --git a/fs/xfs/xfs_dir2_format.h b/fs/xfs/xfs_dir2_format.h
index a3b1bd841a80..7826782b8d78 100644
--- a/fs/xfs/xfs_dir2_format.h
+++ b/fs/xfs/xfs_dir2_format.h
@@ -266,6 +266,7 @@ struct xfs_dir3_blk_hdr {
266struct xfs_dir3_data_hdr { 266struct xfs_dir3_data_hdr {
267 struct xfs_dir3_blk_hdr hdr; 267 struct xfs_dir3_blk_hdr hdr;
268 xfs_dir2_data_free_t best_free[XFS_DIR2_DATA_FD_COUNT]; 268 xfs_dir2_data_free_t best_free[XFS_DIR2_DATA_FD_COUNT];
269 __be32 pad; /* 64 bit alignment */
269}; 270};
270 271
271#define XFS_DIR3_DATA_CRC_OFF offsetof(struct xfs_dir3_data_hdr, hdr.crc) 272#define XFS_DIR3_DATA_CRC_OFF offsetof(struct xfs_dir3_data_hdr, hdr.crc)
@@ -477,7 +478,7 @@ struct xfs_dir3_leaf_hdr {
477 struct xfs_da3_blkinfo info; /* header for da routines */ 478 struct xfs_da3_blkinfo info; /* header for da routines */
478 __be16 count; /* count of entries */ 479 __be16 count; /* count of entries */
479 __be16 stale; /* count of stale entries */ 480 __be16 stale; /* count of stale entries */
480 __be32 pad; 481 __be32 pad; /* 64 bit alignment */
481}; 482};
482 483
483struct xfs_dir3_icleaf_hdr { 484struct xfs_dir3_icleaf_hdr {
@@ -715,6 +716,7 @@ struct xfs_dir3_free_hdr {
715 __be32 firstdb; /* db of first entry */ 716 __be32 firstdb; /* db of first entry */
716 __be32 nvalid; /* count of valid entries */ 717 __be32 nvalid; /* count of valid entries */
717 __be32 nused; /* count of used entries */ 718 __be32 nused; /* count of used entries */
719 __be32 pad; /* 64 bit alignment */
718}; 720};
719 721
720struct xfs_dir3_free { 722struct xfs_dir3_free {
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 5246de4912d4..2226a00acd15 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -263,18 +263,19 @@ xfs_dir3_free_get_buf(
263 * Initialize the new block to be empty, and remember 263 * Initialize the new block to be empty, and remember
264 * its first slot as our empty slot. 264 * its first slot as our empty slot.
265 */ 265 */
266 hdr.magic = XFS_DIR2_FREE_MAGIC; 266 memset(bp->b_addr, 0, sizeof(struct xfs_dir3_free_hdr));
267 hdr.firstdb = 0; 267 memset(&hdr, 0, sizeof(hdr));
268 hdr.nused = 0; 268
269 hdr.nvalid = 0;
270 if (xfs_sb_version_hascrc(&mp->m_sb)) { 269 if (xfs_sb_version_hascrc(&mp->m_sb)) {
271 struct xfs_dir3_free_hdr *hdr3 = bp->b_addr; 270 struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
272 271
273 hdr.magic = XFS_DIR3_FREE_MAGIC; 272 hdr.magic = XFS_DIR3_FREE_MAGIC;
273
274 hdr3->hdr.blkno = cpu_to_be64(bp->b_bn); 274 hdr3->hdr.blkno = cpu_to_be64(bp->b_bn);
275 hdr3->hdr.owner = cpu_to_be64(dp->i_ino); 275 hdr3->hdr.owner = cpu_to_be64(dp->i_ino);
276 uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_uuid); 276 uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_uuid);
277 } 277 } else
278 hdr.magic = XFS_DIR2_FREE_MAGIC;
278 xfs_dir3_free_hdr_to_disk(bp->b_addr, &hdr); 279 xfs_dir3_free_hdr_to_disk(bp->b_addr, &hdr);
279 *bpp = bp; 280 *bpp = bp;
280 return 0; 281 return 0;
@@ -1921,8 +1922,6 @@ xfs_dir2_node_addname_int(
1921 */ 1922 */
1922 freehdr.firstdb = (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) * 1923 freehdr.firstdb = (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
1923 xfs_dir3_free_max_bests(mp); 1924 xfs_dir3_free_max_bests(mp);
1924 free->hdr.nvalid = 0;
1925 free->hdr.nused = 0;
1926 } else { 1925 } else {
1927 free = fbp->b_addr; 1926 free = fbp->b_addr;
1928 bests = xfs_dir3_free_bests_p(mp, free); 1927 bests = xfs_dir3_free_bests_p(mp, free);
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index a41f8bf1da37..044e97a33c8d 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -249,8 +249,11 @@ xfs_qm_init_dquot_blk(
249 d->dd_diskdq.d_version = XFS_DQUOT_VERSION; 249 d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
250 d->dd_diskdq.d_id = cpu_to_be32(curid); 250 d->dd_diskdq.d_id = cpu_to_be32(curid);
251 d->dd_diskdq.d_flags = type; 251 d->dd_diskdq.d_flags = type;
252 if (xfs_sb_version_hascrc(&mp->m_sb)) 252 if (xfs_sb_version_hascrc(&mp->m_sb)) {
253 uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid); 253 uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
254 xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
255 XFS_DQUOT_CRC_OFF);
256 }
254 } 257 }
255 258
256 xfs_trans_dquot_buf(tp, bp, 259 xfs_trans_dquot_buf(tp, bp,
@@ -286,23 +289,6 @@ xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
286 dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5; 289 dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
287} 290}
288 291
289STATIC void
290xfs_dquot_buf_calc_crc(
291 struct xfs_mount *mp,
292 struct xfs_buf *bp)
293{
294 struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
295 int i;
296
297 if (!xfs_sb_version_hascrc(&mp->m_sb))
298 return;
299
300 for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++, d++) {
301 xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
302 offsetof(struct xfs_dqblk, dd_crc));
303 }
304}
305
306STATIC bool 292STATIC bool
307xfs_dquot_buf_verify_crc( 293xfs_dquot_buf_verify_crc(
308 struct xfs_mount *mp, 294 struct xfs_mount *mp,
@@ -328,12 +314,11 @@ xfs_dquot_buf_verify_crc(
328 314
329 for (i = 0; i < ndquots; i++, d++) { 315 for (i = 0; i < ndquots; i++, d++) {
330 if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk), 316 if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
331 offsetof(struct xfs_dqblk, dd_crc))) 317 XFS_DQUOT_CRC_OFF))
332 return false; 318 return false;
333 if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid)) 319 if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid))
334 return false; 320 return false;
335 } 321 }
336
337 return true; 322 return true;
338} 323}
339 324
@@ -393,6 +378,11 @@ xfs_dquot_buf_read_verify(
393 } 378 }
394} 379}
395 380
381/*
382 * we don't calculate the CRC here as that is done when the dquot is flushed to
383 * the buffer after the update is done. This ensures that the dquot in the
384 * buffer always has an up-to-date CRC value.
385 */
396void 386void
397xfs_dquot_buf_write_verify( 387xfs_dquot_buf_write_verify(
398 struct xfs_buf *bp) 388 struct xfs_buf *bp)
@@ -404,7 +394,6 @@ xfs_dquot_buf_write_verify(
404 xfs_buf_ioerror(bp, EFSCORRUPTED); 394 xfs_buf_ioerror(bp, EFSCORRUPTED);
405 return; 395 return;
406 } 396 }
407 xfs_dquot_buf_calc_crc(mp, bp);
408} 397}
409 398
410const struct xfs_buf_ops xfs_dquot_buf_ops = { 399const struct xfs_buf_ops xfs_dquot_buf_ops = {
@@ -1151,11 +1140,17 @@ xfs_qm_dqflush(
1151 * copy the lsn into the on-disk dquot now while we have the in memory 1140 * copy the lsn into the on-disk dquot now while we have the in memory
1152 * dquot here. This can't be done later in the write verifier as we 1141 * dquot here. This can't be done later in the write verifier as we
1153 * can't get access to the log item at that point in time. 1142 * can't get access to the log item at that point in time.
1143 *
1144 * We also calculate the CRC here so that the on-disk dquot in the
1145 * buffer always has a valid CRC. This ensures there is no possibility
1146 * of a dquot without an up-to-date CRC getting to disk.
1154 */ 1147 */
1155 if (xfs_sb_version_hascrc(&mp->m_sb)) { 1148 if (xfs_sb_version_hascrc(&mp->m_sb)) {
1156 struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddqp; 1149 struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddqp;
1157 1150
1158 dqb->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn); 1151 dqb->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn);
1152 xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk),
1153 XFS_DQUOT_CRC_OFF);
1159 } 1154 }
1160 1155
1161 /* 1156 /*
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 6dda3f949b04..d04695545397 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -236,6 +236,7 @@ typedef struct xfs_fsop_resblks {
236#define XFS_FSOP_GEOM_FLAGS_PROJID32 0x0800 /* 32-bit project IDs */ 236#define XFS_FSOP_GEOM_FLAGS_PROJID32 0x0800 /* 32-bit project IDs */
237#define XFS_FSOP_GEOM_FLAGS_DIRV2CI 0x1000 /* ASCII only CI names */ 237#define XFS_FSOP_GEOM_FLAGS_DIRV2CI 0x1000 /* ASCII only CI names */
238#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */ 238#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */
239#define XFS_FSOP_GEOM_FLAGS_V5SB 0x8000 /* version 5 superblock */
239 240
240 241
241/* 242/*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 87595b211da1..3c3644ea825b 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -99,7 +99,9 @@ xfs_fs_geometry(
99 (xfs_sb_version_hasattr2(&mp->m_sb) ? 99 (xfs_sb_version_hasattr2(&mp->m_sb) ?
100 XFS_FSOP_GEOM_FLAGS_ATTR2 : 0) | 100 XFS_FSOP_GEOM_FLAGS_ATTR2 : 0) |
101 (xfs_sb_version_hasprojid32bit(&mp->m_sb) ? 101 (xfs_sb_version_hasprojid32bit(&mp->m_sb) ?
102 XFS_FSOP_GEOM_FLAGS_PROJID32 : 0); 102 XFS_FSOP_GEOM_FLAGS_PROJID32 : 0) |
103 (xfs_sb_version_hascrc(&mp->m_sb) ?
104 XFS_FSOP_GEOM_FLAGS_V5SB : 0);
103 geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ? 105 geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
104 mp->m_sb.sb_logsectsize : BBSIZE; 106 mp->m_sb.sb_logsectsize : BBSIZE;
105 geo->rtsectsize = mp->m_sb.sb_blocksize; 107 geo->rtsectsize = mp->m_sb.sb_blocksize;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index efbe1accb6ca..7f7be5f98f52 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1638,6 +1638,10 @@ xfs_iunlink(
1638 dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; 1638 dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
1639 offset = ip->i_imap.im_boffset + 1639 offset = ip->i_imap.im_boffset +
1640 offsetof(xfs_dinode_t, di_next_unlinked); 1640 offsetof(xfs_dinode_t, di_next_unlinked);
1641
1642 /* need to recalc the inode CRC if appropriate */
1643 xfs_dinode_calc_crc(mp, dip);
1644
1641 xfs_trans_inode_buf(tp, ibp); 1645 xfs_trans_inode_buf(tp, ibp);
1642 xfs_trans_log_buf(tp, ibp, offset, 1646 xfs_trans_log_buf(tp, ibp, offset,
1643 (offset + sizeof(xfs_agino_t) - 1)); 1647 (offset + sizeof(xfs_agino_t) - 1));
@@ -1723,6 +1727,10 @@ xfs_iunlink_remove(
1723 dip->di_next_unlinked = cpu_to_be32(NULLAGINO); 1727 dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
1724 offset = ip->i_imap.im_boffset + 1728 offset = ip->i_imap.im_boffset +
1725 offsetof(xfs_dinode_t, di_next_unlinked); 1729 offsetof(xfs_dinode_t, di_next_unlinked);
1730
1731 /* need to recalc the inode CRC if appropriate */
1732 xfs_dinode_calc_crc(mp, dip);
1733
1726 xfs_trans_inode_buf(tp, ibp); 1734 xfs_trans_inode_buf(tp, ibp);
1727 xfs_trans_log_buf(tp, ibp, offset, 1735 xfs_trans_log_buf(tp, ibp, offset,
1728 (offset + sizeof(xfs_agino_t) - 1)); 1736 (offset + sizeof(xfs_agino_t) - 1));
@@ -1796,6 +1804,10 @@ xfs_iunlink_remove(
1796 dip->di_next_unlinked = cpu_to_be32(NULLAGINO); 1804 dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
1797 offset = ip->i_imap.im_boffset + 1805 offset = ip->i_imap.im_boffset +
1798 offsetof(xfs_dinode_t, di_next_unlinked); 1806 offsetof(xfs_dinode_t, di_next_unlinked);
1807
1808 /* need to recalc the inode CRC if appropriate */
1809 xfs_dinode_calc_crc(mp, dip);
1810
1799 xfs_trans_inode_buf(tp, ibp); 1811 xfs_trans_inode_buf(tp, ibp);
1800 xfs_trans_log_buf(tp, ibp, offset, 1812 xfs_trans_log_buf(tp, ibp, offset,
1801 (offset + sizeof(xfs_agino_t) - 1)); 1813 (offset + sizeof(xfs_agino_t) - 1));
@@ -1809,6 +1821,10 @@ xfs_iunlink_remove(
1809 last_dip->di_next_unlinked = cpu_to_be32(next_agino); 1821 last_dip->di_next_unlinked = cpu_to_be32(next_agino);
1810 ASSERT(next_agino != 0); 1822 ASSERT(next_agino != 0);
1811 offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked); 1823 offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked);
1824
1825 /* need to recalc the inode CRC if appropriate */
1826 xfs_dinode_calc_crc(mp, last_dip);
1827
1812 xfs_trans_inode_buf(tp, last_ibp); 1828 xfs_trans_inode_buf(tp, last_ibp);
1813 xfs_trans_log_buf(tp, last_ibp, offset, 1829 xfs_trans_log_buf(tp, last_ibp, offset,
1814 (offset + sizeof(xfs_agino_t) - 1)); 1830 (offset + sizeof(xfs_agino_t) - 1));
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index d82efaa2ac73..ca9ecaa81112 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -455,6 +455,28 @@ xfs_vn_getattr(
455 return 0; 455 return 0;
456} 456}
457 457
458static void
459xfs_setattr_mode(
460 struct xfs_trans *tp,
461 struct xfs_inode *ip,
462 struct iattr *iattr)
463{
464 struct inode *inode = VFS_I(ip);
465 umode_t mode = iattr->ia_mode;
466
467 ASSERT(tp);
468 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
469
470 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
471 mode &= ~S_ISGID;
472
473 ip->i_d.di_mode &= S_IFMT;
474 ip->i_d.di_mode |= mode & ~S_IFMT;
475
476 inode->i_mode &= S_IFMT;
477 inode->i_mode |= mode & ~S_IFMT;
478}
479
458int 480int
459xfs_setattr_nonsize( 481xfs_setattr_nonsize(
460 struct xfs_inode *ip, 482 struct xfs_inode *ip,
@@ -606,18 +628,8 @@ xfs_setattr_nonsize(
606 /* 628 /*
607 * Change file access modes. 629 * Change file access modes.
608 */ 630 */
609 if (mask & ATTR_MODE) { 631 if (mask & ATTR_MODE)
610 umode_t mode = iattr->ia_mode; 632 xfs_setattr_mode(tp, ip, iattr);
611
612 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
613 mode &= ~S_ISGID;
614
615 ip->i_d.di_mode &= S_IFMT;
616 ip->i_d.di_mode |= mode & ~S_IFMT;
617
618 inode->i_mode &= S_IFMT;
619 inode->i_mode |= mode & ~S_IFMT;
620 }
621 633
622 /* 634 /*
623 * Change file access or modified times. 635 * Change file access or modified times.
@@ -714,9 +726,8 @@ xfs_setattr_size(
714 return XFS_ERROR(error); 726 return XFS_ERROR(error);
715 727
716 ASSERT(S_ISREG(ip->i_d.di_mode)); 728 ASSERT(S_ISREG(ip->i_d.di_mode));
717 ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| 729 ASSERT((mask & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
718 ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID| 730 ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
719 ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
720 731
721 if (!(flags & XFS_ATTR_NOLOCK)) { 732 if (!(flags & XFS_ATTR_NOLOCK)) {
722 lock_flags |= XFS_IOLOCK_EXCL; 733 lock_flags |= XFS_IOLOCK_EXCL;
@@ -860,6 +871,12 @@ xfs_setattr_size(
860 xfs_inode_clear_eofblocks_tag(ip); 871 xfs_inode_clear_eofblocks_tag(ip);
861 } 872 }
862 873
874 /*
875 * Change file access modes.
876 */
877 if (mask & ATTR_MODE)
878 xfs_setattr_mode(tp, ip, iattr);
879
863 if (mask & ATTR_CTIME) { 880 if (mask & ATTR_CTIME) {
864 inode->i_ctime = iattr->ia_ctime; 881 inode->i_ctime = iattr->ia_ctime;
865 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; 882 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 93f03ec17eec..7cf5e4eafe28 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1599,10 +1599,43 @@ xlog_recover_add_to_trans(
1599} 1599}
1600 1600
1601/* 1601/*
1602 * Sort the log items in the transaction. Cancelled buffers need 1602 * Sort the log items in the transaction.
1603 * to be put first so they are processed before any items that might 1603 *
1604 * modify the buffers. If they are cancelled, then the modifications 1604 * The ordering constraints are defined by the inode allocation and unlink
1605 * don't need to be replayed. 1605 * behaviour. The rules are:
1606 *
1607 * 1. Every item is only logged once in a given transaction. Hence it
1608 * represents the last logged state of the item. Hence ordering is
1609 * dependent on the order in which operations need to be performed so
1610 * required initial conditions are always met.
1611 *
1612 * 2. Cancelled buffers are recorded in pass 1 in a separate table and
1613 * there's nothing to replay from them so we can simply cull them
1614 * from the transaction. However, we can't do that until after we've
1615 * replayed all the other items because they may be dependent on the
1616 * cancelled buffer and replaying the cancelled buffer can remove it
1617 * form the cancelled buffer table. Hence they have tobe done last.
1618 *
1619 * 3. Inode allocation buffers must be replayed before inode items that
1620 * read the buffer and replay changes into it.
1621 *
1622 * 4. Inode unlink buffers must be replayed after inode items are replayed.
1623 * This ensures that inodes are completely flushed to the inode buffer
1624 * in a "free" state before we remove the unlinked inode list pointer.
1625 *
1626 * Hence the ordering needs to be inode allocation buffers first, inode items
1627 * second, inode unlink buffers third and cancelled buffers last.
1628 *
1629 * But there's a problem with that - we can't tell an inode allocation buffer
1630 * apart from a regular buffer, so we can't separate them. We can, however,
1631 * tell an inode unlink buffer from the others, and so we can separate them out
1632 * from all the other buffers and move them to last.
1633 *
1634 * Hence, 4 lists, in order from head to tail:
1635 * - buffer_list for all buffers except cancelled/inode unlink buffers
1636 * - item_list for all non-buffer items
1637 * - inode_buffer_list for inode unlink buffers
1638 * - cancel_list for the cancelled buffers
1606 */ 1639 */
1607STATIC int 1640STATIC int
1608xlog_recover_reorder_trans( 1641xlog_recover_reorder_trans(
@@ -1612,6 +1645,10 @@ xlog_recover_reorder_trans(
1612{ 1645{
1613 xlog_recover_item_t *item, *n; 1646 xlog_recover_item_t *item, *n;
1614 LIST_HEAD(sort_list); 1647 LIST_HEAD(sort_list);
1648 LIST_HEAD(cancel_list);
1649 LIST_HEAD(buffer_list);
1650 LIST_HEAD(inode_buffer_list);
1651 LIST_HEAD(inode_list);
1615 1652
1616 list_splice_init(&trans->r_itemq, &sort_list); 1653 list_splice_init(&trans->r_itemq, &sort_list);
1617 list_for_each_entry_safe(item, n, &sort_list, ri_list) { 1654 list_for_each_entry_safe(item, n, &sort_list, ri_list) {
@@ -1619,12 +1656,18 @@ xlog_recover_reorder_trans(
1619 1656
1620 switch (ITEM_TYPE(item)) { 1657 switch (ITEM_TYPE(item)) {
1621 case XFS_LI_BUF: 1658 case XFS_LI_BUF:
1622 if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) { 1659 if (buf_f->blf_flags & XFS_BLF_CANCEL) {
1623 trace_xfs_log_recover_item_reorder_head(log, 1660 trace_xfs_log_recover_item_reorder_head(log,
1624 trans, item, pass); 1661 trans, item, pass);
1625 list_move(&item->ri_list, &trans->r_itemq); 1662 list_move(&item->ri_list, &cancel_list);
1663 break;
1664 }
1665 if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
1666 list_move(&item->ri_list, &inode_buffer_list);
1626 break; 1667 break;
1627 } 1668 }
1669 list_move_tail(&item->ri_list, &buffer_list);
1670 break;
1628 case XFS_LI_INODE: 1671 case XFS_LI_INODE:
1629 case XFS_LI_DQUOT: 1672 case XFS_LI_DQUOT:
1630 case XFS_LI_QUOTAOFF: 1673 case XFS_LI_QUOTAOFF:
@@ -1632,7 +1675,7 @@ xlog_recover_reorder_trans(
1632 case XFS_LI_EFI: 1675 case XFS_LI_EFI:
1633 trace_xfs_log_recover_item_reorder_tail(log, 1676 trace_xfs_log_recover_item_reorder_tail(log,
1634 trans, item, pass); 1677 trans, item, pass);
1635 list_move_tail(&item->ri_list, &trans->r_itemq); 1678 list_move_tail(&item->ri_list, &inode_list);
1636 break; 1679 break;
1637 default: 1680 default:
1638 xfs_warn(log->l_mp, 1681 xfs_warn(log->l_mp,
@@ -1643,6 +1686,14 @@ xlog_recover_reorder_trans(
1643 } 1686 }
1644 } 1687 }
1645 ASSERT(list_empty(&sort_list)); 1688 ASSERT(list_empty(&sort_list));
1689 if (!list_empty(&buffer_list))
1690 list_splice(&buffer_list, &trans->r_itemq);
1691 if (!list_empty(&inode_list))
1692 list_splice_tail(&inode_list, &trans->r_itemq);
1693 if (!list_empty(&inode_buffer_list))
1694 list_splice_tail(&inode_buffer_list, &trans->r_itemq);
1695 if (!list_empty(&cancel_list))
1696 list_splice_tail(&cancel_list, &trans->r_itemq);
1646 return 0; 1697 return 0;
1647} 1698}
1648 1699
@@ -1794,7 +1845,13 @@ xlog_recover_do_inode_buffer(
1794 xfs_agino_t *buffer_nextp; 1845 xfs_agino_t *buffer_nextp;
1795 1846
1796 trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f); 1847 trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
1797 bp->b_ops = &xfs_inode_buf_ops; 1848
1849 /*
1850 * Post recovery validation only works properly on CRC enabled
1851 * filesystems.
1852 */
1853 if (xfs_sb_version_hascrc(&mp->m_sb))
1854 bp->b_ops = &xfs_inode_buf_ops;
1798 1855
1799 inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog; 1856 inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog;
1800 for (i = 0; i < inodes_per_buf; i++) { 1857 for (i = 0; i < inodes_per_buf; i++) {
@@ -1861,6 +1918,15 @@ xlog_recover_do_inode_buffer(
1861 buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp, 1918 buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
1862 next_unlinked_offset); 1919 next_unlinked_offset);
1863 *buffer_nextp = *logged_nextp; 1920 *buffer_nextp = *logged_nextp;
1921
1922 /*
1923 * If necessary, recalculate the CRC in the on-disk inode. We
1924 * have to leave the inode in a consistent state for whoever
1925 * reads it next....
1926 */
1927 xfs_dinode_calc_crc(mp, (struct xfs_dinode *)
1928 xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
1929
1864 } 1930 }
1865 1931
1866 return 0; 1932 return 0;
@@ -2097,6 +2163,17 @@ xlog_recover_do_reg_buffer(
2097 ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT)); 2163 ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
2098 2164
2099 /* 2165 /*
2166 * The dirty regions logged in the buffer, even though
2167 * contiguous, may span multiple chunks. This is because the
2168 * dirty region may span a physical page boundary in a buffer
2169 * and hence be split into two separate vectors for writing into
2170 * the log. Hence we need to trim nbits back to the length of
2171 * the current region being copied out of the log.
2172 */
2173 if (item->ri_buf[i].i_len < (nbits << XFS_BLF_SHIFT))
2174 nbits = item->ri_buf[i].i_len >> XFS_BLF_SHIFT;
2175
2176 /*
2100 * Do a sanity check if this is a dquot buffer. Just checking 2177 * Do a sanity check if this is a dquot buffer. Just checking
2101 * the first dquot in the buffer should do. XXXThis is 2178 * the first dquot in the buffer should do. XXXThis is
2102 * probably a good thing to do for other buf types also. 2179 * probably a good thing to do for other buf types also.
@@ -2134,7 +2211,16 @@ xlog_recover_do_reg_buffer(
2134 /* Shouldn't be any more regions */ 2211 /* Shouldn't be any more regions */
2135 ASSERT(i == item->ri_total); 2212 ASSERT(i == item->ri_total);
2136 2213
2137 xlog_recovery_validate_buf_type(mp, bp, buf_f); 2214 /*
2215 * We can only do post recovery validation on items on CRC enabled
2216 * fielsystems as we need to know when the buffer was written to be able
2217 * to determine if we should have replayed the item. If we replay old
2218 * metadata over a newer buffer, then it will enter a temporarily
2219 * inconsistent state resulting in verification failures. Hence for now
2220 * just avoid the verification stage for non-crc filesystems
2221 */
2222 if (xfs_sb_version_hascrc(&mp->m_sb))
2223 xlog_recovery_validate_buf_type(mp, bp, buf_f);
2138} 2224}
2139 2225
2140/* 2226/*
@@ -2255,6 +2341,12 @@ xfs_qm_dqcheck(
2255 d->dd_diskdq.d_flags = type; 2341 d->dd_diskdq.d_flags = type;
2256 d->dd_diskdq.d_id = cpu_to_be32(id); 2342 d->dd_diskdq.d_id = cpu_to_be32(id);
2257 2343
2344 if (xfs_sb_version_hascrc(&mp->m_sb)) {
2345 uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
2346 xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
2347 XFS_DQUOT_CRC_OFF);
2348 }
2349
2258 return errs; 2350 return errs;
2259} 2351}
2260 2352
@@ -2782,6 +2874,10 @@ xlog_recover_dquot_pass2(
2782 } 2874 }
2783 2875
2784 memcpy(ddq, recddq, item->ri_buf[1].i_len); 2876 memcpy(ddq, recddq, item->ri_buf[1].i_len);
2877 if (xfs_sb_version_hascrc(&mp->m_sb)) {
2878 xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
2879 XFS_DQUOT_CRC_OFF);
2880 }
2785 2881
2786 ASSERT(dq_f->qlf_size == 2); 2882 ASSERT(dq_f->qlf_size == 2);
2787 ASSERT(bp->b_target->bt_mount == mp); 2883 ASSERT(bp->b_target->bt_mount == mp);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index f6bfbd734669..e8e310c05097 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -314,7 +314,8 @@ STATIC int
314xfs_mount_validate_sb( 314xfs_mount_validate_sb(
315 xfs_mount_t *mp, 315 xfs_mount_t *mp,
316 xfs_sb_t *sbp, 316 xfs_sb_t *sbp,
317 bool check_inprogress) 317 bool check_inprogress,
318 bool check_version)
318{ 319{
319 320
320 /* 321 /*
@@ -337,9 +338,10 @@ xfs_mount_validate_sb(
337 338
338 /* 339 /*
339 * Version 5 superblock feature mask validation. Reject combinations the 340 * Version 5 superblock feature mask validation. Reject combinations the
340 * kernel cannot support up front before checking anything else. 341 * kernel cannot support up front before checking anything else. For
342 * write validation, we don't need to check feature masks.
341 */ 343 */
342 if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) { 344 if (check_version && XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) {
343 xfs_alert(mp, 345 xfs_alert(mp,
344"Version 5 superblock detected. This kernel has EXPERIMENTAL support enabled!\n" 346"Version 5 superblock detected. This kernel has EXPERIMENTAL support enabled!\n"
345"Use of these features in this kernel is at your own risk!"); 347"Use of these features in this kernel is at your own risk!");
@@ -675,7 +677,8 @@ xfs_sb_to_disk(
675 677
676static int 678static int
677xfs_sb_verify( 679xfs_sb_verify(
678 struct xfs_buf *bp) 680 struct xfs_buf *bp,
681 bool check_version)
679{ 682{
680 struct xfs_mount *mp = bp->b_target->bt_mount; 683 struct xfs_mount *mp = bp->b_target->bt_mount;
681 struct xfs_sb sb; 684 struct xfs_sb sb;
@@ -686,7 +689,8 @@ xfs_sb_verify(
686 * Only check the in progress field for the primary superblock as 689 * Only check the in progress field for the primary superblock as
687 * mkfs.xfs doesn't clear it from secondary superblocks. 690 * mkfs.xfs doesn't clear it from secondary superblocks.
688 */ 691 */
689 return xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR); 692 return xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR,
693 check_version);
690} 694}
691 695
692/* 696/*
@@ -719,7 +723,7 @@ xfs_sb_read_verify(
719 goto out_error; 723 goto out_error;
720 } 724 }
721 } 725 }
722 error = xfs_sb_verify(bp); 726 error = xfs_sb_verify(bp, true);
723 727
724out_error: 728out_error:
725 if (error) { 729 if (error) {
@@ -758,7 +762,7 @@ xfs_sb_write_verify(
758 struct xfs_buf_log_item *bip = bp->b_fspriv; 762 struct xfs_buf_log_item *bip = bp->b_fspriv;
759 int error; 763 int error;
760 764
761 error = xfs_sb_verify(bp); 765 error = xfs_sb_verify(bp, false);
762 if (error) { 766 if (error) {
763 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 767 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
764 xfs_buf_ioerror(bp, error); 768 xfs_buf_ioerror(bp, error);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index f41702b43003..b75c9bb6e71e 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -41,6 +41,7 @@
41#include "xfs_qm.h" 41#include "xfs_qm.h"
42#include "xfs_trace.h" 42#include "xfs_trace.h"
43#include "xfs_icache.h" 43#include "xfs_icache.h"
44#include "xfs_cksum.h"
44 45
45/* 46/*
46 * The global quota manager. There is only one of these for the entire 47 * The global quota manager. There is only one of these for the entire
@@ -839,7 +840,7 @@ xfs_qm_reset_dqcounts(
839 xfs_dqid_t id, 840 xfs_dqid_t id,
840 uint type) 841 uint type)
841{ 842{
842 xfs_disk_dquot_t *ddq; 843 struct xfs_dqblk *dqb;
843 int j; 844 int j;
844 845
845 trace_xfs_reset_dqcounts(bp, _RET_IP_); 846 trace_xfs_reset_dqcounts(bp, _RET_IP_);
@@ -853,8 +854,12 @@ xfs_qm_reset_dqcounts(
853 do_div(j, sizeof(xfs_dqblk_t)); 854 do_div(j, sizeof(xfs_dqblk_t));
854 ASSERT(mp->m_quotainfo->qi_dqperchunk == j); 855 ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
855#endif 856#endif
856 ddq = bp->b_addr; 857 dqb = bp->b_addr;
857 for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) { 858 for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
859 struct xfs_disk_dquot *ddq;
860
861 ddq = (struct xfs_disk_dquot *)&dqb[j];
862
858 /* 863 /*
859 * Do a sanity check, and if needed, repair the dqblk. Don't 864 * Do a sanity check, and if needed, repair the dqblk. Don't
860 * output any warnings because it's perfectly possible to 865 * output any warnings because it's perfectly possible to
@@ -871,7 +876,12 @@ xfs_qm_reset_dqcounts(
871 ddq->d_bwarns = 0; 876 ddq->d_bwarns = 0;
872 ddq->d_iwarns = 0; 877 ddq->d_iwarns = 0;
873 ddq->d_rtbwarns = 0; 878 ddq->d_rtbwarns = 0;
874 ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1); 879
880 if (xfs_sb_version_hascrc(&mp->m_sb)) {
881 xfs_update_cksum((char *)&dqb[j],
882 sizeof(struct xfs_dqblk),
883 XFS_DQUOT_CRC_OFF);
884 }
875 } 885 }
876} 886}
877 887
@@ -907,19 +917,29 @@ xfs_qm_dqiter_bufs(
907 XFS_FSB_TO_DADDR(mp, bno), 917 XFS_FSB_TO_DADDR(mp, bno),
908 mp->m_quotainfo->qi_dqchunklen, 0, &bp, 918 mp->m_quotainfo->qi_dqchunklen, 0, &bp,
909 &xfs_dquot_buf_ops); 919 &xfs_dquot_buf_ops);
910 if (error)
911 break;
912 920
913 /* 921 /*
914 * XXX(hch): need to figure out if it makes sense to validate 922 * CRC and validation errors will return a EFSCORRUPTED here. If
915 * the CRC here. 923 * this occurs, re-read without CRC validation so that we can
924 * repair the damage via xfs_qm_reset_dqcounts(). This process
925 * will leave a trace in the log indicating corruption has
926 * been detected.
916 */ 927 */
928 if (error == EFSCORRUPTED) {
929 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
930 XFS_FSB_TO_DADDR(mp, bno),
931 mp->m_quotainfo->qi_dqchunklen, 0, &bp,
932 NULL);
933 }
934
935 if (error)
936 break;
937
917 xfs_qm_reset_dqcounts(mp, bp, firstid, type); 938 xfs_qm_reset_dqcounts(mp, bp, firstid, type);
918 xfs_buf_delwri_queue(bp, buffer_list); 939 xfs_buf_delwri_queue(bp, buffer_list);
919 xfs_buf_relse(bp); 940 xfs_buf_relse(bp);
920 /* 941
921 * goto the next block. 942 /* goto the next block. */
922 */
923 bno++; 943 bno++;
924 firstid += mp->m_quotainfo->qi_dqperchunk; 944 firstid += mp->m_quotainfo->qi_dqperchunk;
925 } 945 }
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index c41190cad6e9..6cdf6ffc36a1 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -489,31 +489,36 @@ xfs_qm_scall_setqlim(
489 if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0) 489 if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
490 return 0; 490 return 0;
491 491
492 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
493 error = xfs_trans_reserve(tp, 0, XFS_QM_SETQLIM_LOG_RES(mp),
494 0, 0, XFS_DEFAULT_LOG_COUNT);
495 if (error) {
496 xfs_trans_cancel(tp, 0);
497 return (error);
498 }
499
500 /* 492 /*
501 * We don't want to race with a quotaoff so take the quotaoff lock. 493 * We don't want to race with a quotaoff so take the quotaoff lock.
502 * (We don't hold an inode lock, so there's nothing else to stop 494 * We don't hold an inode lock, so there's nothing else to stop
503 * a quotaoff from happening). (XXXThis doesn't currently happen 495 * a quotaoff from happening.
504 * because we take the vfslock before calling xfs_qm_sysent).
505 */ 496 */
506 mutex_lock(&q->qi_quotaofflock); 497 mutex_lock(&q->qi_quotaofflock);
507 498
508 /* 499 /*
509 * Get the dquot (locked), and join it to the transaction. 500 * Get the dquot (locked) before we start, as we need to do a
510 * Allocate the dquot if this doesn't exist. 501 * transaction to allocate it if it doesn't exist. Once we have the
502 * dquot, unlock it so we can start the next transaction safely. We hold
503 * a reference to the dquot, so it's safe to do this unlock/lock without
504 * it being reclaimed in the mean time.
511 */ 505 */
512 if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) { 506 error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp);
513 xfs_trans_cancel(tp, XFS_TRANS_ABORT); 507 if (error) {
514 ASSERT(error != ENOENT); 508 ASSERT(error != ENOENT);
515 goto out_unlock; 509 goto out_unlock;
516 } 510 }
511 xfs_dqunlock(dqp);
512
513 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
514 error = xfs_trans_reserve(tp, 0, XFS_QM_SETQLIM_LOG_RES(mp),
515 0, 0, XFS_DEFAULT_LOG_COUNT);
516 if (error) {
517 xfs_trans_cancel(tp, 0);
518 goto out_rele;
519 }
520
521 xfs_dqlock(dqp);
517 xfs_trans_dqjoin(tp, dqp); 522 xfs_trans_dqjoin(tp, dqp);
518 ddq = &dqp->q_core; 523 ddq = &dqp->q_core;
519 524
@@ -621,9 +626,10 @@ xfs_qm_scall_setqlim(
621 xfs_trans_log_dquot(tp, dqp); 626 xfs_trans_log_dquot(tp, dqp);
622 627
623 error = xfs_trans_commit(tp, 0); 628 error = xfs_trans_commit(tp, 0);
624 xfs_qm_dqrele(dqp);
625 629
626 out_unlock: 630out_rele:
631 xfs_qm_dqrele(dqp);
632out_unlock:
627 mutex_unlock(&q->qi_quotaofflock); 633 mutex_unlock(&q->qi_quotaofflock);
628 return error; 634 return error;
629} 635}
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index c61e31c7d997..c38068f26c55 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -87,6 +87,8 @@ typedef struct xfs_dqblk {
87 uuid_t dd_uuid; /* location information */ 87 uuid_t dd_uuid; /* location information */
88} xfs_dqblk_t; 88} xfs_dqblk_t;
89 89
90#define XFS_DQUOT_CRC_OFF offsetof(struct xfs_dqblk, dd_crc)
91
90/* 92/*
91 * flags for q_flags field in the dquot. 93 * flags for q_flags field in the dquot.
92 */ 94 */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index ea341cea68cb..3033ba5e9762 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1373,6 +1373,17 @@ xfs_finish_flags(
1373 } 1373 }
1374 1374
1375 /* 1375 /*
1376 * V5 filesystems always use attr2 format for attributes.
1377 */
1378 if (xfs_sb_version_hascrc(&mp->m_sb) &&
1379 (mp->m_flags & XFS_MOUNT_NOATTR2)) {
1380 xfs_warn(mp,
1381"Cannot mount a V5 filesystem as %s. %s is always enabled for V5 filesystems.",
1382 MNTOPT_NOATTR2, MNTOPT_ATTR2);
1383 return XFS_ERROR(EINVAL);
1384 }
1385
1386 /*
1376 * mkfs'ed attr2 will turn on attr2 mount unless explicitly 1387 * mkfs'ed attr2 will turn on attr2 mount unless explicitly
1377 * told by noattr2 to turn it off 1388 * told by noattr2 to turn it off
1378 */ 1389 */
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 5f234389327c..195a403e1522 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -56,16 +56,9 @@ xfs_symlink_blocks(
56 struct xfs_mount *mp, 56 struct xfs_mount *mp,
57 int pathlen) 57 int pathlen)
58{ 58{
59 int fsblocks = 0; 59 int buflen = XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
60 int len = pathlen;
61 60
62 do { 61 return (pathlen + buflen - 1) / buflen;
63 fsblocks++;
64 len -= XFS_SYMLINK_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
65 } while (len > 0);
66
67 ASSERT(fsblocks <= XFS_SYMLINK_MAPS);
68 return fsblocks;
69} 62}
70 63
71static int 64static int
@@ -405,7 +398,7 @@ xfs_symlink(
405 if (pathlen <= XFS_LITINO(mp, dp->i_d.di_version)) 398 if (pathlen <= XFS_LITINO(mp, dp->i_d.di_version))
406 fs_blocks = 0; 399 fs_blocks = 0;
407 else 400 else
408 fs_blocks = XFS_B_TO_FSB(mp, pathlen); 401 fs_blocks = xfs_symlink_blocks(mp, pathlen);
409 resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks); 402 resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
410 error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0, 403 error = xfs_trans_reserve(tp, resblks, XFS_SYMLINK_LOG_RES(mp), 0,
411 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT); 404 XFS_TRANS_PERM_LOG_RES, XFS_SYMLINK_LOG_COUNT);
@@ -512,7 +505,7 @@ xfs_symlink(
512 cur_chunk = target_path; 505 cur_chunk = target_path;
513 offset = 0; 506 offset = 0;
514 for (n = 0; n < nmaps; n++) { 507 for (n = 0; n < nmaps; n++) {
515 char *buf; 508 char *buf;
516 509
517 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 510 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
518 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 511 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
@@ -525,9 +518,7 @@ xfs_symlink(
525 bp->b_ops = &xfs_symlink_buf_ops; 518 bp->b_ops = &xfs_symlink_buf_ops;
526 519
527 byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt); 520 byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
528 if (pathlen < byte_cnt) { 521 byte_cnt = min(byte_cnt, pathlen);
529 byte_cnt = pathlen;
530 }
531 522
532 buf = bp->b_addr; 523 buf = bp->b_addr;
533 buf += xfs_symlink_hdr_set(mp, ip->i_ino, offset, 524 buf += xfs_symlink_hdr_set(mp, ip->i_ino, offset,
@@ -542,6 +533,7 @@ xfs_symlink(
542 xfs_trans_log_buf(tp, bp, 0, (buf + byte_cnt - 1) - 533 xfs_trans_log_buf(tp, bp, 0, (buf + byte_cnt - 1) -
543 (char *)bp->b_addr); 534 (char *)bp->b_addr);
544 } 535 }
536 ASSERT(pathlen == 0);
545 } 537 }
546 538
547 /* 539 /*