aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_inode.c41
-rw-r--r--fs/binfmt_elf_fdpic.c17
-rw-r--r--fs/bio.c2
-rw-r--r--fs/btrfs/acl.c12
-rw-r--r--fs/btrfs/extent-tree.c32
-rw-r--r--fs/btrfs/file.c100
-rw-r--r--fs/btrfs/inode.c12
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/relocation.c4
-rw-r--r--fs/btrfs/volumes.c4
-rw-r--r--fs/cifs/CHANGES4
-rw-r--r--fs/cifs/cifs_dfs_ref.c3
-rw-r--r--fs/cifs/cifsfs.c3
-rw-r--r--fs/cifs/connect.c13
-rw-r--r--fs/compat_ioctl.c3
-rw-r--r--fs/configfs/symlink.c4
-rw-r--r--fs/ecryptfs/crypto.c4
-rw-r--r--fs/ecryptfs/file.c17
-rw-r--r--fs/ecryptfs/inode.c158
-rw-r--r--fs/ecryptfs/main.c4
-rw-r--r--fs/exofs/inode.c17
-rw-r--r--fs/exofs/pnfs.h10
-rw-r--r--fs/ext4/Kconfig1
-rw-r--r--fs/ext4/block_validity.c1
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/ext4_extents.h3
-rw-r--r--fs/ext4/extents.c77
-rw-r--r--fs/ext4/fsync.c16
-rw-r--r--fs/ext4/inode.c225
-rw-r--r--fs/ext4/mballoc.h1
-rw-r--r--fs/ext4/super.c7
-rw-r--r--fs/ext4/xattr.c2
-rw-r--r--fs/fcntl.c102
-rw-r--r--fs/fs-writeback.c18
-rw-r--r--fs/gfs2/file.c38
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/ops_inode.c6
-rw-r--r--fs/gfs2/xattr.c21
-rw-r--r--fs/hppfs/hppfs.c18
-rw-r--r--fs/jbd2/checkpoint.c15
-rw-r--r--fs/jbd2/commit.c19
-rw-r--r--fs/jbd2/journal.c2
-rw-r--r--fs/namei.c24
-rw-r--r--fs/namespace.c14
-rw-r--r--fs/nfs/dir.c1
-rw-r--r--fs/nfsd/vfs.c5
-rw-r--r--fs/nilfs2/bmap.c4
-rw-r--r--fs/nilfs2/cpfile.c31
-rw-r--r--fs/nilfs2/direct.c17
-rw-r--r--fs/nilfs2/ioctl.c2
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c2
-rw-r--r--fs/notify/inotify/inotify_user.c4
-rw-r--r--fs/ocfs2/file.c21
-rw-r--r--fs/proc/array.c89
-rw-r--r--fs/proc/base.c1
-rw-r--r--fs/proc/task_mmu.c3
-rw-r--r--fs/quota/dquot.c3
-rw-r--r--fs/ramfs/file-nommu.c26
-rw-r--r--fs/reiserfs/bitmap.c3
-rw-r--r--fs/reiserfs/inode.c24
-rw-r--r--fs/reiserfs/ioctl.c3
-rw-r--r--fs/reiserfs/journal.c18
-rw-r--r--fs/reiserfs/lock.c9
-rw-r--r--fs/reiserfs/namei.c7
-rw-r--r--fs/reiserfs/xattr.c38
-rw-r--r--fs/reiserfs/xattr_acl.c2
-rw-r--r--fs/sysfs/dir.c14
-rw-r--r--fs/sysfs/sysfs.h15
-rw-r--r--fs/ubifs/gc.c96
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c14
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c183
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h1145
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c2
-rw-r--r--fs/xfs/xfs_alloc.c44
-rw-r--r--fs/xfs/xfs_dfrag.c106
-rw-r--r--fs/xfs/xfs_iget.c1
-rw-r--r--fs/xfs/xfs_inode.c17
-rw-r--r--fs/xfs/xfs_rtalloc.c2
-rw-r--r--fs/xfs/xfs_vnodeops.c93
81 files changed, 1737 insertions, 1393 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 18f74ec4dce9..9d03d1ebca6f 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1001,44 +1001,6 @@ done:
1001} 1001}
1002 1002
1003/** 1003/**
1004 * v9fs_vfs_readlink - read a symlink's location
1005 * @dentry: dentry for symlink
1006 * @buffer: buffer to load symlink location into
1007 * @buflen: length of buffer
1008 *
1009 */
1010
1011static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
1012 int buflen)
1013{
1014 int retval;
1015 int ret;
1016 char *link = __getname();
1017
1018 if (unlikely(!link))
1019 return -ENOMEM;
1020
1021 if (buflen > PATH_MAX)
1022 buflen = PATH_MAX;
1023
1024 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
1025 dentry);
1026
1027 retval = v9fs_readlink(dentry, link, buflen);
1028
1029 if (retval > 0) {
1030 if ((ret = copy_to_user(buffer, link, retval)) != 0) {
1031 P9_DPRINTK(P9_DEBUG_ERROR,
1032 "problem copying to user: %d\n", ret);
1033 retval = ret;
1034 }
1035 }
1036
1037 __putname(link);
1038 return retval;
1039}
1040
1041/**
1042 * v9fs_vfs_follow_link - follow a symlink path 1004 * v9fs_vfs_follow_link - follow a symlink path
1043 * @dentry: dentry for symlink 1005 * @dentry: dentry for symlink
1044 * @nd: nameidata 1006 * @nd: nameidata
@@ -1230,7 +1192,6 @@ static const struct inode_operations v9fs_dir_inode_operations_ext = {
1230 .rmdir = v9fs_vfs_rmdir, 1192 .rmdir = v9fs_vfs_rmdir,
1231 .mknod = v9fs_vfs_mknod, 1193 .mknod = v9fs_vfs_mknod,
1232 .rename = v9fs_vfs_rename, 1194 .rename = v9fs_vfs_rename,
1233 .readlink = v9fs_vfs_readlink,
1234 .getattr = v9fs_vfs_getattr, 1195 .getattr = v9fs_vfs_getattr,
1235 .setattr = v9fs_vfs_setattr, 1196 .setattr = v9fs_vfs_setattr,
1236}; 1197};
@@ -1253,7 +1214,7 @@ static const struct inode_operations v9fs_file_inode_operations = {
1253}; 1214};
1254 1215
1255static const struct inode_operations v9fs_symlink_inode_operations = { 1216static const struct inode_operations v9fs_symlink_inode_operations = {
1256 .readlink = v9fs_vfs_readlink, 1217 .readlink = generic_readlink,
1257 .follow_link = v9fs_vfs_follow_link, 1218 .follow_link = v9fs_vfs_follow_link,
1258 .put_link = v9fs_vfs_put_link, 1219 .put_link = v9fs_vfs_put_link,
1259 .getattr = v9fs_vfs_getattr, 1220 .getattr = v9fs_vfs_getattr,
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index c25256a5c5b0..c57d9ce5ff7e 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -171,6 +171,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
171#ifdef ELF_FDPIC_PLAT_INIT 171#ifdef ELF_FDPIC_PLAT_INIT
172 unsigned long dynaddr; 172 unsigned long dynaddr;
173#endif 173#endif
174#ifndef CONFIG_MMU
175 unsigned long stack_prot;
176#endif
174 struct file *interpreter = NULL; /* to shut gcc up */ 177 struct file *interpreter = NULL; /* to shut gcc up */
175 char *interpreter_name = NULL; 178 char *interpreter_name = NULL;
176 int executable_stack; 179 int executable_stack;
@@ -316,6 +319,8 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
316 * defunct, deceased, etc. after this point we have to exit via 319 * defunct, deceased, etc. after this point we have to exit via
317 * error_kill */ 320 * error_kill */
318 set_personality(PER_LINUX_FDPIC); 321 set_personality(PER_LINUX_FDPIC);
322 if (elf_read_implies_exec(&exec_params.hdr, executable_stack))
323 current->personality |= READ_IMPLIES_EXEC;
319 set_binfmt(&elf_fdpic_format); 324 set_binfmt(&elf_fdpic_format);
320 325
321 current->mm->start_code = 0; 326 current->mm->start_code = 0;
@@ -377,9 +382,13 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
377 if (stack_size < PAGE_SIZE * 2) 382 if (stack_size < PAGE_SIZE * 2)
378 stack_size = PAGE_SIZE * 2; 383 stack_size = PAGE_SIZE * 2;
379 384
385 stack_prot = PROT_READ | PROT_WRITE;
386 if (executable_stack == EXSTACK_ENABLE_X ||
387 (executable_stack == EXSTACK_DEFAULT && VM_STACK_FLAGS & VM_EXEC))
388 stack_prot |= PROT_EXEC;
389
380 down_write(&current->mm->mmap_sem); 390 down_write(&current->mm->mmap_sem);
381 current->mm->start_brk = do_mmap(NULL, 0, stack_size, 391 current->mm->start_brk = do_mmap(NULL, 0, stack_size, stack_prot,
382 PROT_READ | PROT_WRITE | PROT_EXEC,
383 MAP_PRIVATE | MAP_ANONYMOUS | 392 MAP_PRIVATE | MAP_ANONYMOUS |
384 MAP_UNINITIALIZED | MAP_GROWSDOWN, 393 MAP_UNINITIALIZED | MAP_GROWSDOWN,
385 0); 394 0);
@@ -1798,11 +1807,11 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
1798 ELF_CORE_WRITE_EXTRA_DATA; 1807 ELF_CORE_WRITE_EXTRA_DATA;
1799#endif 1808#endif
1800 1809
1801 if (file->f_pos != offset) { 1810 if (cprm->file->f_pos != offset) {
1802 /* Sanity check */ 1811 /* Sanity check */
1803 printk(KERN_WARNING 1812 printk(KERN_WARNING
1804 "elf_core_dump: file->f_pos (%lld) != offset (%lld)\n", 1813 "elf_core_dump: file->f_pos (%lld) != offset (%lld)\n",
1805 file->f_pos, offset); 1814 cprm->file->f_pos, offset);
1806 } 1815 }
1807 1816
1808end_coredump: 1817end_coredump:
diff --git a/fs/bio.c b/fs/bio.c
index 76e6713abf94..12429c9553eb 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -78,7 +78,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
78 78
79 i = 0; 79 i = 0;
80 while (i < bio_slab_nr) { 80 while (i < bio_slab_nr) {
81 struct bio_slab *bslab = &bio_slabs[i]; 81 bslab = &bio_slabs[i];
82 82
83 if (!bslab->slab && entry == -1) 83 if (!bslab->slab && entry == -1)
84 entry = i; 84 entry = i;
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 2e9e69987a82..54f4798ab46a 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -112,12 +112,14 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans,
112 switch (type) { 112 switch (type) {
113 case ACL_TYPE_ACCESS: 113 case ACL_TYPE_ACCESS:
114 mode = inode->i_mode; 114 mode = inode->i_mode;
115 ret = posix_acl_equiv_mode(acl, &mode);
116 if (ret < 0)
117 return ret;
118 ret = 0;
119 inode->i_mode = mode;
120 name = POSIX_ACL_XATTR_ACCESS; 115 name = POSIX_ACL_XATTR_ACCESS;
116 if (acl) {
117 ret = posix_acl_equiv_mode(acl, &mode);
118 if (ret < 0)
119 return ret;
120 inode->i_mode = mode;
121 }
122 ret = 0;
121 break; 123 break;
122 case ACL_TYPE_DEFAULT: 124 case ACL_TYPE_DEFAULT:
123 if (!S_ISDIR(inode->i_mode)) 125 if (!S_ISDIR(inode->i_mode))
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 56e50137d0e6..432a2da4641e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -83,6 +83,17 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
83 return (cache->flags & bits) == bits; 83 return (cache->flags & bits) == bits;
84} 84}
85 85
86void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
87{
88 atomic_inc(&cache->count);
89}
90
91void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
92{
93 if (atomic_dec_and_test(&cache->count))
94 kfree(cache);
95}
96
86/* 97/*
87 * this adds the block group to the fs_info rb tree for the block group 98 * this adds the block group to the fs_info rb tree for the block group
88 * cache 99 * cache
@@ -156,7 +167,7 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
156 } 167 }
157 } 168 }
158 if (ret) 169 if (ret)
159 atomic_inc(&ret->count); 170 btrfs_get_block_group(ret);
160 spin_unlock(&info->block_group_cache_lock); 171 spin_unlock(&info->block_group_cache_lock);
161 172
162 return ret; 173 return ret;
@@ -407,6 +418,8 @@ err:
407 418
408 put_caching_control(caching_ctl); 419 put_caching_control(caching_ctl);
409 atomic_dec(&block_group->space_info->caching_threads); 420 atomic_dec(&block_group->space_info->caching_threads);
421 btrfs_put_block_group(block_group);
422
410 return 0; 423 return 0;
411} 424}
412 425
@@ -447,6 +460,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
447 up_write(&fs_info->extent_commit_sem); 460 up_write(&fs_info->extent_commit_sem);
448 461
449 atomic_inc(&cache->space_info->caching_threads); 462 atomic_inc(&cache->space_info->caching_threads);
463 btrfs_get_block_group(cache);
450 464
451 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", 465 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
452 cache->key.objectid); 466 cache->key.objectid);
@@ -486,12 +500,6 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
486 return cache; 500 return cache;
487} 501}
488 502
489void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
490{
491 if (atomic_dec_and_test(&cache->count))
492 kfree(cache);
493}
494
495static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, 503static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
496 u64 flags) 504 u64 flags)
497{ 505{
@@ -2582,7 +2590,7 @@ next_block_group(struct btrfs_root *root,
2582 if (node) { 2590 if (node) {
2583 cache = rb_entry(node, struct btrfs_block_group_cache, 2591 cache = rb_entry(node, struct btrfs_block_group_cache,
2584 cache_node); 2592 cache_node);
2585 atomic_inc(&cache->count); 2593 btrfs_get_block_group(cache);
2586 } else 2594 } else
2587 cache = NULL; 2595 cache = NULL;
2588 spin_unlock(&root->fs_info->block_group_cache_lock); 2596 spin_unlock(&root->fs_info->block_group_cache_lock);
@@ -4227,7 +4235,7 @@ search:
4227 u64 offset; 4235 u64 offset;
4228 int cached; 4236 int cached;
4229 4237
4230 atomic_inc(&block_group->count); 4238 btrfs_get_block_group(block_group);
4231 search_start = block_group->key.objectid; 4239 search_start = block_group->key.objectid;
4232 4240
4233have_block_group: 4241have_block_group:
@@ -4315,7 +4323,7 @@ have_block_group:
4315 4323
4316 btrfs_put_block_group(block_group); 4324 btrfs_put_block_group(block_group);
4317 block_group = last_ptr->block_group; 4325 block_group = last_ptr->block_group;
4318 atomic_inc(&block_group->count); 4326 btrfs_get_block_group(block_group);
4319 spin_unlock(&last_ptr->lock); 4327 spin_unlock(&last_ptr->lock);
4320 spin_unlock(&last_ptr->refill_lock); 4328 spin_unlock(&last_ptr->refill_lock);
4321 4329
@@ -7395,9 +7403,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
7395 wait_block_group_cache_done(block_group); 7403 wait_block_group_cache_done(block_group);
7396 7404
7397 btrfs_remove_free_space_cache(block_group); 7405 btrfs_remove_free_space_cache(block_group);
7398 7406 btrfs_put_block_group(block_group);
7399 WARN_ON(atomic_read(&block_group->count) != 1);
7400 kfree(block_group);
7401 7407
7402 spin_lock(&info->block_group_cache_lock); 7408 spin_lock(&info->block_group_cache_lock);
7403 } 7409 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index feaa13b105d9..c02033596f02 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -506,7 +506,8 @@ next_slot:
506} 506}
507 507
508static int extent_mergeable(struct extent_buffer *leaf, int slot, 508static int extent_mergeable(struct extent_buffer *leaf, int slot,
509 u64 objectid, u64 bytenr, u64 *start, u64 *end) 509 u64 objectid, u64 bytenr, u64 orig_offset,
510 u64 *start, u64 *end)
510{ 511{
511 struct btrfs_file_extent_item *fi; 512 struct btrfs_file_extent_item *fi;
512 struct btrfs_key key; 513 struct btrfs_key key;
@@ -522,6 +523,7 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
522 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 523 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
523 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG || 524 if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
524 btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr || 525 btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
526 btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset ||
525 btrfs_file_extent_compression(leaf, fi) || 527 btrfs_file_extent_compression(leaf, fi) ||
526 btrfs_file_extent_encryption(leaf, fi) || 528 btrfs_file_extent_encryption(leaf, fi) ||
527 btrfs_file_extent_other_encoding(leaf, fi)) 529 btrfs_file_extent_other_encoding(leaf, fi))
@@ -561,6 +563,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
561 u64 split; 563 u64 split;
562 int del_nr = 0; 564 int del_nr = 0;
563 int del_slot = 0; 565 int del_slot = 0;
566 int recow;
564 int ret; 567 int ret;
565 568
566 btrfs_drop_extent_cache(inode, start, end - 1, 0); 569 btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -568,6 +571,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
568 path = btrfs_alloc_path(); 571 path = btrfs_alloc_path();
569 BUG_ON(!path); 572 BUG_ON(!path);
570again: 573again:
574 recow = 0;
571 split = start; 575 split = start;
572 key.objectid = inode->i_ino; 576 key.objectid = inode->i_ino;
573 key.type = BTRFS_EXTENT_DATA_KEY; 577 key.type = BTRFS_EXTENT_DATA_KEY;
@@ -591,12 +595,60 @@ again:
591 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 595 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
592 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 596 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
593 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi); 597 orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
598 memcpy(&new_key, &key, sizeof(new_key));
599
600 if (start == key.offset && end < extent_end) {
601 other_start = 0;
602 other_end = start;
603 if (extent_mergeable(leaf, path->slots[0] - 1,
604 inode->i_ino, bytenr, orig_offset,
605 &other_start, &other_end)) {
606 new_key.offset = end;
607 btrfs_set_item_key_safe(trans, root, path, &new_key);
608 fi = btrfs_item_ptr(leaf, path->slots[0],
609 struct btrfs_file_extent_item);
610 btrfs_set_file_extent_num_bytes(leaf, fi,
611 extent_end - end);
612 btrfs_set_file_extent_offset(leaf, fi,
613 end - orig_offset);
614 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
615 struct btrfs_file_extent_item);
616 btrfs_set_file_extent_num_bytes(leaf, fi,
617 end - other_start);
618 btrfs_mark_buffer_dirty(leaf);
619 goto out;
620 }
621 }
622
623 if (start > key.offset && end == extent_end) {
624 other_start = end;
625 other_end = 0;
626 if (extent_mergeable(leaf, path->slots[0] + 1,
627 inode->i_ino, bytenr, orig_offset,
628 &other_start, &other_end)) {
629 fi = btrfs_item_ptr(leaf, path->slots[0],
630 struct btrfs_file_extent_item);
631 btrfs_set_file_extent_num_bytes(leaf, fi,
632 start - key.offset);
633 path->slots[0]++;
634 new_key.offset = start;
635 btrfs_set_item_key_safe(trans, root, path, &new_key);
636
637 fi = btrfs_item_ptr(leaf, path->slots[0],
638 struct btrfs_file_extent_item);
639 btrfs_set_file_extent_num_bytes(leaf, fi,
640 other_end - start);
641 btrfs_set_file_extent_offset(leaf, fi,
642 start - orig_offset);
643 btrfs_mark_buffer_dirty(leaf);
644 goto out;
645 }
646 }
594 647
595 while (start > key.offset || end < extent_end) { 648 while (start > key.offset || end < extent_end) {
596 if (key.offset == start) 649 if (key.offset == start)
597 split = end; 650 split = end;
598 651
599 memcpy(&new_key, &key, sizeof(new_key));
600 new_key.offset = split; 652 new_key.offset = split;
601 ret = btrfs_duplicate_item(trans, root, path, &new_key); 653 ret = btrfs_duplicate_item(trans, root, path, &new_key);
602 if (ret == -EAGAIN) { 654 if (ret == -EAGAIN) {
@@ -631,15 +683,18 @@ again:
631 path->slots[0]--; 683 path->slots[0]--;
632 extent_end = end; 684 extent_end = end;
633 } 685 }
686 recow = 1;
634 } 687 }
635 688
636 fi = btrfs_item_ptr(leaf, path->slots[0],
637 struct btrfs_file_extent_item);
638
639 other_start = end; 689 other_start = end;
640 other_end = 0; 690 other_end = 0;
641 if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino, 691 if (extent_mergeable(leaf, path->slots[0] + 1,
642 bytenr, &other_start, &other_end)) { 692 inode->i_ino, bytenr, orig_offset,
693 &other_start, &other_end)) {
694 if (recow) {
695 btrfs_release_path(root, path);
696 goto again;
697 }
643 extent_end = other_end; 698 extent_end = other_end;
644 del_slot = path->slots[0] + 1; 699 del_slot = path->slots[0] + 1;
645 del_nr++; 700 del_nr++;
@@ -650,8 +705,13 @@ again:
650 } 705 }
651 other_start = 0; 706 other_start = 0;
652 other_end = start; 707 other_end = start;
653 if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino, 708 if (extent_mergeable(leaf, path->slots[0] - 1,
654 bytenr, &other_start, &other_end)) { 709 inode->i_ino, bytenr, orig_offset,
710 &other_start, &other_end)) {
711 if (recow) {
712 btrfs_release_path(root, path);
713 goto again;
714 }
655 key.offset = other_start; 715 key.offset = other_start;
656 del_slot = path->slots[0]; 716 del_slot = path->slots[0];
657 del_nr++; 717 del_nr++;
@@ -660,22 +720,22 @@ again:
660 inode->i_ino, orig_offset); 720 inode->i_ino, orig_offset);
661 BUG_ON(ret); 721 BUG_ON(ret);
662 } 722 }
723 fi = btrfs_item_ptr(leaf, path->slots[0],
724 struct btrfs_file_extent_item);
663 if (del_nr == 0) { 725 if (del_nr == 0) {
664 btrfs_set_file_extent_type(leaf, fi, 726 btrfs_set_file_extent_type(leaf, fi,
665 BTRFS_FILE_EXTENT_REG); 727 BTRFS_FILE_EXTENT_REG);
666 btrfs_mark_buffer_dirty(leaf); 728 btrfs_mark_buffer_dirty(leaf);
667 goto out; 729 } else {
668 } 730 btrfs_set_file_extent_type(leaf, fi,
669 731 BTRFS_FILE_EXTENT_REG);
670 fi = btrfs_item_ptr(leaf, del_slot - 1, 732 btrfs_set_file_extent_num_bytes(leaf, fi,
671 struct btrfs_file_extent_item); 733 extent_end - key.offset);
672 btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG); 734 btrfs_mark_buffer_dirty(leaf);
673 btrfs_set_file_extent_num_bytes(leaf, fi,
674 extent_end - key.offset);
675 btrfs_mark_buffer_dirty(leaf);
676 735
677 ret = btrfs_del_items(trans, root, path, del_slot, del_nr); 736 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
678 BUG_ON(ret); 737 BUG_ON(ret);
738 }
679out: 739out:
680 btrfs_free_path(path); 740 btrfs_free_path(path);
681 return 0; 741 return 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5440bab23635..b330e27c2d8b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3796,6 +3796,12 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3796 3796
3797 if (location.type == BTRFS_INODE_ITEM_KEY) { 3797 if (location.type == BTRFS_INODE_ITEM_KEY) {
3798 inode = btrfs_iget(dir->i_sb, &location, root); 3798 inode = btrfs_iget(dir->i_sb, &location, root);
3799 if (unlikely(root->clean_orphans) &&
3800 !(inode->i_sb->s_flags & MS_RDONLY)) {
3801 down_read(&root->fs_info->cleanup_work_sem);
3802 btrfs_orphan_cleanup(root);
3803 up_read(&root->fs_info->cleanup_work_sem);
3804 }
3799 return inode; 3805 return inode;
3800 } 3806 }
3801 3807
@@ -3995,7 +4001,11 @@ skip:
3995 4001
3996 /* Reached end of directory/root. Bump pos past the last item. */ 4002 /* Reached end of directory/root. Bump pos past the last item. */
3997 if (key_type == BTRFS_DIR_INDEX_KEY) 4003 if (key_type == BTRFS_DIR_INDEX_KEY)
3998 filp->f_pos = INT_LIMIT(off_t); 4004 /*
4005 * 32-bit glibc will use getdents64, but then strtol -
4006 * so the last number we can serve is this.
4007 */
4008 filp->f_pos = 0x7fffffff;
3999 else 4009 else
4000 filp->f_pos++; 4010 filp->f_pos++;
4001nopos: 4011nopos:
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index b10a49d4bc6a..5c2a9e78a949 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -626,6 +626,8 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
626 626
627 if (ordered) 627 if (ordered)
628 offset = entry_end(ordered); 628 offset = entry_end(ordered);
629 else
630 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
629 631
630 mutex_lock(&tree->mutex); 632 mutex_lock(&tree->mutex);
631 disk_i_size = BTRFS_I(inode)->disk_i_size; 633 disk_i_size = BTRFS_I(inode)->disk_i_size;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index a9728680eca8..ed3e4a2ec2c8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3281,8 +3281,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3281 return -ENOMEM; 3281 return -ENOMEM;
3282 3282
3283 path = btrfs_alloc_path(); 3283 path = btrfs_alloc_path();
3284 if (!path) 3284 if (!path) {
3285 kfree(cluster);
3285 return -ENOMEM; 3286 return -ENOMEM;
3287 }
3286 3288
3287 rc->extents_found = 0; 3289 rc->extents_found = 0;
3288 rc->extents_skipped = 0; 3290 rc->extents_skipped = 0;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 198cff28766d..220dad5db017 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2649,8 +2649,10 @@ again:
2649 em = lookup_extent_mapping(em_tree, logical, *length); 2649 em = lookup_extent_mapping(em_tree, logical, *length);
2650 read_unlock(&em_tree->lock); 2650 read_unlock(&em_tree->lock);
2651 2651
2652 if (!em && unplug_page) 2652 if (!em && unplug_page) {
2653 kfree(multi);
2653 return 0; 2654 return 0;
2655 }
2654 2656
2655 if (!em) { 2657 if (!em) {
2656 printk(KERN_CRIT "unable to find logical %llu len %llu\n", 2658 printk(KERN_CRIT "unable to find logical %llu len %llu\n",
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 094ea65afc85..7b2600b380d7 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -5,7 +5,9 @@ have duplicated data). Fix oops in cifs_lookup. Workaround problem
5mounting to OS/400 Netserve. Fix oops in cifs_get_tcp_session. 5mounting to OS/400 Netserve. Fix oops in cifs_get_tcp_session.
6Disable use of server inode numbers when server only 6Disable use of server inode numbers when server only
7partially supports them (e.g. for one server querying inode numbers on 7partially supports them (e.g. for one server querying inode numbers on
8FindFirst fails but QPathInfo queries works). 8FindFirst fails but QPathInfo queries works). Fix oops with dfs in
9cifs_put_smb_ses. Fix mmap to work on directio mounts (needed
10for OpenOffice when on forcedirectio mount e.g.)
9 11
10Version 1.60 12Version 1.60
11------------- 13-------------
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index fea9e898c4ba..b44ce0a0711c 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -269,7 +269,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
269 int err; 269 int err;
270 270
271 mntget(newmnt); 271 mntget(newmnt);
272 err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags, mntlist); 272 err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags | MNT_SHRINKABLE, mntlist);
273 switch (err) { 273 switch (err) {
274 case 0: 274 case 0:
275 path_put(&nd->path); 275 path_put(&nd->path);
@@ -371,7 +371,6 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
371 if (IS_ERR(mnt)) 371 if (IS_ERR(mnt))
372 goto out_err; 372 goto out_err;
373 373
374 nd->path.mnt->mnt_flags |= MNT_SHRINKABLE;
375 rc = add_mount_helper(mnt, nd, &cifs_dfs_automount_list); 374 rc = add_mount_helper(mnt, nd, &cifs_dfs_automount_list);
376 375
377out: 376out:
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 29f1da761bbf..8c6a03627176 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -758,7 +758,7 @@ const struct file_operations cifs_file_ops = {
758}; 758};
759 759
760const struct file_operations cifs_file_direct_ops = { 760const struct file_operations cifs_file_direct_ops = {
761 /* no mmap, no aio, no readv - 761 /* no aio, no readv -
762 BB reevaluate whether they can be done with directio, no cache */ 762 BB reevaluate whether they can be done with directio, no cache */
763 .read = cifs_user_read, 763 .read = cifs_user_read,
764 .write = cifs_user_write, 764 .write = cifs_user_write,
@@ -767,6 +767,7 @@ const struct file_operations cifs_file_direct_ops = {
767 .lock = cifs_lock, 767 .lock = cifs_lock,
768 .fsync = cifs_fsync, 768 .fsync = cifs_fsync,
769 .flush = cifs_flush, 769 .flush = cifs_flush,
770 .mmap = cifs_file_mmap,
770 .splice_read = generic_file_splice_read, 771 .splice_read = generic_file_splice_read,
771#ifdef CONFIG_CIFS_POSIX 772#ifdef CONFIG_CIFS_POSIX
772 .unlocked_ioctl = cifs_ioctl, 773 .unlocked_ioctl = cifs_ioctl,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 63ea83ff687f..3bbcaa716b3c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2287,12 +2287,12 @@ int
2287cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, 2287cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2288 char *mount_data_global, const char *devname) 2288 char *mount_data_global, const char *devname)
2289{ 2289{
2290 int rc = 0; 2290 int rc;
2291 int xid; 2291 int xid;
2292 struct smb_vol *volume_info; 2292 struct smb_vol *volume_info;
2293 struct cifsSesInfo *pSesInfo = NULL; 2293 struct cifsSesInfo *pSesInfo;
2294 struct cifsTconInfo *tcon = NULL; 2294 struct cifsTconInfo *tcon;
2295 struct TCP_Server_Info *srvTcp = NULL; 2295 struct TCP_Server_Info *srvTcp;
2296 char *full_path; 2296 char *full_path;
2297 char *mount_data = mount_data_global; 2297 char *mount_data = mount_data_global;
2298#ifdef CONFIG_CIFS_DFS_UPCALL 2298#ifdef CONFIG_CIFS_DFS_UPCALL
@@ -2301,6 +2301,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2301 int referral_walks_count = 0; 2301 int referral_walks_count = 0;
2302try_mount_again: 2302try_mount_again:
2303#endif 2303#endif
2304 rc = 0;
2305 tcon = NULL;
2306 pSesInfo = NULL;
2307 srvTcp = NULL;
2304 full_path = NULL; 2308 full_path = NULL;
2305 2309
2306 xid = GetXid(); 2310 xid = GetXid();
@@ -2597,6 +2601,7 @@ remote_path_check:
2597 2601
2598 cleanup_volume_info(&volume_info); 2602 cleanup_volume_info(&volume_info);
2599 referral_walks_count++; 2603 referral_walks_count++;
2604 FreeXid(xid);
2600 goto try_mount_again; 2605 goto try_mount_again;
2601 } 2606 }
2602#else /* No DFS support, return error on mount */ 2607#else /* No DFS support, return error on mount */
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 332dd00f0894..c5c45de1a2ee 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1005,6 +1005,9 @@ COMPATIBLE_IOCTL(SCSI_IOCTL_SEND_COMMAND)
1005COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST) 1005COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST)
1006COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI) 1006COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI)
1007#endif 1007#endif
1008/* Big V (don't complain on serial console) */
1009IGNORE_IOCTL(VT_OPENQRY)
1010IGNORE_IOCTL(VT_GETMODE)
1008/* Little p (/dev/rtc, /dev/envctrl, etc.) */ 1011/* Little p (/dev/rtc, /dev/envctrl, etc.) */
1009COMPATIBLE_IOCTL(RTC_AIE_ON) 1012COMPATIBLE_IOCTL(RTC_AIE_ON)
1010COMPATIBLE_IOCTL(RTC_AIE_OFF) 1013COMPATIBLE_IOCTL(RTC_AIE_OFF)
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index c8afa6b1d91d..32a5f46b1157 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -121,8 +121,10 @@ static int get_target(const char *symname, struct path *path,
121 ret = -ENOENT; 121 ret = -ENOENT;
122 path_put(path); 122 path_put(path);
123 } 123 }
124 } else 124 } else {
125 ret = -EPERM; 125 ret = -EPERM;
126 path_put(path);
127 }
126 } 128 }
127 129
128 return ret; 130 return ret;
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index fbb6e5eed697..7cb0a59f4b9d 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1748,7 +1748,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
1748 char *cipher_name, size_t *key_size) 1748 char *cipher_name, size_t *key_size)
1749{ 1749{
1750 char dummy_key[ECRYPTFS_MAX_KEY_BYTES]; 1750 char dummy_key[ECRYPTFS_MAX_KEY_BYTES];
1751 char *full_alg_name; 1751 char *full_alg_name = NULL;
1752 int rc; 1752 int rc;
1753 1753
1754 *key_tfm = NULL; 1754 *key_tfm = NULL;
@@ -1763,7 +1763,6 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
1763 if (rc) 1763 if (rc)
1764 goto out; 1764 goto out;
1765 *key_tfm = crypto_alloc_blkcipher(full_alg_name, 0, CRYPTO_ALG_ASYNC); 1765 *key_tfm = crypto_alloc_blkcipher(full_alg_name, 0, CRYPTO_ALG_ASYNC);
1766 kfree(full_alg_name);
1767 if (IS_ERR(*key_tfm)) { 1766 if (IS_ERR(*key_tfm)) {
1768 rc = PTR_ERR(*key_tfm); 1767 rc = PTR_ERR(*key_tfm);
1769 printk(KERN_ERR "Unable to allocate crypto cipher with name " 1768 printk(KERN_ERR "Unable to allocate crypto cipher with name "
@@ -1786,6 +1785,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
1786 goto out; 1785 goto out;
1787 } 1786 }
1788out: 1787out:
1788 kfree(full_alg_name);
1789 return rc; 1789 return rc;
1790} 1790}
1791 1791
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 9e944057001b..678172b61be2 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -158,7 +158,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
158 struct dentry *ecryptfs_dentry = file->f_path.dentry; 158 struct dentry *ecryptfs_dentry = file->f_path.dentry;
159 /* Private value of ecryptfs_dentry allocated in 159 /* Private value of ecryptfs_dentry allocated in
160 * ecryptfs_lookup() */ 160 * ecryptfs_lookup() */
161 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 161 struct dentry *lower_dentry;
162 struct ecryptfs_file_info *file_info; 162 struct ecryptfs_file_info *file_info;
163 163
164 mount_crypt_stat = &ecryptfs_superblock_to_private( 164 mount_crypt_stat = &ecryptfs_superblock_to_private(
@@ -191,13 +191,6 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
191 | ECRYPTFS_ENCRYPTED); 191 | ECRYPTFS_ENCRYPTED);
192 } 192 }
193 mutex_unlock(&crypt_stat->cs_mutex); 193 mutex_unlock(&crypt_stat->cs_mutex);
194 if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
195 && !(file->f_flags & O_RDONLY)) {
196 rc = -EPERM;
197 printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
198 "file must hence be opened RO\n", __func__);
199 goto out;
200 }
201 if (!ecryptfs_inode_to_private(inode)->lower_file) { 194 if (!ecryptfs_inode_to_private(inode)->lower_file) {
202 rc = ecryptfs_init_persistent_file(ecryptfs_dentry); 195 rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
203 if (rc) { 196 if (rc) {
@@ -208,6 +201,13 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
208 goto out; 201 goto out;
209 } 202 }
210 } 203 }
204 if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
205 && !(file->f_flags & O_RDONLY)) {
206 rc = -EPERM;
207 printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
208 "file must hence be opened RO\n", __func__);
209 goto out;
210 }
211 ecryptfs_set_file_lower( 211 ecryptfs_set_file_lower(
212 file, ecryptfs_inode_to_private(inode)->lower_file); 212 file, ecryptfs_inode_to_private(inode)->lower_file);
213 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { 213 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
@@ -299,7 +299,6 @@ static int ecryptfs_ioctl(struct inode *inode, struct file *file,
299const struct file_operations ecryptfs_dir_fops = { 299const struct file_operations ecryptfs_dir_fops = {
300 .readdir = ecryptfs_readdir, 300 .readdir = ecryptfs_readdir,
301 .ioctl = ecryptfs_ioctl, 301 .ioctl = ecryptfs_ioctl,
302 .mmap = generic_file_mmap,
303 .open = ecryptfs_open, 302 .open = ecryptfs_open,
304 .flush = ecryptfs_flush, 303 .flush = ecryptfs_flush,
305 .release = ecryptfs_release, 304 .release = ecryptfs_release,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 429ca0b3ba08..4a430ab4115c 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -282,7 +282,8 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
282 goto out; 282 goto out;
283 } 283 }
284 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, 284 rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
285 ecryptfs_dir_inode->i_sb, 1); 285 ecryptfs_dir_inode->i_sb,
286 ECRYPTFS_INTERPOSE_FLAG_D_ADD);
286 if (rc) { 287 if (rc) {
287 printk(KERN_ERR "%s: Error interposing; rc = [%d]\n", 288 printk(KERN_ERR "%s: Error interposing; rc = [%d]\n",
288 __func__, rc); 289 __func__, rc);
@@ -463,9 +464,6 @@ out_lock:
463 unlock_dir(lower_dir_dentry); 464 unlock_dir(lower_dir_dentry);
464 dput(lower_new_dentry); 465 dput(lower_new_dentry);
465 dput(lower_old_dentry); 466 dput(lower_old_dentry);
466 d_drop(lower_old_dentry);
467 d_drop(new_dentry);
468 d_drop(old_dentry);
469 return rc; 467 return rc;
470} 468}
471 469
@@ -614,6 +612,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
614 struct dentry *lower_new_dentry; 612 struct dentry *lower_new_dentry;
615 struct dentry *lower_old_dir_dentry; 613 struct dentry *lower_old_dir_dentry;
616 struct dentry *lower_new_dir_dentry; 614 struct dentry *lower_new_dir_dentry;
615 struct dentry *trap = NULL;
617 616
618 lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); 617 lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
619 lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); 618 lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
@@ -621,7 +620,17 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
621 dget(lower_new_dentry); 620 dget(lower_new_dentry);
622 lower_old_dir_dentry = dget_parent(lower_old_dentry); 621 lower_old_dir_dentry = dget_parent(lower_old_dentry);
623 lower_new_dir_dentry = dget_parent(lower_new_dentry); 622 lower_new_dir_dentry = dget_parent(lower_new_dentry);
624 lock_rename(lower_old_dir_dentry, lower_new_dir_dentry); 623 trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
624 /* source should not be ancestor of target */
625 if (trap == lower_old_dentry) {
626 rc = -EINVAL;
627 goto out_lock;
628 }
629 /* target should not be ancestor of source */
630 if (trap == lower_new_dentry) {
631 rc = -ENOTEMPTY;
632 goto out_lock;
633 }
625 rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, 634 rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
626 lower_new_dir_dentry->d_inode, lower_new_dentry); 635 lower_new_dir_dentry->d_inode, lower_new_dentry);
627 if (rc) 636 if (rc)
@@ -715,31 +724,31 @@ static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd)
715 /* Released in ecryptfs_put_link(); only release here on error */ 724 /* Released in ecryptfs_put_link(); only release here on error */
716 buf = kmalloc(len, GFP_KERNEL); 725 buf = kmalloc(len, GFP_KERNEL);
717 if (!buf) { 726 if (!buf) {
718 rc = -ENOMEM; 727 buf = ERR_PTR(-ENOMEM);
719 goto out; 728 goto out;
720 } 729 }
721 old_fs = get_fs(); 730 old_fs = get_fs();
722 set_fs(get_ds()); 731 set_fs(get_ds());
723 rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); 732 rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len);
724 set_fs(old_fs); 733 set_fs(old_fs);
725 if (rc < 0) 734 if (rc < 0) {
726 goto out_free; 735 kfree(buf);
727 else 736 buf = ERR_PTR(rc);
737 } else
728 buf[rc] = '\0'; 738 buf[rc] = '\0';
729 rc = 0;
730 nd_set_link(nd, buf);
731 goto out;
732out_free:
733 kfree(buf);
734out: 739out:
735 return ERR_PTR(rc); 740 nd_set_link(nd, buf);
741 return NULL;
736} 742}
737 743
738static void 744static void
739ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr) 745ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
740{ 746{
741 /* Free the char* */ 747 char *buf = nd_get_link(nd);
742 kfree(nd_get_link(nd)); 748 if (!IS_ERR(buf)) {
749 /* Free the char* */
750 kfree(buf);
751 }
743} 752}
744 753
745/** 754/**
@@ -772,18 +781,23 @@ upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat,
772} 781}
773 782
774/** 783/**
775 * ecryptfs_truncate 784 * truncate_upper
776 * @dentry: The ecryptfs layer dentry 785 * @dentry: The ecryptfs layer dentry
777 * @new_length: The length to expand the file to 786 * @ia: Address of the ecryptfs inode's attributes
787 * @lower_ia: Address of the lower inode's attributes
778 * 788 *
779 * Function to handle truncations modifying the size of the file. Note 789 * Function to handle truncations modifying the size of the file. Note
780 * that the file sizes are interpolated. When expanding, we are simply 790 * that the file sizes are interpolated. When expanding, we are simply
781 * writing strings of 0's out. When truncating, we need to modify the 791 * writing strings of 0's out. When truncating, we truncate the upper
782 * underlying file size according to the page index interpolations. 792 * inode and update the lower_ia according to the page index
793 * interpolations. If ATTR_SIZE is set in lower_ia->ia_valid upon return,
794 * the caller must use lower_ia in a call to notify_change() to perform
795 * the truncation of the lower inode.
783 * 796 *
784 * Returns zero on success; non-zero otherwise 797 * Returns zero on success; non-zero otherwise
785 */ 798 */
786int ecryptfs_truncate(struct dentry *dentry, loff_t new_length) 799static int truncate_upper(struct dentry *dentry, struct iattr *ia,
800 struct iattr *lower_ia)
787{ 801{
788 int rc = 0; 802 int rc = 0;
789 struct inode *inode = dentry->d_inode; 803 struct inode *inode = dentry->d_inode;
@@ -794,8 +808,10 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
794 loff_t lower_size_before_truncate; 808 loff_t lower_size_before_truncate;
795 loff_t lower_size_after_truncate; 809 loff_t lower_size_after_truncate;
796 810
797 if (unlikely((new_length == i_size))) 811 if (unlikely((ia->ia_size == i_size))) {
812 lower_ia->ia_valid &= ~ATTR_SIZE;
798 goto out; 813 goto out;
814 }
799 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; 815 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
800 /* Set up a fake ecryptfs file, this is used to interface with 816 /* Set up a fake ecryptfs file, this is used to interface with
801 * the file in the underlying filesystem so that the 817 * the file in the underlying filesystem so that the
@@ -815,28 +831,30 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
815 &fake_ecryptfs_file, 831 &fake_ecryptfs_file,
816 ecryptfs_inode_to_private(dentry->d_inode)->lower_file); 832 ecryptfs_inode_to_private(dentry->d_inode)->lower_file);
817 /* Switch on growing or shrinking file */ 833 /* Switch on growing or shrinking file */
818 if (new_length > i_size) { 834 if (ia->ia_size > i_size) {
819 char zero[] = { 0x00 }; 835 char zero[] = { 0x00 };
820 836
837 lower_ia->ia_valid &= ~ATTR_SIZE;
821 /* Write a single 0 at the last position of the file; 838 /* Write a single 0 at the last position of the file;
822 * this triggers code that will fill in 0's throughout 839 * this triggers code that will fill in 0's throughout
823 * the intermediate portion of the previous end of the 840 * the intermediate portion of the previous end of the
824 * file and the new and of the file */ 841 * file and the new and of the file */
825 rc = ecryptfs_write(&fake_ecryptfs_file, zero, 842 rc = ecryptfs_write(&fake_ecryptfs_file, zero,
826 (new_length - 1), 1); 843 (ia->ia_size - 1), 1);
827 } else { /* new_length < i_size_read(inode) */ 844 } else { /* ia->ia_size < i_size_read(inode) */
828 /* We're chopping off all the pages down do the page 845 /* We're chopping off all the pages down to the page
829 * in which new_length is located. Fill in the end of 846 * in which ia->ia_size is located. Fill in the end of
830 * that page from (new_length & ~PAGE_CACHE_MASK) to 847 * that page from (ia->ia_size & ~PAGE_CACHE_MASK) to
831 * PAGE_CACHE_SIZE with zeros. */ 848 * PAGE_CACHE_SIZE with zeros. */
832 size_t num_zeros = (PAGE_CACHE_SIZE 849 size_t num_zeros = (PAGE_CACHE_SIZE
833 - (new_length & ~PAGE_CACHE_MASK)); 850 - (ia->ia_size & ~PAGE_CACHE_MASK));
834 851
835 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) { 852 if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
836 rc = vmtruncate(inode, new_length); 853 rc = vmtruncate(inode, ia->ia_size);
837 if (rc) 854 if (rc)
838 goto out_free; 855 goto out_free;
839 rc = vmtruncate(lower_dentry->d_inode, new_length); 856 lower_ia->ia_size = ia->ia_size;
857 lower_ia->ia_valid |= ATTR_SIZE;
840 goto out_free; 858 goto out_free;
841 } 859 }
842 if (num_zeros) { 860 if (num_zeros) {
@@ -848,7 +866,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
848 goto out_free; 866 goto out_free;
849 } 867 }
850 rc = ecryptfs_write(&fake_ecryptfs_file, zeros_virt, 868 rc = ecryptfs_write(&fake_ecryptfs_file, zeros_virt,
851 new_length, num_zeros); 869 ia->ia_size, num_zeros);
852 kfree(zeros_virt); 870 kfree(zeros_virt);
853 if (rc) { 871 if (rc) {
854 printk(KERN_ERR "Error attempting to zero out " 872 printk(KERN_ERR "Error attempting to zero out "
@@ -857,7 +875,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
857 goto out_free; 875 goto out_free;
858 } 876 }
859 } 877 }
860 vmtruncate(inode, new_length); 878 vmtruncate(inode, ia->ia_size);
861 rc = ecryptfs_write_inode_size_to_metadata(inode); 879 rc = ecryptfs_write_inode_size_to_metadata(inode);
862 if (rc) { 880 if (rc) {
863 printk(KERN_ERR "Problem with " 881 printk(KERN_ERR "Problem with "
@@ -870,10 +888,12 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
870 lower_size_before_truncate = 888 lower_size_before_truncate =
871 upper_size_to_lower_size(crypt_stat, i_size); 889 upper_size_to_lower_size(crypt_stat, i_size);
872 lower_size_after_truncate = 890 lower_size_after_truncate =
873 upper_size_to_lower_size(crypt_stat, new_length); 891 upper_size_to_lower_size(crypt_stat, ia->ia_size);
874 if (lower_size_after_truncate < lower_size_before_truncate) 892 if (lower_size_after_truncate < lower_size_before_truncate) {
875 vmtruncate(lower_dentry->d_inode, 893 lower_ia->ia_size = lower_size_after_truncate;
876 lower_size_after_truncate); 894 lower_ia->ia_valid |= ATTR_SIZE;
895 } else
896 lower_ia->ia_valid &= ~ATTR_SIZE;
877 } 897 }
878out_free: 898out_free:
879 if (ecryptfs_file_to_private(&fake_ecryptfs_file)) 899 if (ecryptfs_file_to_private(&fake_ecryptfs_file))
@@ -883,6 +903,33 @@ out:
883 return rc; 903 return rc;
884} 904}
885 905
906/**
907 * ecryptfs_truncate
908 * @dentry: The ecryptfs layer dentry
909 * @new_length: The length to expand the file to
910 *
911 * Simple function that handles the truncation of an eCryptfs inode and
912 * its corresponding lower inode.
913 *
914 * Returns zero on success; non-zero otherwise
915 */
916int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
917{
918 struct iattr ia = { .ia_valid = ATTR_SIZE, .ia_size = new_length };
919 struct iattr lower_ia = { .ia_valid = 0 };
920 int rc;
921
922 rc = truncate_upper(dentry, &ia, &lower_ia);
923 if (!rc && lower_ia.ia_valid & ATTR_SIZE) {
924 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
925
926 mutex_lock(&lower_dentry->d_inode->i_mutex);
927 rc = notify_change(lower_dentry, &lower_ia);
928 mutex_unlock(&lower_dentry->d_inode->i_mutex);
929 }
930 return rc;
931}
932
886static int 933static int
887ecryptfs_permission(struct inode *inode, int mask) 934ecryptfs_permission(struct inode *inode, int mask)
888{ 935{
@@ -905,6 +952,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
905{ 952{
906 int rc = 0; 953 int rc = 0;
907 struct dentry *lower_dentry; 954 struct dentry *lower_dentry;
955 struct iattr lower_ia;
908 struct inode *inode; 956 struct inode *inode;
909 struct inode *lower_inode; 957 struct inode *lower_inode;
910 struct ecryptfs_crypt_stat *crypt_stat; 958 struct ecryptfs_crypt_stat *crypt_stat;
@@ -943,15 +991,11 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
943 } 991 }
944 } 992 }
945 mutex_unlock(&crypt_stat->cs_mutex); 993 mutex_unlock(&crypt_stat->cs_mutex);
994 memcpy(&lower_ia, ia, sizeof(lower_ia));
995 if (ia->ia_valid & ATTR_FILE)
996 lower_ia.ia_file = ecryptfs_file_to_lower(ia->ia_file);
946 if (ia->ia_valid & ATTR_SIZE) { 997 if (ia->ia_valid & ATTR_SIZE) {
947 ecryptfs_printk(KERN_DEBUG, 998 rc = truncate_upper(dentry, ia, &lower_ia);
948 "ia->ia_valid = [0x%x] ATTR_SIZE" " = [0x%x]\n",
949 ia->ia_valid, ATTR_SIZE);
950 rc = ecryptfs_truncate(dentry, ia->ia_size);
951 /* ecryptfs_truncate handles resizing of the lower file */
952 ia->ia_valid &= ~ATTR_SIZE;
953 ecryptfs_printk(KERN_DEBUG, "ia->ia_valid = [%x]\n",
954 ia->ia_valid);
955 if (rc < 0) 999 if (rc < 0)
956 goto out; 1000 goto out;
957 } 1001 }
@@ -960,17 +1004,32 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
960 * mode change is for clearing setuid/setgid bits. Allow lower fs 1004 * mode change is for clearing setuid/setgid bits. Allow lower fs
961 * to interpret this in its own way. 1005 * to interpret this in its own way.
962 */ 1006 */
963 if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) 1007 if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
964 ia->ia_valid &= ~ATTR_MODE; 1008 lower_ia.ia_valid &= ~ATTR_MODE;
965 1009
966 mutex_lock(&lower_dentry->d_inode->i_mutex); 1010 mutex_lock(&lower_dentry->d_inode->i_mutex);
967 rc = notify_change(lower_dentry, ia); 1011 rc = notify_change(lower_dentry, &lower_ia);
968 mutex_unlock(&lower_dentry->d_inode->i_mutex); 1012 mutex_unlock(&lower_dentry->d_inode->i_mutex);
969out: 1013out:
970 fsstack_copy_attr_all(inode, lower_inode); 1014 fsstack_copy_attr_all(inode, lower_inode);
971 return rc; 1015 return rc;
972} 1016}
973 1017
1018int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1019 struct kstat *stat)
1020{
1021 struct kstat lower_stat;
1022 int rc;
1023
1024 rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry),
1025 ecryptfs_dentry_to_lower(dentry), &lower_stat);
1026 if (!rc) {
1027 generic_fillattr(dentry->d_inode, stat);
1028 stat->blocks = lower_stat.blocks;
1029 }
1030 return rc;
1031}
1032
974int 1033int
975ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, 1034ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
976 size_t size, int flags) 1035 size_t size, int flags)
@@ -1100,6 +1159,7 @@ const struct inode_operations ecryptfs_dir_iops = {
1100const struct inode_operations ecryptfs_main_iops = { 1159const struct inode_operations ecryptfs_main_iops = {
1101 .permission = ecryptfs_permission, 1160 .permission = ecryptfs_permission,
1102 .setattr = ecryptfs_setattr, 1161 .setattr = ecryptfs_setattr,
1162 .getattr = ecryptfs_getattr,
1103 .setxattr = ecryptfs_setxattr, 1163 .setxattr = ecryptfs_setxattr,
1104 .getxattr = ecryptfs_getxattr, 1164 .getxattr = ecryptfs_getxattr,
1105 .listxattr = ecryptfs_listxattr, 1165 .listxattr = ecryptfs_listxattr,
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 567bc4b9f70a..ea2f92101dfe 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -585,8 +585,8 @@ out:
585 * with as much information as it can before needing 585 * with as much information as it can before needing
586 * the lower filesystem. 586 * the lower filesystem.
587 * ecryptfs_read_super(): this accesses the lower filesystem and uses 587 * ecryptfs_read_super(): this accesses the lower filesystem and uses
588 * ecryptfs_interpolate to perform most of the linking 588 * ecryptfs_interpose to perform most of the linking
589 * ecryptfs_interpolate(): links the lower filesystem into ecryptfs 589 * ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c)
590 */ 590 */
591static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags, 591static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
592 const char *dev_name, void *raw_data, 592 const char *dev_name, void *raw_data,
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 698a8636d39c..2afbcebeda71 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -738,13 +738,28 @@ static int exofs_write_begin_export(struct file *file,
738 fsdata); 738 fsdata);
739} 739}
740 740
741static int exofs_write_end(struct file *file, struct address_space *mapping,
742 loff_t pos, unsigned len, unsigned copied,
743 struct page *page, void *fsdata)
744{
745 struct inode *inode = mapping->host;
746 /* According to comment in simple_write_end i_mutex is held */
747 loff_t i_size = inode->i_size;
748 int ret;
749
750 ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata);
751 if (i_size != inode->i_size)
752 mark_inode_dirty(inode);
753 return ret;
754}
755
741const struct address_space_operations exofs_aops = { 756const struct address_space_operations exofs_aops = {
742 .readpage = exofs_readpage, 757 .readpage = exofs_readpage,
743 .readpages = exofs_readpages, 758 .readpages = exofs_readpages,
744 .writepage = exofs_writepage, 759 .writepage = exofs_writepage,
745 .writepages = exofs_writepages, 760 .writepages = exofs_writepages,
746 .write_begin = exofs_write_begin_export, 761 .write_begin = exofs_write_begin_export,
747 .write_end = simple_write_end, 762 .write_end = exofs_write_end,
748}; 763};
749 764
750/****************************************************************************** 765/******************************************************************************
diff --git a/fs/exofs/pnfs.h b/fs/exofs/pnfs.h
index 423033addd1f..c52e9888b8ab 100644
--- a/fs/exofs/pnfs.h
+++ b/fs/exofs/pnfs.h
@@ -15,13 +15,7 @@
15#ifndef __EXOFS_PNFS_H__ 15#ifndef __EXOFS_PNFS_H__
16#define __EXOFS_PNFS_H__ 16#define __EXOFS_PNFS_H__
17 17
18#if defined(CONFIG_PNFS) 18#if ! defined(__PNFS_OSD_XDR_H__)
19
20
21/* FIXME: move this file to: linux/exportfs/pnfs_osd_xdr.h */
22#include "../nfs/objlayout/pnfs_osd_xdr.h"
23
24#else /* defined(CONFIG_PNFS) */
25 19
26enum pnfs_iomode { 20enum pnfs_iomode {
27 IOMODE_READ = 1, 21 IOMODE_READ = 1,
@@ -46,6 +40,6 @@ struct pnfs_osd_data_map {
46 u32 odm_raid_algorithm; 40 u32 odm_raid_algorithm;
47}; 41};
48 42
49#endif /* else defined(CONFIG_PNFS) */ 43#endif /* ! defined(__PNFS_OSD_XDR_H__) */
50 44
51#endif /* __EXOFS_PNFS_H__ */ 45#endif /* __EXOFS_PNFS_H__ */
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 9acf7e808139..9ed1bb1f319f 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -28,6 +28,7 @@ config EXT4_FS
28 28
29config EXT4_USE_FOR_EXT23 29config EXT4_USE_FOR_EXT23
30 bool "Use ext4 for ext2/ext3 file systems" 30 bool "Use ext4 for ext2/ext3 file systems"
31 depends on EXT4_FS
31 depends on EXT3_FS=n || EXT2_FS=n 32 depends on EXT3_FS=n || EXT2_FS=n
32 default y 33 default y
33 help 34 help
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 4df8621ec31c..a60ab9aad57d 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -16,7 +16,6 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/swap.h> 17#include <linux/swap.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/version.h>
20#include <linux/blkdev.h> 19#include <linux/blkdev.h>
21#include <linux/mutex.h> 20#include <linux/mutex.h>
22#include "ext4.h" 21#include "ext4.h"
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 56f9271ee8cc..af7b62699ea9 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -699,6 +699,8 @@ struct ext4_inode_info {
699 unsigned int i_reserved_meta_blocks; 699 unsigned int i_reserved_meta_blocks;
700 unsigned int i_allocated_meta_blocks; 700 unsigned int i_allocated_meta_blocks;
701 unsigned short i_delalloc_reserved_flag; 701 unsigned short i_delalloc_reserved_flag;
702 sector_t i_da_metadata_calc_last_lblock;
703 int i_da_metadata_calc_len;
702 704
703 /* on-disk additional length */ 705 /* on-disk additional length */
704 __u16 i_extra_isize; 706 __u16 i_extra_isize;
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 2ca686454e87..bdb6ce7e2eb4 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -225,7 +225,8 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); 225 ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
226} 226}
227 227
228extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); 228extern int ext4_ext_calc_metadata_amount(struct inode *inode,
229 sector_t lblocks);
229extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); 230extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
230extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); 231extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
231extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); 232extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3a7928f825e4..7d7b74e94687 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
296 * to allocate @blocks 296 * to allocate @blocks
297 * Worse case is one block per extent 297 * Worse case is one block per extent
298 */ 298 */
299int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) 299int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
300{ 300{
301 int lcap, icap, rcap, leafs, idxs, num; 301 struct ext4_inode_info *ei = EXT4_I(inode);
302 int newextents = blocks; 302 int idxs, num = 0;
303
304 rcap = ext4_ext_space_root_idx(inode, 0);
305 lcap = ext4_ext_space_block(inode, 0);
306 icap = ext4_ext_space_block_idx(inode, 0);
307 303
308 /* number of new leaf blocks needed */ 304 idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
309 num = leafs = (newextents + lcap - 1) / lcap; 305 / sizeof(struct ext4_extent_idx));
310 306
311 /* 307 /*
312 * Worse case, we need separate index block(s) 308 * If the new delayed allocation block is contiguous with the
313 * to link all new leaf blocks 309 * previous da block, it can share index blocks with the
310 * previous block, so we only need to allocate a new index
311 * block every idxs leaf blocks. At ldxs**2 blocks, we need
312 * an additional index block, and at ldxs**3 blocks, yet
313 * another index blocks.
314 */ 314 */
315 idxs = (leafs + icap - 1) / icap; 315 if (ei->i_da_metadata_calc_len &&
316 do { 316 ei->i_da_metadata_calc_last_lblock+1 == lblock) {
317 num += idxs; 317 if ((ei->i_da_metadata_calc_len % idxs) == 0)
318 idxs = (idxs + icap - 1) / icap; 318 num++;
319 } while (idxs > rcap); 319 if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
320 num++;
321 if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
322 num++;
323 ei->i_da_metadata_calc_len = 0;
324 } else
325 ei->i_da_metadata_calc_len++;
326 ei->i_da_metadata_calc_last_lblock++;
327 return num;
328 }
320 329
321 return num; 330 /*
331 * In the worst case we need a new set of index blocks at
332 * every level of the inode's extent tree.
333 */
334 ei->i_da_metadata_calc_len = 1;
335 ei->i_da_metadata_calc_last_lblock = lblock;
336 return ext_depth(inode) + 1;
322} 337}
323 338
324static int 339static int
@@ -3023,6 +3038,14 @@ out:
3023 return err; 3038 return err;
3024} 3039}
3025 3040
3041static void unmap_underlying_metadata_blocks(struct block_device *bdev,
3042 sector_t block, int count)
3043{
3044 int i;
3045 for (i = 0; i < count; i++)
3046 unmap_underlying_metadata(bdev, block + i);
3047}
3048
3026static int 3049static int
3027ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3050ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3028 ext4_lblk_t iblock, unsigned int max_blocks, 3051 ext4_lblk_t iblock, unsigned int max_blocks,
@@ -3098,6 +3121,18 @@ out:
3098 } else 3121 } else
3099 allocated = ret; 3122 allocated = ret;
3100 set_buffer_new(bh_result); 3123 set_buffer_new(bh_result);
3124 /*
3125 * if we allocated more blocks than requested
3126 * we need to make sure we unmap the extra block
3127 * allocated. The actual needed block will get
3128 * unmapped later when we find the buffer_head marked
3129 * new.
3130 */
3131 if (allocated > max_blocks) {
3132 unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
3133 newblock + max_blocks,
3134 allocated - max_blocks);
3135 }
3101map_out: 3136map_out:
3102 set_buffer_mapped(bh_result); 3137 set_buffer_mapped(bh_result);
3103out1: 3138out1:
@@ -3190,7 +3225,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3190 * this situation is possible, though, _during_ tree modification; 3225 * this situation is possible, though, _during_ tree modification;
3191 * this is why assert can't be put in ext4_ext_find_extent() 3226 * this is why assert can't be put in ext4_ext_find_extent()
3192 */ 3227 */
3193 BUG_ON(path[depth].p_ext == NULL && depth != 0); 3228 if (path[depth].p_ext == NULL && depth != 0) {
3229 ext4_error(inode->i_sb, __func__, "bad extent address "
3230 "inode: %lu, iblock: %d, depth: %d",
3231 inode->i_ino, iblock, depth);
3232 err = -EIO;
3233 goto out2;
3234 }
3194 eh = path[depth].p_hdr; 3235 eh = path[depth].p_hdr;
3195 3236
3196 ex = path[depth].p_ext; 3237 ex = path[depth].p_ext;
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 0b22497d92e1..98bd140aad01 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -88,9 +88,21 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
88 return ext4_force_commit(inode->i_sb); 88 return ext4_force_commit(inode->i_sb);
89 89
90 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; 90 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
91 if (jbd2_log_start_commit(journal, commit_tid)) 91 if (jbd2_log_start_commit(journal, commit_tid)) {
92 /*
93 * When the journal is on a different device than the
94 * fs data disk, we need to issue the barrier in
95 * writeback mode. (In ordered mode, the jbd2 layer
96 * will take care of issuing the barrier. In
97 * data=journal, all of the data blocks are written to
98 * the journal device.)
99 */
100 if (ext4_should_writeback_data(inode) &&
101 (journal->j_fs_dev != journal->j_dev) &&
102 (journal->j_flags & JBD2_BARRIER))
103 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
92 jbd2_log_wait_commit(journal, commit_tid); 104 jbd2_log_wait_commit(journal, commit_tid);
93 else if (journal->j_flags & JBD2_BARRIER) 105 } else if (journal->j_flags & JBD2_BARRIER)
94 blkdev_issue_flush(inode->i_sb->s_bdev, NULL); 106 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
95 return ret; 107 return ret;
96} 108}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ab807963a614..c818972c8302 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1009,77 +1009,88 @@ qsize_t *ext4_get_reserved_space(struct inode *inode)
1009 return &EXT4_I(inode)->i_reserved_quota; 1009 return &EXT4_I(inode)->i_reserved_quota;
1010} 1010}
1011#endif 1011#endif
1012
1012/* 1013/*
1013 * Calculate the number of metadata blocks need to reserve 1014 * Calculate the number of metadata blocks need to reserve
1014 * to allocate @blocks for non extent file based file 1015 * to allocate a new block at @lblocks for non extent file based file
1015 */ 1016 */
1016static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) 1017static int ext4_indirect_calc_metadata_amount(struct inode *inode,
1018 sector_t lblock)
1017{ 1019{
1018 int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); 1020 struct ext4_inode_info *ei = EXT4_I(inode);
1019 int ind_blks, dind_blks, tind_blks; 1021 int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1;
1020 1022 int blk_bits;
1021 /* number of new indirect blocks needed */
1022 ind_blks = (blocks + icap - 1) / icap;
1023 1023
1024 dind_blks = (ind_blks + icap - 1) / icap; 1024 if (lblock < EXT4_NDIR_BLOCKS)
1025 return 0;
1025 1026
1026 tind_blks = 1; 1027 lblock -= EXT4_NDIR_BLOCKS;
1027 1028
1028 return ind_blks + dind_blks + tind_blks; 1029 if (ei->i_da_metadata_calc_len &&
1030 (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
1031 ei->i_da_metadata_calc_len++;
1032 return 0;
1033 }
1034 ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
1035 ei->i_da_metadata_calc_len = 1;
1036 blk_bits = roundup_pow_of_two(lblock + 1);
1037 return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
1029} 1038}
1030 1039
1031/* 1040/*
1032 * Calculate the number of metadata blocks need to reserve 1041 * Calculate the number of metadata blocks need to reserve
1033 * to allocate given number of blocks 1042 * to allocate a block located at @lblock
1034 */ 1043 */
1035static int ext4_calc_metadata_amount(struct inode *inode, int blocks) 1044static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
1036{ 1045{
1037 if (!blocks)
1038 return 0;
1039
1040 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) 1046 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
1041 return ext4_ext_calc_metadata_amount(inode, blocks); 1047 return ext4_ext_calc_metadata_amount(inode, lblock);
1042 1048
1043 return ext4_indirect_calc_metadata_amount(inode, blocks); 1049 return ext4_indirect_calc_metadata_amount(inode, lblock);
1044} 1050}
1045 1051
1052/*
1053 * Called with i_data_sem down, which is important since we can call
1054 * ext4_discard_preallocations() from here.
1055 */
1046static void ext4_da_update_reserve_space(struct inode *inode, int used) 1056static void ext4_da_update_reserve_space(struct inode *inode, int used)
1047{ 1057{
1048 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1058 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1049 int total, mdb, mdb_free, mdb_claim = 0; 1059 struct ext4_inode_info *ei = EXT4_I(inode);
1050 1060 int mdb_free = 0;
1051 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1061
1052 /* recalculate the number of metablocks still need to be reserved */ 1062 spin_lock(&ei->i_block_reservation_lock);
1053 total = EXT4_I(inode)->i_reserved_data_blocks - used; 1063 if (unlikely(used > ei->i_reserved_data_blocks)) {
1054 mdb = ext4_calc_metadata_amount(inode, total); 1064 ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
1055 1065 "with only %d reserved data blocks\n",
1056 /* figure out how many metablocks to release */ 1066 __func__, inode->i_ino, used,
1057 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1067 ei->i_reserved_data_blocks);
1058 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1068 WARN_ON(1);
1059 1069 used = ei->i_reserved_data_blocks;
1060 if (mdb_free) { 1070 }
1061 /* Account for allocated meta_blocks */ 1071
1062 mdb_claim = EXT4_I(inode)->i_allocated_meta_blocks; 1072 /* Update per-inode reservations */
1063 BUG_ON(mdb_free < mdb_claim); 1073 ei->i_reserved_data_blocks -= used;
1064 mdb_free -= mdb_claim; 1074 used += ei->i_allocated_meta_blocks;
1065 1075 ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
1066 /* update fs dirty blocks counter */ 1076 ei->i_allocated_meta_blocks = 0;
1077 percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
1078
1079 if (ei->i_reserved_data_blocks == 0) {
1080 /*
1081 * We can release all of the reserved metadata blocks
1082 * only when we have written all of the delayed
1083 * allocation blocks.
1084 */
1085 mdb_free = ei->i_reserved_meta_blocks;
1086 ei->i_reserved_meta_blocks = 0;
1087 ei->i_da_metadata_calc_len = 0;
1067 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); 1088 percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
1068 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1069 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1070 } 1089 }
1071
1072 /* update per-inode reservations */
1073 BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
1074 EXT4_I(inode)->i_reserved_data_blocks -= used;
1075 percpu_counter_sub(&sbi->s_dirtyblocks_counter, used + mdb_claim);
1076 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1090 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1077 1091
1078 vfs_dq_claim_block(inode, used + mdb_claim); 1092 /* Update quota subsystem */
1079 1093 vfs_dq_claim_block(inode, used);
1080 /*
1081 * free those over-booking quota for metadata blocks
1082 */
1083 if (mdb_free) 1094 if (mdb_free)
1084 vfs_dq_release_reservation_block(inode, mdb_free); 1095 vfs_dq_release_reservation_block(inode, mdb_free);
1085 1096
@@ -1088,7 +1099,8 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
1088 * there aren't any writers on the inode, we can discard the 1099 * there aren't any writers on the inode, we can discard the
1089 * inode's preallocations. 1100 * inode's preallocations.
1090 */ 1101 */
1091 if (!total && (atomic_read(&inode->i_writecount) == 0)) 1102 if ((ei->i_reserved_data_blocks == 0) &&
1103 (atomic_read(&inode->i_writecount) == 0))
1092 ext4_discard_preallocations(inode); 1104 ext4_discard_preallocations(inode);
1093} 1105}
1094 1106
@@ -1797,11 +1809,15 @@ static int ext4_journalled_write_end(struct file *file,
1797 return ret ? ret : copied; 1809 return ret ? ret : copied;
1798} 1810}
1799 1811
1800static int ext4_da_reserve_space(struct inode *inode, int nrblocks) 1812/*
1813 * Reserve a single block located at lblock
1814 */
1815static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
1801{ 1816{
1802 int retries = 0; 1817 int retries = 0;
1803 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1818 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1804 unsigned long md_needed, mdblocks, total = 0; 1819 struct ext4_inode_info *ei = EXT4_I(inode);
1820 unsigned long md_needed, md_reserved;
1805 1821
1806 /* 1822 /*
1807 * recalculate the amount of metadata blocks to reserve 1823 * recalculate the amount of metadata blocks to reserve
@@ -1809,35 +1825,43 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1809 * worse case is one extent per block 1825 * worse case is one extent per block
1810 */ 1826 */
1811repeat: 1827repeat:
1812 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1828 spin_lock(&ei->i_block_reservation_lock);
1813 total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; 1829 md_reserved = ei->i_reserved_meta_blocks;
1814 mdblocks = ext4_calc_metadata_amount(inode, total); 1830 md_needed = ext4_calc_metadata_amount(inode, lblock);
1815 BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks); 1831 spin_unlock(&ei->i_block_reservation_lock);
1816
1817 md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
1818 total = md_needed + nrblocks;
1819 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1820 1832
1821 /* 1833 /*
1822 * Make quota reservation here to prevent quota overflow 1834 * Make quota reservation here to prevent quota overflow
1823 * later. Real quota accounting is done at pages writeout 1835 * later. Real quota accounting is done at pages writeout
1824 * time. 1836 * time.
1825 */ 1837 */
1826 if (vfs_dq_reserve_block(inode, total)) 1838 if (vfs_dq_reserve_block(inode, md_needed + 1)) {
1839 /*
1840 * We tend to badly over-estimate the amount of
1841 * metadata blocks which are needed, so if we have
1842 * reserved any metadata blocks, try to force out the
1843 * inode and see if we have any better luck.
1844 */
1845 if (md_reserved && retries++ <= 3)
1846 goto retry;
1827 return -EDQUOT; 1847 return -EDQUOT;
1848 }
1828 1849
1829 if (ext4_claim_free_blocks(sbi, total)) { 1850 if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
1830 vfs_dq_release_reservation_block(inode, total); 1851 vfs_dq_release_reservation_block(inode, md_needed + 1);
1831 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1852 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1853 retry:
1854 if (md_reserved)
1855 write_inode_now(inode, (retries == 3));
1832 yield(); 1856 yield();
1833 goto repeat; 1857 goto repeat;
1834 } 1858 }
1835 return -ENOSPC; 1859 return -ENOSPC;
1836 } 1860 }
1837 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1861 spin_lock(&ei->i_block_reservation_lock);
1838 EXT4_I(inode)->i_reserved_data_blocks += nrblocks; 1862 ei->i_reserved_data_blocks++;
1839 EXT4_I(inode)->i_reserved_meta_blocks += md_needed; 1863 ei->i_reserved_meta_blocks += md_needed;
1840 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1864 spin_unlock(&ei->i_block_reservation_lock);
1841 1865
1842 return 0; /* success */ 1866 return 0; /* success */
1843} 1867}
@@ -1845,49 +1869,46 @@ repeat:
1845static void ext4_da_release_space(struct inode *inode, int to_free) 1869static void ext4_da_release_space(struct inode *inode, int to_free)
1846{ 1870{
1847 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1871 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1848 int total, mdb, mdb_free, release; 1872 struct ext4_inode_info *ei = EXT4_I(inode);
1849 1873
1850 if (!to_free) 1874 if (!to_free)
1851 return; /* Nothing to release, exit */ 1875 return; /* Nothing to release, exit */
1852 1876
1853 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1877 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1854 1878
1855 if (!EXT4_I(inode)->i_reserved_data_blocks) { 1879 if (unlikely(to_free > ei->i_reserved_data_blocks)) {
1856 /* 1880 /*
1857 * if there is no reserved blocks, but we try to free some 1881 * if there aren't enough reserved blocks, then the
1858 * then the counter is messed up somewhere. 1882 * counter is messed up somewhere. Since this
1859 * but since this function is called from invalidate 1883 * function is called from invalidate page, it's
1860 * page, it's harmless to return without any action 1884 * harmless to return without any action.
1861 */ 1885 */
1862 printk(KERN_INFO "ext4 delalloc try to release %d reserved " 1886 ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
1863 "blocks for inode %lu, but there is no reserved " 1887 "ino %lu, to_free %d with only %d reserved "
1864 "data blocks\n", to_free, inode->i_ino); 1888 "data blocks\n", inode->i_ino, to_free,
1865 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1889 ei->i_reserved_data_blocks);
1866 return; 1890 WARN_ON(1);
1891 to_free = ei->i_reserved_data_blocks;
1867 } 1892 }
1893 ei->i_reserved_data_blocks -= to_free;
1868 1894
1869 /* recalculate the number of metablocks still need to be reserved */ 1895 if (ei->i_reserved_data_blocks == 0) {
1870 total = EXT4_I(inode)->i_reserved_data_blocks - to_free; 1896 /*
1871 mdb = ext4_calc_metadata_amount(inode, total); 1897 * We can release all of the reserved metadata blocks
1872 1898 * only when we have written all of the delayed
1873 /* figure out how many metablocks to release */ 1899 * allocation blocks.
1874 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1900 */
1875 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1901 to_free += ei->i_reserved_meta_blocks;
1876 1902 ei->i_reserved_meta_blocks = 0;
1877 release = to_free + mdb_free; 1903 ei->i_da_metadata_calc_len = 0;
1878 1904 }
1879 /* update fs dirty blocks counter for truncate case */
1880 percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
1881 1905
1882 /* update per-inode reservations */ 1906 /* update fs dirty blocks counter */
1883 BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); 1907 percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
1884 EXT4_I(inode)->i_reserved_data_blocks -= to_free;
1885 1908
1886 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1887 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1888 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1909 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1889 1910
1890 vfs_dq_release_reservation_block(inode, release); 1911 vfs_dq_release_reservation_block(inode, to_free);
1891} 1912}
1892 1913
1893static void ext4_da_page_release_reservation(struct page *page, 1914static void ext4_da_page_release_reservation(struct page *page,
@@ -2493,7 +2514,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
2493 * XXX: __block_prepare_write() unmaps passed block, 2514 * XXX: __block_prepare_write() unmaps passed block,
2494 * is it OK? 2515 * is it OK?
2495 */ 2516 */
2496 ret = ext4_da_reserve_space(inode, 1); 2517 ret = ext4_da_reserve_space(inode, iblock);
2497 if (ret) 2518 if (ret)
2498 /* not enough space to reserve */ 2519 /* not enough space to reserve */
2499 return ret; 2520 return ret;
@@ -2967,8 +2988,7 @@ retry:
2967out_writepages: 2988out_writepages:
2968 if (!no_nrwrite_index_update) 2989 if (!no_nrwrite_index_update)
2969 wbc->no_nrwrite_index_update = 0; 2990 wbc->no_nrwrite_index_update = 0;
2970 if (wbc->nr_to_write > nr_to_writebump) 2991 wbc->nr_to_write -= nr_to_writebump;
2971 wbc->nr_to_write -= nr_to_writebump;
2972 wbc->range_start = range_start; 2992 wbc->range_start = range_start;
2973 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); 2993 trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
2974 return ret; 2994 return ret;
@@ -2993,11 +3013,18 @@ static int ext4_nonda_switch(struct super_block *sb)
2993 if (2 * free_blocks < 3 * dirty_blocks || 3013 if (2 * free_blocks < 3 * dirty_blocks ||
2994 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { 3014 free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
2995 /* 3015 /*
2996 * free block count is less that 150% of dirty blocks 3016 * free block count is less than 150% of dirty blocks
2997 * or free blocks is less that watermark 3017 * or free blocks is less than watermark
2998 */ 3018 */
2999 return 1; 3019 return 1;
3000 } 3020 }
3021 /*
3022 * Even if we don't switch but are nearing capacity,
3023 * start pushing delalloc when 1/2 of free blocks are dirty.
3024 */
3025 if (free_blocks < 2 * dirty_blocks)
3026 writeback_inodes_sb_if_idle(sb);
3027
3001 return 0; 3028 return 0;
3002} 3029}
3003 3030
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 0ca811061bc7..436521cae456 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -17,7 +17,6 @@
17#include <linux/proc_fs.h> 17#include <linux/proc_fs.h>
18#include <linux/pagemap.h> 18#include <linux/pagemap.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/version.h>
21#include <linux/blkdev.h> 20#include <linux/blkdev.h>
22#include <linux/mutex.h> 21#include <linux/mutex.h>
23#include "ext4_jbd2.h" 22#include "ext4_jbd2.h"
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6ed9aa91f27d..735c20d5fd56 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -702,6 +702,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
702 ei->i_reserved_data_blocks = 0; 702 ei->i_reserved_data_blocks = 0;
703 ei->i_reserved_meta_blocks = 0; 703 ei->i_reserved_meta_blocks = 0;
704 ei->i_allocated_meta_blocks = 0; 704 ei->i_allocated_meta_blocks = 0;
705 ei->i_da_metadata_calc_len = 0;
705 ei->i_delalloc_reserved_flag = 0; 706 ei->i_delalloc_reserved_flag = 0;
706 spin_lock_init(&(ei->i_block_reservation_lock)); 707 spin_lock_init(&(ei->i_block_reservation_lock));
707#ifdef CONFIG_QUOTA 708#ifdef CONFIG_QUOTA
@@ -2174,9 +2175,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
2174 struct super_block *sb = sbi->s_buddy_cache->i_sb; 2175 struct super_block *sb = sbi->s_buddy_cache->i_sb;
2175 2176
2176 return snprintf(buf, PAGE_SIZE, "%llu\n", 2177 return snprintf(buf, PAGE_SIZE, "%llu\n",
2177 sbi->s_kbytes_written + 2178 (unsigned long long)(sbi->s_kbytes_written +
2178 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - 2179 ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
2179 EXT4_SB(sb)->s_sectors_written_start) >> 1)); 2180 EXT4_SB(sb)->s_sectors_written_start) >> 1)));
2180} 2181}
2181 2182
2182static ssize_t inode_readahead_blks_store(struct ext4_attr *a, 2183static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
@@ -4005,6 +4006,7 @@ static inline void unregister_as_ext2(void)
4005{ 4006{
4006 unregister_filesystem(&ext2_fs_type); 4007 unregister_filesystem(&ext2_fs_type);
4007} 4008}
4009MODULE_ALIAS("ext2");
4008#else 4010#else
4009static inline void register_as_ext2(void) { } 4011static inline void register_as_ext2(void) { }
4010static inline void unregister_as_ext2(void) { } 4012static inline void unregister_as_ext2(void) { }
@@ -4031,6 +4033,7 @@ static inline void unregister_as_ext3(void)
4031{ 4033{
4032 unregister_filesystem(&ext3_fs_type); 4034 unregister_filesystem(&ext3_fs_type);
4033} 4035}
4036MODULE_ALIAS("ext3");
4034#else 4037#else
4035static inline void register_as_ext3(void) { } 4038static inline void register_as_ext3(void) { }
4036static inline void unregister_as_ext3(void) { } 4039static inline void unregister_as_ext3(void) { }
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 83218bebbc7c..f3a2f7ed45aa 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1332,6 +1332,8 @@ retry:
1332 goto cleanup; 1332 goto cleanup;
1333 kfree(b_entry_name); 1333 kfree(b_entry_name);
1334 kfree(buffer); 1334 kfree(buffer);
1335 b_entry_name = NULL;
1336 buffer = NULL;
1335 brelse(is->iloc.bh); 1337 brelse(is->iloc.bh);
1336 kfree(is); 1338 kfree(is);
1337 kfree(bs); 1339 kfree(bs);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 2cf93ec40a67..97e01dc0d95f 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -618,60 +618,90 @@ static DEFINE_RWLOCK(fasync_lock);
618static struct kmem_cache *fasync_cache __read_mostly; 618static struct kmem_cache *fasync_cache __read_mostly;
619 619
620/* 620/*
621 * fasync_helper() is used by almost all character device drivers 621 * Remove a fasync entry. If successfully removed, return
622 * to set up the fasync queue. It returns negative on error, 0 if it did 622 * positive and clear the FASYNC flag. If no entry exists,
623 * no changes and positive if it added/deleted the entry. 623 * do nothing and return 0.
624 *
625 * NOTE! It is very important that the FASYNC flag always
626 * match the state "is the filp on a fasync list".
627 *
628 * We always take the 'filp->f_lock', in since fasync_lock
629 * needs to be irq-safe.
624 */ 630 */
625int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp) 631static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
626{ 632{
627 struct fasync_struct *fa, **fp; 633 struct fasync_struct *fa, **fp;
628 struct fasync_struct *new = NULL;
629 int result = 0; 634 int result = 0;
630 635
631 if (on) { 636 spin_lock(&filp->f_lock);
632 new = kmem_cache_alloc(fasync_cache, GFP_KERNEL); 637 write_lock_irq(&fasync_lock);
633 if (!new) 638 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
634 return -ENOMEM; 639 if (fa->fa_file != filp)
640 continue;
641 *fp = fa->fa_next;
642 kmem_cache_free(fasync_cache, fa);
643 filp->f_flags &= ~FASYNC;
644 result = 1;
645 break;
635 } 646 }
647 write_unlock_irq(&fasync_lock);
648 spin_unlock(&filp->f_lock);
649 return result;
650}
651
652/*
653 * Add a fasync entry. Return negative on error, positive if
654 * added, and zero if did nothing but change an existing one.
655 *
656 * NOTE! It is very important that the FASYNC flag always
657 * match the state "is the filp on a fasync list".
658 */
659static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
660{
661 struct fasync_struct *new, *fa, **fp;
662 int result = 0;
663
664 new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
665 if (!new)
666 return -ENOMEM;
636 667
637 /*
638 * We need to take f_lock first since it's not an IRQ-safe
639 * lock.
640 */
641 spin_lock(&filp->f_lock); 668 spin_lock(&filp->f_lock);
642 write_lock_irq(&fasync_lock); 669 write_lock_irq(&fasync_lock);
643 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) { 670 for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
644 if (fa->fa_file == filp) { 671 if (fa->fa_file != filp)
645 if(on) { 672 continue;
646 fa->fa_fd = fd; 673 fa->fa_fd = fd;
647 kmem_cache_free(fasync_cache, new); 674 kmem_cache_free(fasync_cache, new);
648 } else { 675 goto out;
649 *fp = fa->fa_next;
650 kmem_cache_free(fasync_cache, fa);
651 result = 1;
652 }
653 goto out;
654 }
655 } 676 }
656 677
657 if (on) { 678 new->magic = FASYNC_MAGIC;
658 new->magic = FASYNC_MAGIC; 679 new->fa_file = filp;
659 new->fa_file = filp; 680 new->fa_fd = fd;
660 new->fa_fd = fd; 681 new->fa_next = *fapp;
661 new->fa_next = *fapp; 682 *fapp = new;
662 *fapp = new; 683 result = 1;
663 result = 1; 684 filp->f_flags |= FASYNC;
664 } 685
665out: 686out:
666 if (on)
667 filp->f_flags |= FASYNC;
668 else
669 filp->f_flags &= ~FASYNC;
670 write_unlock_irq(&fasync_lock); 687 write_unlock_irq(&fasync_lock);
671 spin_unlock(&filp->f_lock); 688 spin_unlock(&filp->f_lock);
672 return result; 689 return result;
673} 690}
674 691
692/*
693 * fasync_helper() is used by almost all character device drivers
694 * to set up the fasync queue, and for regular files by the file
695 * lease code. It returns negative on error, 0 if it did no changes
696 * and positive if it added/deleted the entry.
697 */
698int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
699{
700 if (!on)
701 return fasync_remove_entry(filp, fapp);
702 return fasync_add_entry(fd, filp, fapp);
703}
704
675EXPORT_SYMBOL(fasync_helper); 705EXPORT_SYMBOL(fasync_helper);
676 706
677void __kill_fasync(struct fasync_struct *fa, int sig, int band) 707void __kill_fasync(struct fasync_struct *fa, int sig, int band)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 49bc1b8e8f19..1a7c42c64ff4 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -242,6 +242,7 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
242/** 242/**
243 * bdi_start_writeback - start writeback 243 * bdi_start_writeback - start writeback
244 * @bdi: the backing device to write from 244 * @bdi: the backing device to write from
245 * @sb: write inodes from this super_block
245 * @nr_pages: the number of pages to write 246 * @nr_pages: the number of pages to write
246 * 247 *
247 * Description: 248 * Description:
@@ -1187,6 +1188,23 @@ void writeback_inodes_sb(struct super_block *sb)
1187EXPORT_SYMBOL(writeback_inodes_sb); 1188EXPORT_SYMBOL(writeback_inodes_sb);
1188 1189
1189/** 1190/**
1191 * writeback_inodes_sb_if_idle - start writeback if none underway
1192 * @sb: the superblock
1193 *
1194 * Invoke writeback_inodes_sb if no writeback is currently underway.
1195 * Returns 1 if writeback was started, 0 if not.
1196 */
1197int writeback_inodes_sb_if_idle(struct super_block *sb)
1198{
1199 if (!writeback_in_progress(sb->s_bdi)) {
1200 writeback_inodes_sb(sb);
1201 return 1;
1202 } else
1203 return 0;
1204}
1205EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
1206
1207/**
1190 * sync_inodes_sb - sync sb inode pages 1208 * sync_inodes_sb - sync sb inode pages
1191 * @sb: the superblock 1209 * @sb: the superblock
1192 * 1210 *
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 4eb308aa3234..a6abbae8a278 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -569,6 +569,40 @@ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
569 return ret; 569 return ret;
570} 570}
571 571
572/**
573 * gfs2_file_aio_write - Perform a write to a file
574 * @iocb: The io context
575 * @iov: The data to write
576 * @nr_segs: Number of @iov segments
577 * @pos: The file position
578 *
579 * We have to do a lock/unlock here to refresh the inode size for
580 * O_APPEND writes, otherwise we can land up writing at the wrong
581 * offset. There is still a race, but provided the app is using its
582 * own file locking, this will make O_APPEND work as expected.
583 *
584 */
585
586static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
587 unsigned long nr_segs, loff_t pos)
588{
589 struct file *file = iocb->ki_filp;
590
591 if (file->f_flags & O_APPEND) {
592 struct dentry *dentry = file->f_dentry;
593 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
594 struct gfs2_holder gh;
595 int ret;
596
597 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
598 if (ret)
599 return ret;
600 gfs2_glock_dq_uninit(&gh);
601 }
602
603 return generic_file_aio_write(iocb, iov, nr_segs, pos);
604}
605
572#ifdef CONFIG_GFS2_FS_LOCKING_DLM 606#ifdef CONFIG_GFS2_FS_LOCKING_DLM
573 607
574/** 608/**
@@ -711,7 +745,7 @@ const struct file_operations gfs2_file_fops = {
711 .read = do_sync_read, 745 .read = do_sync_read,
712 .aio_read = generic_file_aio_read, 746 .aio_read = generic_file_aio_read,
713 .write = do_sync_write, 747 .write = do_sync_write,
714 .aio_write = generic_file_aio_write, 748 .aio_write = gfs2_file_aio_write,
715 .unlocked_ioctl = gfs2_ioctl, 749 .unlocked_ioctl = gfs2_ioctl,
716 .mmap = gfs2_mmap, 750 .mmap = gfs2_mmap,
717 .open = gfs2_open, 751 .open = gfs2_open,
@@ -741,7 +775,7 @@ const struct file_operations gfs2_file_fops_nolock = {
741 .read = do_sync_read, 775 .read = do_sync_read,
742 .aio_read = generic_file_aio_read, 776 .aio_read = generic_file_aio_read,
743 .write = do_sync_write, 777 .write = do_sync_write,
744 .aio_write = generic_file_aio_write, 778 .aio_write = gfs2_file_aio_write,
745 .unlocked_ioctl = gfs2_ioctl, 779 .unlocked_ioctl = gfs2_ioctl,
746 .mmap = gfs2_mmap, 780 .mmap = gfs2_mmap,
747 .open = gfs2_open, 781 .open = gfs2_open,
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index cb8d7a93d5ec..6f68a5f18eb8 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -121,7 +121,7 @@ struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp)
121 if (aspace) { 121 if (aspace) {
122 mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS); 122 mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS);
123 aspace->i_mapping->a_ops = &aspace_aops; 123 aspace->i_mapping->a_ops = &aspace_aops;
124 aspace->i_size = ~0ULL; 124 aspace->i_size = MAX_LFS_FILESIZE;
125 ip = GFS2_I(aspace); 125 ip = GFS2_I(aspace);
126 clear_bit(GIF_USER, &ip->i_flags); 126 clear_bit(GIF_USER, &ip->i_flags);
127 insert_inode_hash(aspace); 127 insert_inode_hash(aspace);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 247436c10deb..78f73ca1ef3e 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -748,7 +748,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
748 struct gfs2_rgrpd *nrgd; 748 struct gfs2_rgrpd *nrgd;
749 unsigned int num_gh; 749 unsigned int num_gh;
750 int dir_rename = 0; 750 int dir_rename = 0;
751 int alloc_required; 751 int alloc_required = 0;
752 unsigned int x; 752 unsigned int x;
753 int error; 753 int error;
754 754
@@ -867,7 +867,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
867 goto out_gunlock; 867 goto out_gunlock;
868 } 868 }
869 869
870 alloc_required = error = gfs2_diradd_alloc_required(ndir, &ndentry->d_name); 870 if (nip == NULL)
871 alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
872 error = alloc_required;
871 if (error < 0) 873 if (error < 0)
872 goto out_gunlock; 874 goto out_gunlock;
873 error = 0; 875 error = 0;
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 8a04108e0c22..c2ebdf2c01d4 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1296,6 +1296,7 @@ fail:
1296 1296
1297int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) 1297int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1298{ 1298{
1299 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1299 struct gfs2_ea_location el; 1300 struct gfs2_ea_location el;
1300 struct buffer_head *dibh; 1301 struct buffer_head *dibh;
1301 int error; 1302 int error;
@@ -1305,16 +1306,17 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1305 return error; 1306 return error;
1306 1307
1307 if (GFS2_EA_IS_STUFFED(el.el_ea)) { 1308 if (GFS2_EA_IS_STUFFED(el.el_ea)) {
1308 error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + RES_EATTR, 0); 1309 error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0);
1309 if (error) 1310 if (error == 0) {
1310 return error; 1311 gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1);
1311 1312 memcpy(GFS2_EA2DATA(el.el_ea), data,
1312 gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1); 1313 GFS2_EA_DATA_LEN(el.el_ea));
1313 memcpy(GFS2_EA2DATA(el.el_ea), data, 1314 }
1314 GFS2_EA_DATA_LEN(el.el_ea)); 1315 } else {
1315 } else
1316 error = ea_acl_chmod_unstuffed(ip, el.el_ea, data); 1316 error = ea_acl_chmod_unstuffed(ip, el.el_ea, data);
1317 }
1317 1318
1319 brelse(el.el_bh);
1318 if (error) 1320 if (error)
1319 return error; 1321 return error;
1320 1322
@@ -1327,8 +1329,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1327 brelse(dibh); 1329 brelse(dibh);
1328 } 1330 }
1329 1331
1330 gfs2_trans_end(GFS2_SB(&ip->i_inode)); 1332 gfs2_trans_end(sdp);
1331
1332 return error; 1333 return error;
1333} 1334}
1334 1335
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index a5089a6dd67a..7239efc690d8 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -646,22 +646,27 @@ static const struct super_operations hppfs_sbops = {
646static int hppfs_readlink(struct dentry *dentry, char __user *buffer, 646static int hppfs_readlink(struct dentry *dentry, char __user *buffer,
647 int buflen) 647 int buflen)
648{ 648{
649 struct dentry *proc_dentry; 649 struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
650
651 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
652 return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, 650 return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer,
653 buflen); 651 buflen);
654} 652}
655 653
656static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) 654static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
657{ 655{
658 struct dentry *proc_dentry; 656 struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
659
660 proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
661 657
662 return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd); 658 return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
663} 659}
664 660
661static void hppfs_put_link(struct dentry *dentry, struct nameidata *nd,
662 void *cookie)
663{
664 struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
665
666 if (proc_dentry->d_inode->i_op->put_link)
667 proc_dentry->d_inode->i_op->put_link(proc_dentry, nd, cookie);
668}
669
665static const struct inode_operations hppfs_dir_iops = { 670static const struct inode_operations hppfs_dir_iops = {
666 .lookup = hppfs_lookup, 671 .lookup = hppfs_lookup,
667}; 672};
@@ -669,6 +674,7 @@ static const struct inode_operations hppfs_dir_iops = {
669static const struct inode_operations hppfs_link_iops = { 674static const struct inode_operations hppfs_link_iops = {
670 .readlink = hppfs_readlink, 675 .readlink = hppfs_readlink,
671 .follow_link = hppfs_follow_link, 676 .follow_link = hppfs_follow_link,
677 .put_link = hppfs_put_link,
672}; 678};
673 679
674static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) 680static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index ca0f5eb62b20..886849370950 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -22,6 +22,7 @@
22#include <linux/jbd2.h> 22#include <linux/jbd2.h>
23#include <linux/errno.h> 23#include <linux/errno.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/blkdev.h>
25#include <trace/events/jbd2.h> 26#include <trace/events/jbd2.h>
26 27
27/* 28/*
@@ -515,6 +516,20 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
515 journal->j_tail_sequence = first_tid; 516 journal->j_tail_sequence = first_tid;
516 journal->j_tail = blocknr; 517 journal->j_tail = blocknr;
517 spin_unlock(&journal->j_state_lock); 518 spin_unlock(&journal->j_state_lock);
519
520 /*
521 * If there is an external journal, we need to make sure that
522 * any data blocks that were recently written out --- perhaps
523 * by jbd2_log_do_checkpoint() --- are flushed out before we
524 * drop the transactions from the external journal. It's
525 * unlikely this will be necessary, especially with a
526 * appropriately sized journal, but we need this to guarantee
527 * correctness. Fortunately jbd2_cleanup_journal_tail()
528 * doesn't get called all that often.
529 */
530 if ((journal->j_fs_dev != journal->j_dev) &&
531 (journal->j_flags & JBD2_BARRIER))
532 blkdev_issue_flush(journal->j_fs_dev, NULL);
518 if (!(journal->j_flags & JBD2_ABORT)) 533 if (!(journal->j_flags & JBD2_ABORT))
519 jbd2_journal_update_superblock(journal, 1); 534 jbd2_journal_update_superblock(journal, 1);
520 return 0; 535 return 0;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6a10238d2c63..1bc74b6f26d2 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -259,6 +259,7 @@ static int journal_submit_data_buffers(journal_t *journal,
259 ret = err; 259 ret = err;
260 spin_lock(&journal->j_list_lock); 260 spin_lock(&journal->j_list_lock);
261 J_ASSERT(jinode->i_transaction == commit_transaction); 261 J_ASSERT(jinode->i_transaction == commit_transaction);
262 commit_transaction->t_flushed_data_blocks = 1;
262 jinode->i_flags &= ~JI_COMMIT_RUNNING; 263 jinode->i_flags &= ~JI_COMMIT_RUNNING;
263 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); 264 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
264 } 265 }
@@ -708,8 +709,17 @@ start_journal_io:
708 } 709 }
709 } 710 }
710 711
711 /* Done it all: now write the commit record asynchronously. */ 712 /*
713 * If the journal is not located on the file system device,
714 * then we must flush the file system device before we issue
715 * the commit record
716 */
717 if (commit_transaction->t_flushed_data_blocks &&
718 (journal->j_fs_dev != journal->j_dev) &&
719 (journal->j_flags & JBD2_BARRIER))
720 blkdev_issue_flush(journal->j_fs_dev, NULL);
712 721
722 /* Done it all: now write the commit record asynchronously. */
713 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 723 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
714 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { 724 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
715 err = journal_submit_commit_record(journal, commit_transaction, 725 err = journal_submit_commit_record(journal, commit_transaction,
@@ -720,13 +730,6 @@ start_journal_io:
720 blkdev_issue_flush(journal->j_dev, NULL); 730 blkdev_issue_flush(journal->j_dev, NULL);
721 } 731 }
722 732
723 /*
724 * This is the right place to wait for data buffers both for ASYNC
725 * and !ASYNC commit. If commit is ASYNC, we need to wait only after
726 * the commit block went to disk (which happens above). If commit is
727 * SYNC, we need to wait for data buffers before we start writing
728 * commit block, which happens below in such setting.
729 */
730 err = journal_finish_inode_data_buffers(journal, commit_transaction); 733 err = journal_finish_inode_data_buffers(journal, commit_transaction);
731 if (err) { 734 if (err) {
732 printk(KERN_WARNING 735 printk(KERN_WARNING
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 17af879e6e9e..ac0d027595d0 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -814,7 +814,7 @@ static journal_t * journal_init_common (void)
814 journal_t *journal; 814 journal_t *journal;
815 int err; 815 int err;
816 816
817 journal = kzalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL); 817 journal = kzalloc(sizeof(*journal), GFP_KERNEL);
818 if (!journal) 818 if (!journal)
819 goto fail; 819 goto fail;
820 820
diff --git a/fs/namei.c b/fs/namei.c
index 68921d9b5302..94a5e60779f9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -232,6 +232,7 @@ int generic_permission(struct inode *inode, int mask,
232 /* 232 /*
233 * Searching includes executable on directories, else just read. 233 * Searching includes executable on directories, else just read.
234 */ 234 */
235 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
235 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 236 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
236 if (capable(CAP_DAC_READ_SEARCH)) 237 if (capable(CAP_DAC_READ_SEARCH))
237 return 0; 238 return 0;
@@ -560,6 +561,7 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
560 dget(dentry); 561 dget(dentry);
561 } 562 }
562 mntget(path->mnt); 563 mntget(path->mnt);
564 nd->last_type = LAST_BIND;
563 cookie = dentry->d_inode->i_op->follow_link(dentry, nd); 565 cookie = dentry->d_inode->i_op->follow_link(dentry, nd);
564 error = PTR_ERR(cookie); 566 error = PTR_ERR(cookie);
565 if (!IS_ERR(cookie)) { 567 if (!IS_ERR(cookie)) {
@@ -1602,11 +1604,12 @@ struct file *do_filp_open(int dfd, const char *pathname,
1602 struct file *filp; 1604 struct file *filp;
1603 struct nameidata nd; 1605 struct nameidata nd;
1604 int error; 1606 int error;
1605 struct path path, save; 1607 struct path path;
1606 struct dentry *dir; 1608 struct dentry *dir;
1607 int count = 0; 1609 int count = 0;
1608 int will_truncate; 1610 int will_truncate;
1609 int flag = open_to_namei_flags(open_flag); 1611 int flag = open_to_namei_flags(open_flag);
1612 int force_reval = 0;
1610 1613
1611 /* 1614 /*
1612 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only 1615 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
@@ -1618,7 +1621,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
1618 open_flag |= O_DSYNC; 1621 open_flag |= O_DSYNC;
1619 1622
1620 if (!acc_mode) 1623 if (!acc_mode)
1621 acc_mode = MAY_OPEN | ACC_MODE(flag); 1624 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
1622 1625
1623 /* O_TRUNC implies we need access checks for write permissions */ 1626 /* O_TRUNC implies we need access checks for write permissions */
1624 if (flag & O_TRUNC) 1627 if (flag & O_TRUNC)
@@ -1658,9 +1661,12 @@ struct file *do_filp_open(int dfd, const char *pathname,
1658 /* 1661 /*
1659 * Create - we need to know the parent. 1662 * Create - we need to know the parent.
1660 */ 1663 */
1664reval:
1661 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); 1665 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
1662 if (error) 1666 if (error)
1663 return ERR_PTR(error); 1667 return ERR_PTR(error);
1668 if (force_reval)
1669 nd.flags |= LOOKUP_REVAL;
1664 error = path_walk(pathname, &nd); 1670 error = path_walk(pathname, &nd);
1665 if (error) { 1671 if (error) {
1666 if (nd.root.mnt) 1672 if (nd.root.mnt)
@@ -1852,17 +1858,7 @@ do_link:
1852 error = security_inode_follow_link(path.dentry, &nd); 1858 error = security_inode_follow_link(path.dentry, &nd);
1853 if (error) 1859 if (error)
1854 goto exit_dput; 1860 goto exit_dput;
1855 save = nd.path;
1856 path_get(&save);
1857 error = __do_follow_link(&path, &nd); 1861 error = __do_follow_link(&path, &nd);
1858 if (error == -ESTALE) {
1859 /* nd.path had been dropped */
1860 nd.path = save;
1861 path_get(&nd.path);
1862 nd.flags |= LOOKUP_REVAL;
1863 error = __do_follow_link(&path, &nd);
1864 }
1865 path_put(&save);
1866 path_put(&path); 1862 path_put(&path);
1867 if (error) { 1863 if (error) {
1868 /* Does someone understand code flow here? Or it is only 1864 /* Does someone understand code flow here? Or it is only
@@ -1872,6 +1868,10 @@ do_link:
1872 release_open_intent(&nd); 1868 release_open_intent(&nd);
1873 if (nd.root.mnt) 1869 if (nd.root.mnt)
1874 path_put(&nd.root); 1870 path_put(&nd.root);
1871 if (error == -ESTALE && !force_reval) {
1872 force_reval = 1;
1873 goto reval;
1874 }
1875 return ERR_PTR(error); 1875 return ERR_PTR(error);
1876 } 1876 }
1877 nd.flags &= ~LOOKUP_PARENT; 1877 nd.flags &= ~LOOKUP_PARENT;
diff --git a/fs/namespace.c b/fs/namespace.c
index 7d70d63ceb29..c768f733c8d6 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -965,10 +965,12 @@ EXPORT_SYMBOL(may_umount_tree);
965int may_umount(struct vfsmount *mnt) 965int may_umount(struct vfsmount *mnt)
966{ 966{
967 int ret = 1; 967 int ret = 1;
968 down_read(&namespace_sem);
968 spin_lock(&vfsmount_lock); 969 spin_lock(&vfsmount_lock);
969 if (propagate_mount_busy(mnt, 2)) 970 if (propagate_mount_busy(mnt, 2))
970 ret = 0; 971 ret = 0;
971 spin_unlock(&vfsmount_lock); 972 spin_unlock(&vfsmount_lock);
973 up_read(&namespace_sem);
972 return ret; 974 return ret;
973} 975}
974 976
@@ -1352,12 +1354,12 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1352 if (err) 1354 if (err)
1353 goto out_cleanup_ids; 1355 goto out_cleanup_ids;
1354 1356
1357 spin_lock(&vfsmount_lock);
1358
1355 if (IS_MNT_SHARED(dest_mnt)) { 1359 if (IS_MNT_SHARED(dest_mnt)) {
1356 for (p = source_mnt; p; p = next_mnt(p, source_mnt)) 1360 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
1357 set_mnt_shared(p); 1361 set_mnt_shared(p);
1358 } 1362 }
1359
1360 spin_lock(&vfsmount_lock);
1361 if (parent_path) { 1363 if (parent_path) {
1362 detach_mnt(source_mnt, parent_path); 1364 detach_mnt(source_mnt, parent_path);
1363 attach_mnt(source_mnt, path); 1365 attach_mnt(source_mnt, path);
@@ -1534,8 +1536,12 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1534 err = change_mount_flags(path->mnt, flags); 1536 err = change_mount_flags(path->mnt, flags);
1535 else 1537 else
1536 err = do_remount_sb(sb, flags, data, 0); 1538 err = do_remount_sb(sb, flags, data, 0);
1537 if (!err) 1539 if (!err) {
1540 spin_lock(&vfsmount_lock);
1541 mnt_flags |= path->mnt->mnt_flags & MNT_PNODE_MASK;
1538 path->mnt->mnt_flags = mnt_flags; 1542 path->mnt->mnt_flags = mnt_flags;
1543 spin_unlock(&vfsmount_lock);
1544 }
1539 up_write(&sb->s_umount); 1545 up_write(&sb->s_umount);
1540 if (!err) { 1546 if (!err) {
1541 security_sb_post_remount(path->mnt, flags, data); 1547 security_sb_post_remount(path->mnt, flags, data);
@@ -1665,6 +1671,8 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
1665{ 1671{
1666 int err; 1672 int err;
1667 1673
1674 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD);
1675
1668 down_write(&namespace_sem); 1676 down_write(&namespace_sem);
1669 /* Something was mounted here while we slept */ 1677 /* Something was mounted here while we slept */
1670 while (d_mountpoint(path->dentry) && 1678 while (d_mountpoint(path->dentry) &&
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2c5ace4f00a7..3c7f03b669fb 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1615,6 +1615,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1615 goto out; 1615 goto out;
1616 1616
1617 new_dentry = dentry; 1617 new_dentry = dentry;
1618 rehash = NULL;
1618 new_inode = NULL; 1619 new_inode = NULL;
1619 } 1620 }
1620 } 1621 }
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7c2e337d05af..c194793b642b 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -780,12 +780,9 @@ static inline int nfsd_dosync(struct file *filp, struct dentry *dp,
780 int (*fsync) (struct file *, struct dentry *, int); 780 int (*fsync) (struct file *, struct dentry *, int);
781 int err; 781 int err;
782 782
783 err = filemap_fdatawrite(inode->i_mapping); 783 err = filemap_write_and_wait(inode->i_mapping);
784 if (err == 0 && fop && (fsync = fop->fsync)) 784 if (err == 0 && fop && (fsync = fop->fsync))
785 err = fsync(filp, dp, 0); 785 err = fsync(filp, dp, 0);
786 if (err == 0)
787 err = filemap_fdatawait(inode->i_mapping);
788
789 return err; 786 return err;
790} 787}
791 788
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index f4a14ea2ed9c..effdbdbe6c11 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -417,8 +417,8 @@ __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
417 417
418 key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT - 418 key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT -
419 bmap->b_inode->i_blkbits); 419 bmap->b_inode->i_blkbits);
420 for (pbh = page_buffers(bh->b_page); pbh != bh; 420 for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page)
421 pbh = pbh->b_this_page, key++); 421 key++;
422 422
423 return key; 423 return key;
424} 424}
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index d5ad54e204a5..18737818db63 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -328,19 +328,24 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
328 tnicps += nicps; 328 tnicps += nicps;
329 nilfs_mdt_mark_buffer_dirty(cp_bh); 329 nilfs_mdt_mark_buffer_dirty(cp_bh);
330 nilfs_mdt_mark_dirty(cpfile); 330 nilfs_mdt_mark_dirty(cpfile);
331 if (!nilfs_cpfile_is_in_first(cpfile, cno) && 331 if (!nilfs_cpfile_is_in_first(cpfile, cno)) {
332 (count = nilfs_cpfile_block_sub_valid_checkpoints( 332 count =
333 cpfile, cp_bh, kaddr, nicps)) == 0) { 333 nilfs_cpfile_block_sub_valid_checkpoints(
334 /* make hole */ 334 cpfile, cp_bh, kaddr, nicps);
335 kunmap_atomic(kaddr, KM_USER0); 335 if (count == 0) {
336 brelse(cp_bh); 336 /* make hole */
337 ret = nilfs_cpfile_delete_checkpoint_block( 337 kunmap_atomic(kaddr, KM_USER0);
338 cpfile, cno); 338 brelse(cp_bh);
339 if (ret == 0) 339 ret =
340 continue; 340 nilfs_cpfile_delete_checkpoint_block(
341 printk(KERN_ERR "%s: cannot delete block\n", 341 cpfile, cno);
342 __func__); 342 if (ret == 0)
343 break; 343 continue;
344 printk(KERN_ERR
345 "%s: cannot delete block\n",
346 __func__);
347 break;
348 }
344 } 349 }
345 } 350 }
346 351
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index d369ac718277..236753df5cdf 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -51,11 +51,11 @@ static int nilfs_direct_lookup(const struct nilfs_bmap *bmap,
51 struct nilfs_direct *direct; 51 struct nilfs_direct *direct;
52 __u64 ptr; 52 __u64 ptr;
53 53
54 direct = (struct nilfs_direct *)bmap; 54 direct = (struct nilfs_direct *)bmap; /* XXX: use macro for level 1 */
55 if ((key > NILFS_DIRECT_KEY_MAX) || 55 if (key > NILFS_DIRECT_KEY_MAX || level != 1)
56 (level != 1) || /* XXX: use macro for level 1 */ 56 return -ENOENT;
57 ((ptr = nilfs_direct_get_ptr(direct, key)) == 57 ptr = nilfs_direct_get_ptr(direct, key);
58 NILFS_BMAP_INVALID_PTR)) 58 if (ptr == NILFS_BMAP_INVALID_PTR)
59 return -ENOENT; 59 return -ENOENT;
60 60
61 if (ptrp != NULL) 61 if (ptrp != NULL)
@@ -73,9 +73,10 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap,
73 sector_t blocknr; 73 sector_t blocknr;
74 int ret, cnt; 74 int ret, cnt;
75 75
76 if (key > NILFS_DIRECT_KEY_MAX || 76 if (key > NILFS_DIRECT_KEY_MAX)
77 (ptr = nilfs_direct_get_ptr(direct, key)) == 77 return -ENOENT;
78 NILFS_BMAP_INVALID_PTR) 78 ptr = nilfs_direct_get_ptr(direct, key);
79 if (ptr == NILFS_BMAP_INVALID_PTR)
79 return -ENOENT; 80 return -ENOENT;
80 81
81 if (NILFS_BMAP_USE_VBN(bmap)) { 82 if (NILFS_BMAP_USE_VBN(bmap)) {
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index f6af76042d80..d6b2b83de363 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -480,7 +480,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
480 unsigned int cmd, void __user *argp) 480 unsigned int cmd, void __user *argp)
481{ 481{
482 struct nilfs_argv argv[5]; 482 struct nilfs_argv argv[5];
483 const static size_t argsz[5] = { 483 static const size_t argsz[5] = {
484 sizeof(struct nilfs_vdesc), 484 sizeof(struct nilfs_vdesc),
485 sizeof(struct nilfs_period), 485 sizeof(struct nilfs_period),
486 sizeof(__u64), 486 sizeof(__u64),
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index c9ee67b442e1..1afb0a10229f 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -121,7 +121,7 @@ static int idr_callback(int id, void *p, void *data)
121 if (warned) 121 if (warned)
122 return 0; 122 return 0;
123 123
124 warned = false; 124 warned = true;
125 entry = p; 125 entry = p;
126 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); 126 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
127 127
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 8271cf05c957..a94e8bd8eb1f 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -552,7 +552,7 @@ retry:
552 552
553 spin_lock(&group->inotify_data.idr_lock); 553 spin_lock(&group->inotify_data.idr_lock);
554 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, 554 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
555 group->inotify_data.last_wd, 555 group->inotify_data.last_wd+1,
556 &tmp_ientry->wd); 556 &tmp_ientry->wd);
557 spin_unlock(&group->inotify_data.idr_lock); 557 spin_unlock(&group->inotify_data.idr_lock);
558 if (ret) { 558 if (ret) {
@@ -632,7 +632,7 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
632 632
633 spin_lock_init(&group->inotify_data.idr_lock); 633 spin_lock_init(&group->inotify_data.idr_lock);
634 idr_init(&group->inotify_data.idr); 634 idr_init(&group->inotify_data.idr);
635 group->inotify_data.last_wd = 1; 635 group->inotify_data.last_wd = 0;
636 group->inotify_data.user = user; 636 group->inotify_data.user = user;
637 group->inotify_data.fa = NULL; 637 group->inotify_data.fa = NULL;
638 638
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 3d30a1c974a8..06ccf6a86d35 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1772,7 +1772,8 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1772 loff_t *ppos, 1772 loff_t *ppos,
1773 size_t count, 1773 size_t count,
1774 int appending, 1774 int appending,
1775 int *direct_io) 1775 int *direct_io,
1776 int *has_refcount)
1776{ 1777{
1777 int ret = 0, meta_level = 0; 1778 int ret = 0, meta_level = 0;
1778 struct inode *inode = dentry->d_inode; 1779 struct inode *inode = dentry->d_inode;
@@ -1833,6 +1834,8 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1833 saved_pos, 1834 saved_pos,
1834 count, 1835 count,
1835 &meta_level); 1836 &meta_level);
1837 if (has_refcount)
1838 *has_refcount = 1;
1836 } 1839 }
1837 1840
1838 if (ret < 0) { 1841 if (ret < 0) {
@@ -1856,6 +1859,10 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1856 break; 1859 break;
1857 } 1860 }
1858 1861
1862 if (has_refcount && *has_refcount == 1) {
1863 *direct_io = 0;
1864 break;
1865 }
1859 /* 1866 /*
1860 * Allowing concurrent direct writes means 1867 * Allowing concurrent direct writes means
1861 * i_size changes wouldn't be synchronized, so 1868 * i_size changes wouldn't be synchronized, so
@@ -1899,7 +1906,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
1899 loff_t pos) 1906 loff_t pos)
1900{ 1907{
1901 int ret, direct_io, appending, rw_level, have_alloc_sem = 0; 1908 int ret, direct_io, appending, rw_level, have_alloc_sem = 0;
1902 int can_do_direct; 1909 int can_do_direct, has_refcount = 0;
1903 ssize_t written = 0; 1910 ssize_t written = 0;
1904 size_t ocount; /* original count */ 1911 size_t ocount; /* original count */
1905 size_t count; /* after file limit checks */ 1912 size_t count; /* after file limit checks */
@@ -1942,7 +1949,7 @@ relock:
1942 can_do_direct = direct_io; 1949 can_do_direct = direct_io;
1943 ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos, 1950 ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos,
1944 iocb->ki_left, appending, 1951 iocb->ki_left, appending,
1945 &can_do_direct); 1952 &can_do_direct, &has_refcount);
1946 if (ret < 0) { 1953 if (ret < 0) {
1947 mlog_errno(ret); 1954 mlog_errno(ret);
1948 goto out; 1955 goto out;
@@ -2006,14 +2013,16 @@ out_dio:
2006 /* buffered aio wouldn't have proper lock coverage today */ 2013 /* buffered aio wouldn't have proper lock coverage today */
2007 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); 2014 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
2008 2015
2009 if ((file->f_flags & O_DSYNC && !direct_io) || IS_SYNC(inode)) { 2016 if ((file->f_flags & O_DSYNC && !direct_io) || IS_SYNC(inode) ||
2017 (file->f_flags & O_DIRECT && has_refcount)) {
2010 ret = filemap_fdatawrite_range(file->f_mapping, pos, 2018 ret = filemap_fdatawrite_range(file->f_mapping, pos,
2011 pos + count - 1); 2019 pos + count - 1);
2012 if (ret < 0) 2020 if (ret < 0)
2013 written = ret; 2021 written = ret;
2014 2022
2015 if (!ret && (old_size != i_size_read(inode) || 2023 if (!ret && (old_size != i_size_read(inode) ||
2016 old_clusters != OCFS2_I(inode)->ip_clusters)) { 2024 old_clusters != OCFS2_I(inode)->ip_clusters ||
2025 has_refcount)) {
2017 ret = jbd2_journal_force_commit(osb->journal->j_journal); 2026 ret = jbd2_journal_force_commit(osb->journal->j_journal);
2018 if (ret < 0) 2027 if (ret < 0)
2019 written = ret; 2028 written = ret;
@@ -2062,7 +2071,7 @@ static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
2062 int ret; 2071 int ret;
2063 2072
2064 ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, 2073 ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos,
2065 sd->total_len, 0, NULL); 2074 sd->total_len, 0, NULL, NULL);
2066 if (ret < 0) { 2075 if (ret < 0) {
2067 mlog_errno(ret); 2076 mlog_errno(ret);
2068 return ret; 2077 return ret;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index f560325c444f..13b5d0708175 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -327,94 +327,6 @@ static inline void task_context_switch_counts(struct seq_file *m,
327 p->nivcsw); 327 p->nivcsw);
328} 328}
329 329
330#ifdef CONFIG_MMU
331
332struct stack_stats {
333 struct vm_area_struct *vma;
334 unsigned long startpage;
335 unsigned long usage;
336};
337
338static int stack_usage_pte_range(pmd_t *pmd, unsigned long addr,
339 unsigned long end, struct mm_walk *walk)
340{
341 struct stack_stats *ss = walk->private;
342 struct vm_area_struct *vma = ss->vma;
343 pte_t *pte, ptent;
344 spinlock_t *ptl;
345 int ret = 0;
346
347 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
348 for (; addr != end; pte++, addr += PAGE_SIZE) {
349 ptent = *pte;
350
351#ifdef CONFIG_STACK_GROWSUP
352 if (pte_present(ptent) || is_swap_pte(ptent))
353 ss->usage = addr - ss->startpage + PAGE_SIZE;
354#else
355 if (pte_present(ptent) || is_swap_pte(ptent)) {
356 ss->usage = ss->startpage - addr + PAGE_SIZE;
357 pte++;
358 ret = 1;
359 break;
360 }
361#endif
362 }
363 pte_unmap_unlock(pte - 1, ptl);
364 cond_resched();
365 return ret;
366}
367
368static inline unsigned long get_stack_usage_in_bytes(struct vm_area_struct *vma,
369 struct task_struct *task)
370{
371 struct stack_stats ss;
372 struct mm_walk stack_walk = {
373 .pmd_entry = stack_usage_pte_range,
374 .mm = vma->vm_mm,
375 .private = &ss,
376 };
377
378 if (!vma->vm_mm || is_vm_hugetlb_page(vma))
379 return 0;
380
381 ss.vma = vma;
382 ss.startpage = task->stack_start & PAGE_MASK;
383 ss.usage = 0;
384
385#ifdef CONFIG_STACK_GROWSUP
386 walk_page_range(KSTK_ESP(task) & PAGE_MASK, vma->vm_end,
387 &stack_walk);
388#else
389 walk_page_range(vma->vm_start, (KSTK_ESP(task) & PAGE_MASK) + PAGE_SIZE,
390 &stack_walk);
391#endif
392 return ss.usage;
393}
394
395static inline void task_show_stack_usage(struct seq_file *m,
396 struct task_struct *task)
397{
398 struct vm_area_struct *vma;
399 struct mm_struct *mm = get_task_mm(task);
400
401 if (mm) {
402 down_read(&mm->mmap_sem);
403 vma = find_vma(mm, task->stack_start);
404 if (vma)
405 seq_printf(m, "Stack usage:\t%lu kB\n",
406 get_stack_usage_in_bytes(vma, task) >> 10);
407
408 up_read(&mm->mmap_sem);
409 mmput(mm);
410 }
411}
412#else
413static void task_show_stack_usage(struct seq_file *m, struct task_struct *task)
414{
415}
416#endif /* CONFIG_MMU */
417
418static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) 330static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
419{ 331{
420 seq_printf(m, "Cpus_allowed:\t"); 332 seq_printf(m, "Cpus_allowed:\t");
@@ -445,7 +357,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
445 task_show_regs(m, task); 357 task_show_regs(m, task);
446#endif 358#endif
447 task_context_switch_counts(m, task); 359 task_context_switch_counts(m, task);
448 task_show_stack_usage(m, task);
449 return 0; 360 return 0;
450} 361}
451 362
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 18d5cc62d8ed..e42bbd843ed1 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1419,7 +1419,6 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1419 goto out; 1419 goto out;
1420 1420
1421 error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); 1421 error = PROC_I(inode)->op.proc_get_link(inode, &nd->path);
1422 nd->last_type = LAST_BIND;
1423out: 1422out:
1424 return ERR_PTR(error); 1423 return ERR_PTR(error);
1425} 1424}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 47c03f4336b8..f277c4a111cb 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -361,12 +361,11 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
361 if (!pte_present(ptent)) 361 if (!pte_present(ptent))
362 continue; 362 continue;
363 363
364 mss->resident += PAGE_SIZE;
365
366 page = vm_normal_page(vma, addr, ptent); 364 page = vm_normal_page(vma, addr, ptent);
367 if (!page) 365 if (!page)
368 continue; 366 continue;
369 367
368 mss->resident += PAGE_SIZE;
370 /* Accumulate the size in pages that have been accessed. */ 369 /* Accumulate the size in pages that have been accessed. */
371 if (pte_young(ptent) || PageReferenced(page)) 370 if (pte_young(ptent) || PageReferenced(page))
372 mss->referenced += PAGE_SIZE; 371 mss->referenced += PAGE_SIZE;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index dea86abdf2e7..3fc62b097bed 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1377,6 +1377,9 @@ static void inode_sub_rsv_space(struct inode *inode, qsize_t number)
1377static qsize_t inode_get_rsv_space(struct inode *inode) 1377static qsize_t inode_get_rsv_space(struct inode *inode)
1378{ 1378{
1379 qsize_t ret; 1379 qsize_t ret;
1380
1381 if (!inode->i_sb->dq_op->get_reserved_space)
1382 return 0;
1380 spin_lock(&inode->i_lock); 1383 spin_lock(&inode->i_lock);
1381 ret = *inode_reserved_space(inode); 1384 ret = *inode_reserved_space(inode);
1382 spin_unlock(&inode->i_lock); 1385 spin_unlock(&inode->i_lock);
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 2efc57173fd7..1739a4aba25f 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -123,30 +123,6 @@ add_error:
123 123
124/*****************************************************************************/ 124/*****************************************************************************/
125/* 125/*
126 * check that file shrinkage doesn't leave any VMAs dangling in midair
127 */
128static int ramfs_nommu_check_mappings(struct inode *inode,
129 size_t newsize, size_t size)
130{
131 struct vm_area_struct *vma;
132 struct prio_tree_iter iter;
133
134 /* search for VMAs that fall within the dead zone */
135 vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
136 newsize >> PAGE_SHIFT,
137 (size + PAGE_SIZE - 1) >> PAGE_SHIFT
138 ) {
139 /* found one - only interested if it's shared out of the page
140 * cache */
141 if (vma->vm_flags & VM_SHARED)
142 return -ETXTBSY; /* not quite true, but near enough */
143 }
144
145 return 0;
146}
147
148/*****************************************************************************/
149/*
150 * 126 *
151 */ 127 */
152static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size) 128static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
@@ -164,7 +140,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
164 140
165 /* check that a decrease in size doesn't cut off any shared mappings */ 141 /* check that a decrease in size doesn't cut off any shared mappings */
166 if (newsize < size) { 142 if (newsize < size) {
167 ret = ramfs_nommu_check_mappings(inode, newsize, size); 143 ret = nommu_shrink_inode_mappings(inode, size, newsize);
168 if (ret < 0) 144 if (ret < 0)
169 return ret; 145 return ret;
170 } 146 }
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 685495707181..65c872761177 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -1277,7 +1277,10 @@ int reiserfs_init_bitmap_cache(struct super_block *sb)
1277 struct reiserfs_bitmap_info *bitmap; 1277 struct reiserfs_bitmap_info *bitmap;
1278 unsigned int bmap_nr = reiserfs_bmap_count(sb); 1278 unsigned int bmap_nr = reiserfs_bmap_count(sb);
1279 1279
1280 /* Avoid lock recursion in fault case */
1281 reiserfs_write_unlock(sb);
1280 bitmap = vmalloc(sizeof(*bitmap) * bmap_nr); 1282 bitmap = vmalloc(sizeof(*bitmap) * bmap_nr);
1283 reiserfs_write_lock(sb);
1281 if (bitmap == NULL) 1284 if (bitmap == NULL)
1282 return -ENOMEM; 1285 return -ENOMEM;
1283 1286
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 290ae38fca8a..9087b10209e6 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -31,11 +31,12 @@ void reiserfs_delete_inode(struct inode *inode)
31 JOURNAL_PER_BALANCE_CNT * 2 + 31 JOURNAL_PER_BALANCE_CNT * 2 +
32 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb); 32 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
33 struct reiserfs_transaction_handle th; 33 struct reiserfs_transaction_handle th;
34 int depth;
34 int err; 35 int err;
35 36
36 truncate_inode_pages(&inode->i_data, 0); 37 truncate_inode_pages(&inode->i_data, 0);
37 38
38 reiserfs_write_lock(inode->i_sb); 39 depth = reiserfs_write_lock_once(inode->i_sb);
39 40
40 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ 41 /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
41 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ 42 if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
@@ -74,7 +75,7 @@ void reiserfs_delete_inode(struct inode *inode)
74 out: 75 out:
75 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ 76 clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */
76 inode->i_blocks = 0; 77 inode->i_blocks = 0;
77 reiserfs_write_unlock(inode->i_sb); 78 reiserfs_write_unlock_once(inode->i_sb, depth);
78} 79}
79 80
80static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid, 81static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
@@ -3061,13 +3062,14 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
3061int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) 3062int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3062{ 3063{
3063 struct inode *inode = dentry->d_inode; 3064 struct inode *inode = dentry->d_inode;
3064 int error;
3065 unsigned int ia_valid; 3065 unsigned int ia_valid;
3066 int depth;
3067 int error;
3066 3068
3067 /* must be turned off for recursive notify_change calls */ 3069 /* must be turned off for recursive notify_change calls */
3068 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID); 3070 ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
3069 3071
3070 reiserfs_write_lock(inode->i_sb); 3072 depth = reiserfs_write_lock_once(inode->i_sb);
3071 if (attr->ia_valid & ATTR_SIZE) { 3073 if (attr->ia_valid & ATTR_SIZE) {
3072 /* version 2 items will be caught by the s_maxbytes check 3074 /* version 2 items will be caught by the s_maxbytes check
3073 ** done for us in vmtruncate 3075 ** done for us in vmtruncate
@@ -3148,8 +3150,17 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3148 journal_end(&th, inode->i_sb, jbegin_count); 3150 journal_end(&th, inode->i_sb, jbegin_count);
3149 } 3151 }
3150 } 3152 }
3151 if (!error) 3153 if (!error) {
3154 /*
3155 * Relax the lock here, as it might truncate the
3156 * inode pages and wait for inode pages locks.
3157 * To release such page lock, the owner needs the
3158 * reiserfs lock
3159 */
3160 reiserfs_write_unlock_once(inode->i_sb, depth);
3152 error = inode_setattr(inode, attr); 3161 error = inode_setattr(inode, attr);
3162 depth = reiserfs_write_lock_once(inode->i_sb);
3163 }
3153 } 3164 }
3154 3165
3155 if (!error && reiserfs_posixacl(inode->i_sb)) { 3166 if (!error && reiserfs_posixacl(inode->i_sb)) {
@@ -3158,7 +3169,8 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3158 } 3169 }
3159 3170
3160 out: 3171 out:
3161 reiserfs_write_unlock(inode->i_sb); 3172 reiserfs_write_unlock_once(inode->i_sb, depth);
3173
3162 return error; 3174 return error;
3163} 3175}
3164 3176
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index ace77451ceb1..f53505de0712 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -104,9 +104,10 @@ setflags_out:
104 err = put_user(inode->i_generation, (int __user *)arg); 104 err = put_user(inode->i_generation, (int __user *)arg);
105 break; 105 break;
106 case REISERFS_IOC_SETVERSION: 106 case REISERFS_IOC_SETVERSION:
107 if (!is_owner_or_cap(inode)) 107 if (!is_owner_or_cap(inode)) {
108 err = -EPERM; 108 err = -EPERM;
109 break; 109 break;
110 }
110 err = mnt_want_write(filp->f_path.mnt); 111 err = mnt_want_write(filp->f_path.mnt);
111 if (err) 112 if (err)
112 break; 113 break;
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 2f8a7e7b8dab..83ac4d3b3cb0 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2009,10 +2009,11 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
2009 destroy_workqueue(commit_wq); 2009 destroy_workqueue(commit_wq);
2010 commit_wq = NULL; 2010 commit_wq = NULL;
2011 } 2011 }
2012 reiserfs_write_lock(sb);
2013 2012
2014 free_journal_ram(sb); 2013 free_journal_ram(sb);
2015 2014
2015 reiserfs_write_lock(sb);
2016
2016 return 0; 2017 return 0;
2017} 2018}
2018 2019
@@ -2758,11 +2759,18 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2758 struct reiserfs_journal *journal; 2759 struct reiserfs_journal *journal;
2759 struct reiserfs_journal_list *jl; 2760 struct reiserfs_journal_list *jl;
2760 char b[BDEVNAME_SIZE]; 2761 char b[BDEVNAME_SIZE];
2762 int ret;
2761 2763
2764 /*
2765 * Unlock here to avoid various RECLAIM-FS-ON <-> IN-RECLAIM-FS
2766 * dependency inversion warnings.
2767 */
2768 reiserfs_write_unlock(sb);
2762 journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal)); 2769 journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal));
2763 if (!journal) { 2770 if (!journal) {
2764 reiserfs_warning(sb, "journal-1256", 2771 reiserfs_warning(sb, "journal-1256",
2765 "unable to get memory for journal structure"); 2772 "unable to get memory for journal structure");
2773 reiserfs_write_lock(sb);
2766 return 1; 2774 return 1;
2767 } 2775 }
2768 memset(journal, 0, sizeof(struct reiserfs_journal)); 2776 memset(journal, 0, sizeof(struct reiserfs_journal));
@@ -2771,10 +2779,12 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2771 INIT_LIST_HEAD(&journal->j_working_list); 2779 INIT_LIST_HEAD(&journal->j_working_list);
2772 INIT_LIST_HEAD(&journal->j_journal_list); 2780 INIT_LIST_HEAD(&journal->j_journal_list);
2773 journal->j_persistent_trans = 0; 2781 journal->j_persistent_trans = 0;
2774 if (reiserfs_allocate_list_bitmaps(sb, 2782 ret = reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap,
2775 journal->j_list_bitmap, 2783 reiserfs_bmap_count(sb));
2776 reiserfs_bmap_count(sb))) 2784 reiserfs_write_lock(sb);
2785 if (ret)
2777 goto free_and_return; 2786 goto free_and_return;
2787
2778 allocate_bitmap_nodes(sb); 2788 allocate_bitmap_nodes(sb);
2779 2789
2780 /* reserved for journal area support */ 2790 /* reserved for journal area support */
diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
index ee2cfc0fd8a7..b87aa2c1afc1 100644
--- a/fs/reiserfs/lock.c
+++ b/fs/reiserfs/lock.c
@@ -86,3 +86,12 @@ void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
86 reiserfs_panic(sb, "%s called without kernel lock held %d", 86 reiserfs_panic(sb, "%s called without kernel lock held %d",
87 caller); 87 caller);
88} 88}
89
90#ifdef CONFIG_REISERFS_CHECK
91void reiserfs_lock_check_recursive(struct super_block *sb)
92{
93 struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);
94
95 WARN_ONCE((sb_i->lock_depth > 0), "Unwanted recursive reiserfs lock!\n");
96}
97#endif
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index e296ff72a6cc..9d4dcf0b07cb 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -921,6 +921,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
921 struct reiserfs_transaction_handle th; 921 struct reiserfs_transaction_handle th;
922 int jbegin_count; 922 int jbegin_count;
923 unsigned long savelink; 923 unsigned long savelink;
924 int depth;
924 925
925 inode = dentry->d_inode; 926 inode = dentry->d_inode;
926 927
@@ -932,7 +933,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
932 JOURNAL_PER_BALANCE_CNT * 2 + 2 + 933 JOURNAL_PER_BALANCE_CNT * 2 + 2 +
933 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); 934 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
934 935
935 reiserfs_write_lock(dir->i_sb); 936 depth = reiserfs_write_lock_once(dir->i_sb);
936 retval = journal_begin(&th, dir->i_sb, jbegin_count); 937 retval = journal_begin(&th, dir->i_sb, jbegin_count);
937 if (retval) 938 if (retval)
938 goto out_unlink; 939 goto out_unlink;
@@ -993,7 +994,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
993 994
994 retval = journal_end(&th, dir->i_sb, jbegin_count); 995 retval = journal_end(&th, dir->i_sb, jbegin_count);
995 reiserfs_check_path(&path); 996 reiserfs_check_path(&path);
996 reiserfs_write_unlock(dir->i_sb); 997 reiserfs_write_unlock_once(dir->i_sb, depth);
997 return retval; 998 return retval;
998 999
999 end_unlink: 1000 end_unlink:
@@ -1003,7 +1004,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
1003 if (err) 1004 if (err)
1004 retval = err; 1005 retval = err;
1005 out_unlink: 1006 out_unlink:
1006 reiserfs_write_unlock(dir->i_sb); 1007 reiserfs_write_unlock_once(dir->i_sb, depth);
1007 return retval; 1008 return retval;
1008} 1009}
1009 1010
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 8c7033a8b67e..81f09fab8ae4 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -83,7 +83,8 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
83 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 83 BUG_ON(!mutex_is_locked(&dir->i_mutex));
84 vfs_dq_init(dir); 84 vfs_dq_init(dir);
85 85
86 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 86 reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
87 I_MUTEX_CHILD, dir->i_sb);
87 error = dir->i_op->unlink(dir, dentry); 88 error = dir->i_op->unlink(dir, dentry);
88 mutex_unlock(&dentry->d_inode->i_mutex); 89 mutex_unlock(&dentry->d_inode->i_mutex);
89 90
@@ -98,7 +99,8 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
98 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 99 BUG_ON(!mutex_is_locked(&dir->i_mutex));
99 vfs_dq_init(dir); 100 vfs_dq_init(dir);
100 101
101 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 102 reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
103 I_MUTEX_CHILD, dir->i_sb);
102 dentry_unhash(dentry); 104 dentry_unhash(dentry);
103 error = dir->i_op->rmdir(dir, dentry); 105 error = dir->i_op->rmdir(dir, dentry);
104 if (!error) 106 if (!error)
@@ -235,16 +237,22 @@ static int reiserfs_for_each_xattr(struct inode *inode,
235 if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1) 237 if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1)
236 return 0; 238 return 0;
237 239
240 reiserfs_write_unlock(inode->i_sb);
238 dir = open_xa_dir(inode, XATTR_REPLACE); 241 dir = open_xa_dir(inode, XATTR_REPLACE);
239 if (IS_ERR(dir)) { 242 if (IS_ERR(dir)) {
240 err = PTR_ERR(dir); 243 err = PTR_ERR(dir);
244 reiserfs_write_lock(inode->i_sb);
241 goto out; 245 goto out;
242 } else if (!dir->d_inode) { 246 } else if (!dir->d_inode) {
243 err = 0; 247 err = 0;
248 reiserfs_write_lock(inode->i_sb);
244 goto out_dir; 249 goto out_dir;
245 } 250 }
246 251
247 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); 252 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
253
254 reiserfs_write_lock(inode->i_sb);
255
248 buf.xadir = dir; 256 buf.xadir = dir;
249 err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos); 257 err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos);
250 while ((err == 0 || err == -ENOSPC) && buf.count) { 258 while ((err == 0 || err == -ENOSPC) && buf.count) {
@@ -283,8 +291,9 @@ static int reiserfs_for_each_xattr(struct inode *inode,
283 err = journal_begin(&th, inode->i_sb, blocks); 291 err = journal_begin(&th, inode->i_sb, blocks);
284 if (!err) { 292 if (!err) {
285 int jerror; 293 int jerror;
286 mutex_lock_nested(&dir->d_parent->d_inode->i_mutex, 294 reiserfs_mutex_lock_nested_safe(
287 I_MUTEX_XATTR); 295 &dir->d_parent->d_inode->i_mutex,
296 I_MUTEX_XATTR, inode->i_sb);
288 err = action(dir, data); 297 err = action(dir, data);
289 jerror = journal_end(&th, inode->i_sb, blocks); 298 jerror = journal_end(&th, inode->i_sb, blocks);
290 mutex_unlock(&dir->d_parent->d_inode->i_mutex); 299 mutex_unlock(&dir->d_parent->d_inode->i_mutex);
@@ -443,7 +452,9 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
443 } 452 }
444 453
445 if (dentry->d_inode) { 454 if (dentry->d_inode) {
455 reiserfs_write_lock(inode->i_sb);
446 err = xattr_unlink(xadir->d_inode, dentry); 456 err = xattr_unlink(xadir->d_inode, dentry);
457 reiserfs_write_unlock(inode->i_sb);
447 update_ctime(inode); 458 update_ctime(inode);
448 } 459 }
449 460
@@ -477,15 +488,24 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
477 if (get_inode_sd_version(inode) == STAT_DATA_V1) 488 if (get_inode_sd_version(inode) == STAT_DATA_V1)
478 return -EOPNOTSUPP; 489 return -EOPNOTSUPP;
479 490
480 if (!buffer) 491 reiserfs_write_unlock(inode->i_sb);
481 return lookup_and_delete_xattr(inode, name); 492
493 if (!buffer) {
494 err = lookup_and_delete_xattr(inode, name);
495 reiserfs_write_lock(inode->i_sb);
496 return err;
497 }
482 498
483 dentry = xattr_lookup(inode, name, flags); 499 dentry = xattr_lookup(inode, name, flags);
484 if (IS_ERR(dentry)) 500 if (IS_ERR(dentry)) {
501 reiserfs_write_lock(inode->i_sb);
485 return PTR_ERR(dentry); 502 return PTR_ERR(dentry);
503 }
486 504
487 down_write(&REISERFS_I(inode)->i_xattr_sem); 505 down_write(&REISERFS_I(inode)->i_xattr_sem);
488 506
507 reiserfs_write_lock(inode->i_sb);
508
489 xahash = xattr_hash(buffer, buffer_size); 509 xahash = xattr_hash(buffer, buffer_size);
490 while (buffer_pos < buffer_size || buffer_pos == 0) { 510 while (buffer_pos < buffer_size || buffer_pos == 0) {
491 size_t chunk; 511 size_t chunk;
@@ -540,8 +560,12 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
540 .ia_size = buffer_size, 560 .ia_size = buffer_size,
541 .ia_valid = ATTR_SIZE | ATTR_CTIME, 561 .ia_valid = ATTR_SIZE | ATTR_CTIME,
542 }; 562 };
563
564 reiserfs_write_unlock(inode->i_sb);
543 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR); 565 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
544 down_write(&dentry->d_inode->i_alloc_sem); 566 down_write(&dentry->d_inode->i_alloc_sem);
567 reiserfs_write_lock(inode->i_sb);
568
545 err = reiserfs_setattr(dentry, &newattrs); 569 err = reiserfs_setattr(dentry, &newattrs);
546 up_write(&dentry->d_inode->i_alloc_sem); 570 up_write(&dentry->d_inode->i_alloc_sem);
547 mutex_unlock(&dentry->d_inode->i_mutex); 571 mutex_unlock(&dentry->d_inode->i_mutex);
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index cc32e6ada67b..dd20a7883f0f 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -455,7 +455,9 @@ int reiserfs_acl_chmod(struct inode *inode)
455 return 0; 455 return 0;
456 } 456 }
457 457
458 reiserfs_write_unlock(inode->i_sb);
458 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); 459 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
460 reiserfs_write_lock(inode->i_sb);
459 if (!acl) 461 if (!acl)
460 return 0; 462 return 0;
461 if (IS_ERR(acl)) 463 if (IS_ERR(acl))
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index f05f2303a8b8..699f371b9f12 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -106,8 +106,10 @@ static struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
106 return NULL; 106 return NULL;
107 107
108 t = atomic_cmpxchg(&sd->s_active, v, v + 1); 108 t = atomic_cmpxchg(&sd->s_active, v, v + 1);
109 if (likely(t == v)) 109 if (likely(t == v)) {
110 rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_);
110 return sd; 111 return sd;
112 }
111 if (t < 0) 113 if (t < 0)
112 return NULL; 114 return NULL;
113 115
@@ -130,6 +132,7 @@ static void sysfs_put_active(struct sysfs_dirent *sd)
130 if (unlikely(!sd)) 132 if (unlikely(!sd))
131 return; 133 return;
132 134
135 rwsem_release(&sd->dep_map, 1, _RET_IP_);
133 v = atomic_dec_return(&sd->s_active); 136 v = atomic_dec_return(&sd->s_active);
134 if (likely(v != SD_DEACTIVATED_BIAS)) 137 if (likely(v != SD_DEACTIVATED_BIAS))
135 return; 138 return;
@@ -194,15 +197,21 @@ static void sysfs_deactivate(struct sysfs_dirent *sd)
194 BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED)); 197 BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
195 sd->s_sibling = (void *)&wait; 198 sd->s_sibling = (void *)&wait;
196 199
200 rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_);
197 /* atomic_add_return() is a mb(), put_active() will always see 201 /* atomic_add_return() is a mb(), put_active() will always see
198 * the updated sd->s_sibling. 202 * the updated sd->s_sibling.
199 */ 203 */
200 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active); 204 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
201 205
202 if (v != SD_DEACTIVATED_BIAS) 206 if (v != SD_DEACTIVATED_BIAS) {
207 lock_contended(&sd->dep_map, _RET_IP_);
203 wait_for_completion(&wait); 208 wait_for_completion(&wait);
209 }
204 210
205 sd->s_sibling = NULL; 211 sd->s_sibling = NULL;
212
213 lock_acquired(&sd->dep_map, _RET_IP_);
214 rwsem_release(&sd->dep_map, 1, _RET_IP_);
206} 215}
207 216
208static int sysfs_alloc_ino(ino_t *pino) 217static int sysfs_alloc_ino(ino_t *pino)
@@ -345,6 +354,7 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
345 354
346 atomic_set(&sd->s_count, 1); 355 atomic_set(&sd->s_count, 1);
347 atomic_set(&sd->s_active, 0); 356 atomic_set(&sd->s_active, 0);
357 sysfs_dirent_init_lockdep(sd);
348 358
349 sd->s_name = name; 359 sd->s_name = name;
350 sd->s_mode = mode; 360 sd->s_mode = mode;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index ca52e7b9d8f8..cdd9377a6e06 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -8,6 +8,7 @@
8 * This file is released under the GPLv2. 8 * This file is released under the GPLv2.
9 */ 9 */
10 10
11#include <linux/lockdep.h>
11#include <linux/fs.h> 12#include <linux/fs.h>
12 13
13struct sysfs_open_dirent; 14struct sysfs_open_dirent;
@@ -50,6 +51,9 @@ struct sysfs_inode_attrs {
50struct sysfs_dirent { 51struct sysfs_dirent {
51 atomic_t s_count; 52 atomic_t s_count;
52 atomic_t s_active; 53 atomic_t s_active;
54#ifdef CONFIG_DEBUG_LOCK_ALLOC
55 struct lockdep_map dep_map;
56#endif
53 struct sysfs_dirent *s_parent; 57 struct sysfs_dirent *s_parent;
54 struct sysfs_dirent *s_sibling; 58 struct sysfs_dirent *s_sibling;
55 const char *s_name; 59 const char *s_name;
@@ -84,6 +88,17 @@ static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
84 return sd->s_flags & SYSFS_TYPE_MASK; 88 return sd->s_flags & SYSFS_TYPE_MASK;
85} 89}
86 90
91#ifdef CONFIG_DEBUG_LOCK_ALLOC
92#define sysfs_dirent_init_lockdep(sd) \
93do { \
94 static struct lock_class_key __key; \
95 \
96 lockdep_init_map(&sd->dep_map, "s_active", &__key, 0); \
97} while(0)
98#else
99#define sysfs_dirent_init_lockdep(sd) do {} while(0)
100#endif
101
87/* 102/*
88 * Context structure to be used while adding/removing nodes. 103 * Context structure to be used while adding/removing nodes.
89 */ 104 */
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 618c2701d3a7..e5a3d8e96bb7 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -54,6 +54,7 @@
54 */ 54 */
55 55
56#include <linux/pagemap.h> 56#include <linux/pagemap.h>
57#include <linux/list_sort.h>
57#include "ubifs.h" 58#include "ubifs.h"
58 59
59/* 60/*
@@ -108,101 +109,6 @@ static int switch_gc_head(struct ubifs_info *c)
108} 109}
109 110
110/** 111/**
111 * list_sort - sort a list.
112 * @priv: private data, passed to @cmp
113 * @head: the list to sort
114 * @cmp: the elements comparison function
115 *
116 * This function has been implemented by Mark J Roberts <mjr@znex.org>. It
117 * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted
118 * in ascending order.
119 *
120 * The comparison function @cmp is supposed to return a negative value if @a is
121 * than @b, and a positive value if @a is greater than @b. If @a and @b are
122 * equivalent, then it does not matter what this function returns.
123 */
124static void list_sort(void *priv, struct list_head *head,
125 int (*cmp)(void *priv, struct list_head *a,
126 struct list_head *b))
127{
128 struct list_head *p, *q, *e, *list, *tail, *oldhead;
129 int insize, nmerges, psize, qsize, i;
130
131 if (list_empty(head))
132 return;
133
134 list = head->next;
135 list_del(head);
136 insize = 1;
137 for (;;) {
138 p = oldhead = list;
139 list = tail = NULL;
140 nmerges = 0;
141
142 while (p) {
143 nmerges++;
144 q = p;
145 psize = 0;
146 for (i = 0; i < insize; i++) {
147 psize++;
148 q = q->next == oldhead ? NULL : q->next;
149 if (!q)
150 break;
151 }
152
153 qsize = insize;
154 while (psize > 0 || (qsize > 0 && q)) {
155 if (!psize) {
156 e = q;
157 q = q->next;
158 qsize--;
159 if (q == oldhead)
160 q = NULL;
161 } else if (!qsize || !q) {
162 e = p;
163 p = p->next;
164 psize--;
165 if (p == oldhead)
166 p = NULL;
167 } else if (cmp(priv, p, q) <= 0) {
168 e = p;
169 p = p->next;
170 psize--;
171 if (p == oldhead)
172 p = NULL;
173 } else {
174 e = q;
175 q = q->next;
176 qsize--;
177 if (q == oldhead)
178 q = NULL;
179 }
180 if (tail)
181 tail->next = e;
182 else
183 list = e;
184 e->prev = tail;
185 tail = e;
186 }
187 p = q;
188 }
189
190 tail->next = list;
191 list->prev = tail;
192
193 if (nmerges <= 1)
194 break;
195
196 insize *= 2;
197 }
198
199 head->next = list;
200 head->prev = list->prev;
201 list->prev->next = head;
202 list->prev = head;
203}
204
205/**
206 * data_nodes_cmp - compare 2 data nodes. 112 * data_nodes_cmp - compare 2 data nodes.
207 * @priv: UBIFS file-system description object 113 * @priv: UBIFS file-system description object
208 * @a: first data node 114 * @a: first data node
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 2512125dfa7c..883ca5ab8af5 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -251,8 +251,9 @@ xfs_set_mode(struct inode *inode, mode_t mode)
251 if (mode != inode->i_mode) { 251 if (mode != inode->i_mode) {
252 struct iattr iattr; 252 struct iattr iattr;
253 253
254 iattr.ia_valid = ATTR_MODE; 254 iattr.ia_valid = ATTR_MODE | ATTR_CTIME;
255 iattr.ia_mode = mode; 255 iattr.ia_mode = mode;
256 iattr.ia_ctime = current_fs_time(inode->i_sb);
256 257
257 error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL); 258 error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
258 } 259 }
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 09783cc444ac..77414db10dc2 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -954,16 +954,14 @@ xfs_fs_destroy_inode(
954 ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM)); 954 ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
955 955
956 /* 956 /*
957 * If we have nothing to flush with this inode then complete the 957 * We always use background reclaim here because even if the
958 * teardown now, otherwise delay the flush operation. 958 * inode is clean, it still may be under IO and hence we have
959 * to take the flush lock. The background reclaim path handles
960 * this more efficiently than we can here, so simply let background
961 * reclaim tear down all inodes.
959 */ 962 */
960 if (!xfs_inode_clean(ip)) {
961 xfs_inode_set_reclaim_tag(ip);
962 return;
963 }
964
965out_reclaim: 963out_reclaim:
966 xfs_ireclaim(ip); 964 xfs_inode_set_reclaim_tag(ip);
967} 965}
968 966
969/* 967/*
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 6fed97a8cd3e..1f5e4bb5e970 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -65,7 +65,6 @@ xfs_inode_ag_lookup(
65 * as the tree is sparse and a gang lookup walks to find 65 * as the tree is sparse and a gang lookup walks to find
66 * the number of objects requested. 66 * the number of objects requested.
67 */ 67 */
68 read_lock(&pag->pag_ici_lock);
69 if (tag == XFS_ICI_NO_TAG) { 68 if (tag == XFS_ICI_NO_TAG) {
70 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, 69 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
71 (void **)&ip, *first_index, 1); 70 (void **)&ip, *first_index, 1);
@@ -74,7 +73,7 @@ xfs_inode_ag_lookup(
74 (void **)&ip, *first_index, 1, tag); 73 (void **)&ip, *first_index, 1, tag);
75 } 74 }
76 if (!nr_found) 75 if (!nr_found)
77 goto unlock; 76 return NULL;
78 77
79 /* 78 /*
80 * Update the index for the next lookup. Catch overflows 79 * Update the index for the next lookup. Catch overflows
@@ -84,13 +83,8 @@ xfs_inode_ag_lookup(
84 */ 83 */
85 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); 84 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
86 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) 85 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
87 goto unlock; 86 return NULL;
88
89 return ip; 87 return ip;
90
91unlock:
92 read_unlock(&pag->pag_ici_lock);
93 return NULL;
94} 88}
95 89
96STATIC int 90STATIC int
@@ -100,7 +94,8 @@ xfs_inode_ag_walk(
100 int (*execute)(struct xfs_inode *ip, 94 int (*execute)(struct xfs_inode *ip,
101 struct xfs_perag *pag, int flags), 95 struct xfs_perag *pag, int flags),
102 int flags, 96 int flags,
103 int tag) 97 int tag,
98 int exclusive)
104{ 99{
105 struct xfs_perag *pag = &mp->m_perag[ag]; 100 struct xfs_perag *pag = &mp->m_perag[ag];
106 uint32_t first_index; 101 uint32_t first_index;
@@ -114,10 +109,20 @@ restart:
114 int error = 0; 109 int error = 0;
115 xfs_inode_t *ip; 110 xfs_inode_t *ip;
116 111
112 if (exclusive)
113 write_lock(&pag->pag_ici_lock);
114 else
115 read_lock(&pag->pag_ici_lock);
117 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); 116 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
118 if (!ip) 117 if (!ip) {
118 if (exclusive)
119 write_unlock(&pag->pag_ici_lock);
120 else
121 read_unlock(&pag->pag_ici_lock);
119 break; 122 break;
123 }
120 124
125 /* execute releases pag->pag_ici_lock */
121 error = execute(ip, pag, flags); 126 error = execute(ip, pag, flags);
122 if (error == EAGAIN) { 127 if (error == EAGAIN) {
123 skipped++; 128 skipped++;
@@ -125,9 +130,8 @@ restart:
125 } 130 }
126 if (error) 131 if (error)
127 last_error = error; 132 last_error = error;
128 /* 133
129 * bail out if the filesystem is corrupted. 134 /* bail out if the filesystem is corrupted. */
130 */
131 if (error == EFSCORRUPTED) 135 if (error == EFSCORRUPTED)
132 break; 136 break;
133 137
@@ -148,7 +152,8 @@ xfs_inode_ag_iterator(
148 int (*execute)(struct xfs_inode *ip, 152 int (*execute)(struct xfs_inode *ip,
149 struct xfs_perag *pag, int flags), 153 struct xfs_perag *pag, int flags),
150 int flags, 154 int flags,
151 int tag) 155 int tag,
156 int exclusive)
152{ 157{
153 int error = 0; 158 int error = 0;
154 int last_error = 0; 159 int last_error = 0;
@@ -157,7 +162,8 @@ xfs_inode_ag_iterator(
157 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 162 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
158 if (!mp->m_perag[ag].pag_ici_init) 163 if (!mp->m_perag[ag].pag_ici_init)
159 continue; 164 continue;
160 error = xfs_inode_ag_walk(mp, ag, execute, flags, tag); 165 error = xfs_inode_ag_walk(mp, ag, execute, flags, tag,
166 exclusive);
161 if (error) { 167 if (error) {
162 last_error = error; 168 last_error = error;
163 if (error == EFSCORRUPTED) 169 if (error == EFSCORRUPTED)
@@ -174,30 +180,31 @@ xfs_sync_inode_valid(
174 struct xfs_perag *pag) 180 struct xfs_perag *pag)
175{ 181{
176 struct inode *inode = VFS_I(ip); 182 struct inode *inode = VFS_I(ip);
183 int error = EFSCORRUPTED;
177 184
178 /* nothing to sync during shutdown */ 185 /* nothing to sync during shutdown */
179 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 186 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
180 read_unlock(&pag->pag_ici_lock); 187 goto out_unlock;
181 return EFSCORRUPTED;
182 }
183 188
184 /* 189 /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
185 * If we can't get a reference on the inode, it must be in reclaim. 190 error = ENOENT;
186 * Leave it for the reclaim code to flush. Also avoid inodes that 191 if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
187 * haven't been fully initialised. 192 goto out_unlock;
188 */
189 if (!igrab(inode)) {
190 read_unlock(&pag->pag_ici_lock);
191 return ENOENT;
192 }
193 read_unlock(&pag->pag_ici_lock);
194 193
195 if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) { 194 /* If we can't grab the inode, it must on it's way to reclaim. */
195 if (!igrab(inode))
196 goto out_unlock;
197
198 if (is_bad_inode(inode)) {
196 IRELE(ip); 199 IRELE(ip);
197 return ENOENT; 200 goto out_unlock;
198 } 201 }
199 202
200 return 0; 203 /* inode is valid */
204 error = 0;
205out_unlock:
206 read_unlock(&pag->pag_ici_lock);
207 return error;
201} 208}
202 209
203STATIC int 210STATIC int
@@ -282,7 +289,7 @@ xfs_sync_data(
282 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); 289 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
283 290
284 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, 291 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
285 XFS_ICI_NO_TAG); 292 XFS_ICI_NO_TAG, 0);
286 if (error) 293 if (error)
287 return XFS_ERROR(error); 294 return XFS_ERROR(error);
288 295
@@ -304,7 +311,7 @@ xfs_sync_attr(
304 ASSERT((flags & ~SYNC_WAIT) == 0); 311 ASSERT((flags & ~SYNC_WAIT) == 0);
305 312
306 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, 313 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
307 XFS_ICI_NO_TAG); 314 XFS_ICI_NO_TAG, 0);
308} 315}
309 316
310STATIC int 317STATIC int
@@ -664,60 +671,6 @@ xfs_syncd_stop(
664 kthread_stop(mp->m_sync_task); 671 kthread_stop(mp->m_sync_task);
665} 672}
666 673
667STATIC int
668xfs_reclaim_inode(
669 xfs_inode_t *ip,
670 int sync_mode)
671{
672 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
673
674 /* The hash lock here protects a thread in xfs_iget_core from
675 * racing with us on linking the inode back with a vnode.
676 * Once we have the XFS_IRECLAIM flag set it will not touch
677 * us.
678 */
679 write_lock(&pag->pag_ici_lock);
680 spin_lock(&ip->i_flags_lock);
681 if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
682 !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
683 spin_unlock(&ip->i_flags_lock);
684 write_unlock(&pag->pag_ici_lock);
685 return -EAGAIN;
686 }
687 __xfs_iflags_set(ip, XFS_IRECLAIM);
688 spin_unlock(&ip->i_flags_lock);
689 write_unlock(&pag->pag_ici_lock);
690 xfs_put_perag(ip->i_mount, pag);
691
692 /*
693 * If the inode is still dirty, then flush it out. If the inode
694 * is not in the AIL, then it will be OK to flush it delwri as
695 * long as xfs_iflush() does not keep any references to the inode.
696 * We leave that decision up to xfs_iflush() since it has the
697 * knowledge of whether it's OK to simply do a delwri flush of
698 * the inode or whether we need to wait until the inode is
699 * pulled from the AIL.
700 * We get the flush lock regardless, though, just to make sure
701 * we don't free it while it is being flushed.
702 */
703 xfs_ilock(ip, XFS_ILOCK_EXCL);
704 xfs_iflock(ip);
705
706 /*
707 * In the case of a forced shutdown we rely on xfs_iflush() to
708 * wait for the inode to be unpinned before returning an error.
709 */
710 if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
711 /* synchronize with xfs_iflush_done */
712 xfs_iflock(ip);
713 xfs_ifunlock(ip);
714 }
715
716 xfs_iunlock(ip, XFS_ILOCK_EXCL);
717 xfs_ireclaim(ip);
718 return 0;
719}
720
721void 674void
722__xfs_inode_set_reclaim_tag( 675__xfs_inode_set_reclaim_tag(
723 struct xfs_perag *pag, 676 struct xfs_perag *pag,
@@ -760,19 +713,55 @@ __xfs_inode_clear_reclaim_tag(
760} 713}
761 714
762STATIC int 715STATIC int
763xfs_reclaim_inode_now( 716xfs_reclaim_inode(
764 struct xfs_inode *ip, 717 struct xfs_inode *ip,
765 struct xfs_perag *pag, 718 struct xfs_perag *pag,
766 int flags) 719 int sync_mode)
767{ 720{
768 /* ignore if already under reclaim */ 721 /*
769 if (xfs_iflags_test(ip, XFS_IRECLAIM)) { 722 * The radix tree lock here protects a thread in xfs_iget from racing
770 read_unlock(&pag->pag_ici_lock); 723 * with us starting reclaim on the inode. Once we have the
724 * XFS_IRECLAIM flag set it will not touch us.
725 */
726 spin_lock(&ip->i_flags_lock);
727 ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
728 if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
729 /* ignore as it is already under reclaim */
730 spin_unlock(&ip->i_flags_lock);
731 write_unlock(&pag->pag_ici_lock);
771 return 0; 732 return 0;
772 } 733 }
773 read_unlock(&pag->pag_ici_lock); 734 __xfs_iflags_set(ip, XFS_IRECLAIM);
735 spin_unlock(&ip->i_flags_lock);
736 write_unlock(&pag->pag_ici_lock);
774 737
775 return xfs_reclaim_inode(ip, flags); 738 /*
739 * If the inode is still dirty, then flush it out. If the inode
740 * is not in the AIL, then it will be OK to flush it delwri as
741 * long as xfs_iflush() does not keep any references to the inode.
742 * We leave that decision up to xfs_iflush() since it has the
743 * knowledge of whether it's OK to simply do a delwri flush of
744 * the inode or whether we need to wait until the inode is
745 * pulled from the AIL.
746 * We get the flush lock regardless, though, just to make sure
747 * we don't free it while it is being flushed.
748 */
749 xfs_ilock(ip, XFS_ILOCK_EXCL);
750 xfs_iflock(ip);
751
752 /*
753 * In the case of a forced shutdown we rely on xfs_iflush() to
754 * wait for the inode to be unpinned before returning an error.
755 */
756 if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
757 /* synchronize with xfs_iflush_done */
758 xfs_iflock(ip);
759 xfs_ifunlock(ip);
760 }
761
762 xfs_iunlock(ip, XFS_ILOCK_EXCL);
763 xfs_ireclaim(ip);
764 return 0;
776} 765}
777 766
778int 767int
@@ -780,6 +769,6 @@ xfs_reclaim_inodes(
780 xfs_mount_t *mp, 769 xfs_mount_t *mp,
781 int mode) 770 int mode)
782{ 771{
783 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode, 772 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
784 XFS_ICI_RECLAIM_TAG); 773 XFS_ICI_RECLAIM_TAG, 1);
785} 774}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index a500b4d91835..ea932b43335d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -54,6 +54,6 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
54int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); 54int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
55int xfs_inode_ag_iterator(struct xfs_mount *mp, 55int xfs_inode_ag_iterator(struct xfs_mount *mp,
56 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 56 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
57 int flags, int tag); 57 int flags, int tag, int write_lock);
58 58
59#endif 59#endif
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index c40834bdee58..c22a608321a3 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -33,51 +33,55 @@ struct xfs_dquot;
33struct xlog_ticket; 33struct xlog_ticket;
34struct log; 34struct log;
35 35
36DECLARE_EVENT_CLASS(xfs_attr_list_class,
37 TP_PROTO(struct xfs_attr_list_context *ctx),
38 TP_ARGS(ctx),
39 TP_STRUCT__entry(
40 __field(dev_t, dev)
41 __field(xfs_ino_t, ino)
42 __field(u32, hashval)
43 __field(u32, blkno)
44 __field(u32, offset)
45 __field(void *, alist)
46 __field(int, bufsize)
47 __field(int, count)
48 __field(int, firstu)
49 __field(int, dupcnt)
50 __field(int, flags)
51 ),
52 TP_fast_assign(
53 __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
54 __entry->ino = ctx->dp->i_ino;
55 __entry->hashval = ctx->cursor->hashval;
56 __entry->blkno = ctx->cursor->blkno;
57 __entry->offset = ctx->cursor->offset;
58 __entry->alist = ctx->alist;
59 __entry->bufsize = ctx->bufsize;
60 __entry->count = ctx->count;
61 __entry->firstu = ctx->firstu;
62 __entry->flags = ctx->flags;
63 ),
64 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
65 "alist 0x%p size %u count %u firstu %u flags %d %s",
66 MAJOR(__entry->dev), MINOR(__entry->dev),
67 __entry->ino,
68 __entry->hashval,
69 __entry->blkno,
70 __entry->offset,
71 __entry->dupcnt,
72 __entry->alist,
73 __entry->bufsize,
74 __entry->count,
75 __entry->firstu,
76 __entry->flags,
77 __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS)
78 )
79)
80
36#define DEFINE_ATTR_LIST_EVENT(name) \ 81#define DEFINE_ATTR_LIST_EVENT(name) \
37TRACE_EVENT(name, \ 82DEFINE_EVENT(xfs_attr_list_class, name, \
38 TP_PROTO(struct xfs_attr_list_context *ctx), \ 83 TP_PROTO(struct xfs_attr_list_context *ctx), \
39 TP_ARGS(ctx), \ 84 TP_ARGS(ctx))
40 TP_STRUCT__entry( \
41 __field(dev_t, dev) \
42 __field(xfs_ino_t, ino) \
43 __field(u32, hashval) \
44 __field(u32, blkno) \
45 __field(u32, offset) \
46 __field(void *, alist) \
47 __field(int, bufsize) \
48 __field(int, count) \
49 __field(int, firstu) \
50 __field(int, dupcnt) \
51 __field(int, flags) \
52 ), \
53 TP_fast_assign( \
54 __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev; \
55 __entry->ino = ctx->dp->i_ino; \
56 __entry->hashval = ctx->cursor->hashval; \
57 __entry->blkno = ctx->cursor->blkno; \
58 __entry->offset = ctx->cursor->offset; \
59 __entry->alist = ctx->alist; \
60 __entry->bufsize = ctx->bufsize; \
61 __entry->count = ctx->count; \
62 __entry->firstu = ctx->firstu; \
63 __entry->flags = ctx->flags; \
64 ), \
65 TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " \
66 "alist 0x%p size %u count %u firstu %u flags %d %s", \
67 MAJOR(__entry->dev), MINOR(__entry->dev), \
68 __entry->ino, \
69 __entry->hashval, \
70 __entry->blkno, \
71 __entry->offset, \
72 __entry->dupcnt, \
73 __entry->alist, \
74 __entry->bufsize, \
75 __entry->count, \
76 __entry->firstu, \
77 __entry->flags, \
78 __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS) \
79 ) \
80)
81DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf); 85DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf);
82DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all); 86DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all);
83DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf); 87DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf);
@@ -178,91 +182,99 @@ TRACE_EVENT(xfs_iext_insert,
178 (char *)__entry->caller_ip) 182 (char *)__entry->caller_ip)
179); 183);
180 184
185DECLARE_EVENT_CLASS(xfs_bmap_class,
186 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state,
187 unsigned long caller_ip),
188 TP_ARGS(ip, idx, state, caller_ip),
189 TP_STRUCT__entry(
190 __field(dev_t, dev)
191 __field(xfs_ino_t, ino)
192 __field(xfs_extnum_t, idx)
193 __field(xfs_fileoff_t, startoff)
194 __field(xfs_fsblock_t, startblock)
195 __field(xfs_filblks_t, blockcount)
196 __field(xfs_exntst_t, state)
197 __field(int, bmap_state)
198 __field(unsigned long, caller_ip)
199 ),
200 TP_fast_assign(
201 struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ?
202 ip->i_afp : &ip->i_df;
203 struct xfs_bmbt_irec r;
204
205 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
206 __entry->dev = VFS_I(ip)->i_sb->s_dev;
207 __entry->ino = ip->i_ino;
208 __entry->idx = idx;
209 __entry->startoff = r.br_startoff;
210 __entry->startblock = r.br_startblock;
211 __entry->blockcount = r.br_blockcount;
212 __entry->state = r.br_state;
213 __entry->bmap_state = state;
214 __entry->caller_ip = caller_ip;
215 ),
216 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
217 "offset %lld block %s count %lld flag %d caller %pf",
218 MAJOR(__entry->dev), MINOR(__entry->dev),
219 __entry->ino,
220 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
221 (long)__entry->idx,
222 __entry->startoff,
223 xfs_fmtfsblock(__entry->startblock),
224 __entry->blockcount,
225 __entry->state,
226 (char *)__entry->caller_ip)
227)
228
181#define DEFINE_BMAP_EVENT(name) \ 229#define DEFINE_BMAP_EVENT(name) \
182TRACE_EVENT(name, \ 230DEFINE_EVENT(xfs_bmap_class, name, \
183 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \ 231 TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \
184 unsigned long caller_ip), \ 232 unsigned long caller_ip), \
185 TP_ARGS(ip, idx, state, caller_ip), \ 233 TP_ARGS(ip, idx, state, caller_ip))
186 TP_STRUCT__entry( \
187 __field(dev_t, dev) \
188 __field(xfs_ino_t, ino) \
189 __field(xfs_extnum_t, idx) \
190 __field(xfs_fileoff_t, startoff) \
191 __field(xfs_fsblock_t, startblock) \
192 __field(xfs_filblks_t, blockcount) \
193 __field(xfs_exntst_t, state) \
194 __field(int, bmap_state) \
195 __field(unsigned long, caller_ip) \
196 ), \
197 TP_fast_assign( \
198 struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ? \
199 ip->i_afp : &ip->i_df; \
200 struct xfs_bmbt_irec r; \
201 \
202 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r); \
203 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
204 __entry->ino = ip->i_ino; \
205 __entry->idx = idx; \
206 __entry->startoff = r.br_startoff; \
207 __entry->startblock = r.br_startblock; \
208 __entry->blockcount = r.br_blockcount; \
209 __entry->state = r.br_state; \
210 __entry->bmap_state = state; \
211 __entry->caller_ip = caller_ip; \
212 ), \
213 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " \
214 "offset %lld block %s count %lld flag %d caller %pf", \
215 MAJOR(__entry->dev), MINOR(__entry->dev), \
216 __entry->ino, \
217 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), \
218 (long)__entry->idx, \
219 __entry->startoff, \
220 xfs_fmtfsblock(__entry->startblock), \
221 __entry->blockcount, \
222 __entry->state, \
223 (char *)__entry->caller_ip) \
224)
225
226DEFINE_BMAP_EVENT(xfs_iext_remove); 234DEFINE_BMAP_EVENT(xfs_iext_remove);
227DEFINE_BMAP_EVENT(xfs_bmap_pre_update); 235DEFINE_BMAP_EVENT(xfs_bmap_pre_update);
228DEFINE_BMAP_EVENT(xfs_bmap_post_update); 236DEFINE_BMAP_EVENT(xfs_bmap_post_update);
229DEFINE_BMAP_EVENT(xfs_extlist); 237DEFINE_BMAP_EVENT(xfs_extlist);
230 238
231#define DEFINE_BUF_EVENT(tname) \ 239DECLARE_EVENT_CLASS(xfs_buf_class,
232TRACE_EVENT(tname, \ 240 TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
233 TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \ 241 TP_ARGS(bp, caller_ip),
234 TP_ARGS(bp, caller_ip), \ 242 TP_STRUCT__entry(
235 TP_STRUCT__entry( \ 243 __field(dev_t, dev)
236 __field(dev_t, dev) \ 244 __field(xfs_daddr_t, bno)
237 __field(xfs_daddr_t, bno) \ 245 __field(size_t, buffer_length)
238 __field(size_t, buffer_length) \ 246 __field(int, hold)
239 __field(int, hold) \ 247 __field(int, pincount)
240 __field(int, pincount) \ 248 __field(unsigned, lockval)
241 __field(unsigned, lockval) \ 249 __field(unsigned, flags)
242 __field(unsigned, flags) \ 250 __field(unsigned long, caller_ip)
243 __field(unsigned long, caller_ip) \ 251 ),
244 ), \ 252 TP_fast_assign(
245 TP_fast_assign( \ 253 __entry->dev = bp->b_target->bt_dev;
246 __entry->dev = bp->b_target->bt_dev; \ 254 __entry->bno = bp->b_bn;
247 __entry->bno = bp->b_bn; \ 255 __entry->buffer_length = bp->b_buffer_length;
248 __entry->buffer_length = bp->b_buffer_length; \ 256 __entry->hold = atomic_read(&bp->b_hold);
249 __entry->hold = atomic_read(&bp->b_hold); \ 257 __entry->pincount = atomic_read(&bp->b_pin_count);
250 __entry->pincount = atomic_read(&bp->b_pin_count); \ 258 __entry->lockval = xfs_buf_lock_value(bp);
251 __entry->lockval = xfs_buf_lock_value(bp); \ 259 __entry->flags = bp->b_flags;
252 __entry->flags = bp->b_flags; \ 260 __entry->caller_ip = caller_ip;
253 __entry->caller_ip = caller_ip; \ 261 ),
254 ), \ 262 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
255 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " \ 263 "lock %d flags %s caller %pf",
256 "lock %d flags %s caller %pf", \ 264 MAJOR(__entry->dev), MINOR(__entry->dev),
257 MAJOR(__entry->dev), MINOR(__entry->dev), \ 265 (unsigned long long)__entry->bno,
258 (unsigned long long)__entry->bno, \ 266 __entry->buffer_length,
259 __entry->buffer_length, \ 267 __entry->hold,
260 __entry->hold, \ 268 __entry->pincount,
261 __entry->pincount, \ 269 __entry->lockval,
262 __entry->lockval, \ 270 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
263 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), \ 271 (void *)__entry->caller_ip)
264 (void *)__entry->caller_ip) \
265) 272)
273
274#define DEFINE_BUF_EVENT(name) \
275DEFINE_EVENT(xfs_buf_class, name, \
276 TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \
277 TP_ARGS(bp, caller_ip))
266DEFINE_BUF_EVENT(xfs_buf_init); 278DEFINE_BUF_EVENT(xfs_buf_init);
267DEFINE_BUF_EVENT(xfs_buf_free); 279DEFINE_BUF_EVENT(xfs_buf_free);
268DEFINE_BUF_EVENT(xfs_buf_hold); 280DEFINE_BUF_EVENT(xfs_buf_hold);
@@ -299,41 +311,45 @@ DEFINE_BUF_EVENT(xfs_reset_dqcounts);
299DEFINE_BUF_EVENT(xfs_inode_item_push); 311DEFINE_BUF_EVENT(xfs_inode_item_push);
300 312
301/* pass flags explicitly */ 313/* pass flags explicitly */
302#define DEFINE_BUF_FLAGS_EVENT(tname) \ 314DECLARE_EVENT_CLASS(xfs_buf_flags_class,
303TRACE_EVENT(tname, \ 315 TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip),
304 TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \ 316 TP_ARGS(bp, flags, caller_ip),
305 TP_ARGS(bp, flags, caller_ip), \ 317 TP_STRUCT__entry(
306 TP_STRUCT__entry( \ 318 __field(dev_t, dev)
307 __field(dev_t, dev) \ 319 __field(xfs_daddr_t, bno)
308 __field(xfs_daddr_t, bno) \ 320 __field(size_t, buffer_length)
309 __field(size_t, buffer_length) \ 321 __field(int, hold)
310 __field(int, hold) \ 322 __field(int, pincount)
311 __field(int, pincount) \ 323 __field(unsigned, lockval)
312 __field(unsigned, lockval) \ 324 __field(unsigned, flags)
313 __field(unsigned, flags) \ 325 __field(unsigned long, caller_ip)
314 __field(unsigned long, caller_ip) \ 326 ),
315 ), \ 327 TP_fast_assign(
316 TP_fast_assign( \ 328 __entry->dev = bp->b_target->bt_dev;
317 __entry->dev = bp->b_target->bt_dev; \ 329 __entry->bno = bp->b_bn;
318 __entry->bno = bp->b_bn; \ 330 __entry->buffer_length = bp->b_buffer_length;
319 __entry->buffer_length = bp->b_buffer_length; \ 331 __entry->flags = flags;
320 __entry->flags = flags; \ 332 __entry->hold = atomic_read(&bp->b_hold);
321 __entry->hold = atomic_read(&bp->b_hold); \ 333 __entry->pincount = atomic_read(&bp->b_pin_count);
322 __entry->pincount = atomic_read(&bp->b_pin_count); \ 334 __entry->lockval = xfs_buf_lock_value(bp);
323 __entry->lockval = xfs_buf_lock_value(bp); \ 335 __entry->caller_ip = caller_ip;
324 __entry->caller_ip = caller_ip; \ 336 ),
325 ), \ 337 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
326 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " \ 338 "lock %d flags %s caller %pf",
327 "lock %d flags %s caller %pf", \ 339 MAJOR(__entry->dev), MINOR(__entry->dev),
328 MAJOR(__entry->dev), MINOR(__entry->dev), \ 340 (unsigned long long)__entry->bno,
329 (unsigned long long)__entry->bno, \ 341 __entry->buffer_length,
330 __entry->buffer_length, \ 342 __entry->hold,
331 __entry->hold, \ 343 __entry->pincount,
332 __entry->pincount, \ 344 __entry->lockval,
333 __entry->lockval, \ 345 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
334 __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), \ 346 (void *)__entry->caller_ip)
335 (void *)__entry->caller_ip) \
336) 347)
348
349#define DEFINE_BUF_FLAGS_EVENT(name) \
350DEFINE_EVENT(xfs_buf_flags_class, name, \
351 TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \
352 TP_ARGS(bp, flags, caller_ip))
337DEFINE_BUF_FLAGS_EVENT(xfs_buf_find); 353DEFINE_BUF_FLAGS_EVENT(xfs_buf_find);
338DEFINE_BUF_FLAGS_EVENT(xfs_buf_get); 354DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
339DEFINE_BUF_FLAGS_EVENT(xfs_buf_read); 355DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
@@ -376,55 +392,58 @@ TRACE_EVENT(xfs_buf_ioerror,
376 (void *)__entry->caller_ip) 392 (void *)__entry->caller_ip)
377); 393);
378 394
379#define DEFINE_BUF_ITEM_EVENT(tname) \ 395DECLARE_EVENT_CLASS(xfs_buf_item_class,
380TRACE_EVENT(tname, \ 396 TP_PROTO(struct xfs_buf_log_item *bip),
381 TP_PROTO(struct xfs_buf_log_item *bip), \ 397 TP_ARGS(bip),
382 TP_ARGS(bip), \ 398 TP_STRUCT__entry(
383 TP_STRUCT__entry( \ 399 __field(dev_t, dev)
384 __field(dev_t, dev) \ 400 __field(xfs_daddr_t, buf_bno)
385 __field(xfs_daddr_t, buf_bno) \ 401 __field(size_t, buf_len)
386 __field(size_t, buf_len) \ 402 __field(int, buf_hold)
387 __field(int, buf_hold) \ 403 __field(int, buf_pincount)
388 __field(int, buf_pincount) \ 404 __field(int, buf_lockval)
389 __field(int, buf_lockval) \ 405 __field(unsigned, buf_flags)
390 __field(unsigned, buf_flags) \ 406 __field(unsigned, bli_recur)
391 __field(unsigned, bli_recur) \ 407 __field(int, bli_refcount)
392 __field(int, bli_refcount) \ 408 __field(unsigned, bli_flags)
393 __field(unsigned, bli_flags) \ 409 __field(void *, li_desc)
394 __field(void *, li_desc) \ 410 __field(unsigned, li_flags)
395 __field(unsigned, li_flags) \ 411 ),
396 ), \ 412 TP_fast_assign(
397 TP_fast_assign( \ 413 __entry->dev = bip->bli_buf->b_target->bt_dev;
398 __entry->dev = bip->bli_buf->b_target->bt_dev; \ 414 __entry->bli_flags = bip->bli_flags;
399 __entry->bli_flags = bip->bli_flags; \ 415 __entry->bli_recur = bip->bli_recur;
400 __entry->bli_recur = bip->bli_recur; \ 416 __entry->bli_refcount = atomic_read(&bip->bli_refcount);
401 __entry->bli_refcount = atomic_read(&bip->bli_refcount); \ 417 __entry->buf_bno = bip->bli_buf->b_bn;
402 __entry->buf_bno = bip->bli_buf->b_bn; \ 418 __entry->buf_len = bip->bli_buf->b_buffer_length;
403 __entry->buf_len = bip->bli_buf->b_buffer_length; \ 419 __entry->buf_flags = bip->bli_buf->b_flags;
404 __entry->buf_flags = bip->bli_buf->b_flags; \ 420 __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
405 __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); \ 421 __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
406 __entry->buf_pincount = \ 422 __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf);
407 atomic_read(&bip->bli_buf->b_pin_count); \ 423 __entry->li_desc = bip->bli_item.li_desc;
408 __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf); \ 424 __entry->li_flags = bip->bli_item.li_flags;
409 __entry->li_desc = bip->bli_item.li_desc; \ 425 ),
410 __entry->li_flags = bip->bli_item.li_flags; \ 426 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
411 ), \ 427 "lock %d flags %s recur %d refcount %d bliflags %s "
412 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " \ 428 "lidesc 0x%p liflags %s",
413 "lock %d flags %s recur %d refcount %d bliflags %s " \ 429 MAJOR(__entry->dev), MINOR(__entry->dev),
414 "lidesc 0x%p liflags %s", \ 430 (unsigned long long)__entry->buf_bno,
415 MAJOR(__entry->dev), MINOR(__entry->dev), \ 431 __entry->buf_len,
416 (unsigned long long)__entry->buf_bno, \ 432 __entry->buf_hold,
417 __entry->buf_len, \ 433 __entry->buf_pincount,
418 __entry->buf_hold, \ 434 __entry->buf_lockval,
419 __entry->buf_pincount, \ 435 __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
420 __entry->buf_lockval, \ 436 __entry->bli_recur,
421 __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS), \ 437 __entry->bli_refcount,
422 __entry->bli_recur, \ 438 __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS),
423 __entry->bli_refcount, \ 439 __entry->li_desc,
424 __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS), \ 440 __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS))
425 __entry->li_desc, \
426 __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS)) \
427) 441)
442
443#define DEFINE_BUF_ITEM_EVENT(name) \
444DEFINE_EVENT(xfs_buf_item_class, name, \
445 TP_PROTO(struct xfs_buf_log_item *bip), \
446 TP_ARGS(bip))
428DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size); 447DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
429DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale); 448DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
430DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format); 449DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
@@ -450,78 +469,90 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
450DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); 469DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
451DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); 470DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
452 471
472DECLARE_EVENT_CLASS(xfs_lock_class,
473 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
474 unsigned long caller_ip),
475 TP_ARGS(ip, lock_flags, caller_ip),
476 TP_STRUCT__entry(
477 __field(dev_t, dev)
478 __field(xfs_ino_t, ino)
479 __field(int, lock_flags)
480 __field(unsigned long, caller_ip)
481 ),
482 TP_fast_assign(
483 __entry->dev = VFS_I(ip)->i_sb->s_dev;
484 __entry->ino = ip->i_ino;
485 __entry->lock_flags = lock_flags;
486 __entry->caller_ip = caller_ip;
487 ),
488 TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf",
489 MAJOR(__entry->dev), MINOR(__entry->dev),
490 __entry->ino,
491 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
492 (void *)__entry->caller_ip)
493)
494
453#define DEFINE_LOCK_EVENT(name) \ 495#define DEFINE_LOCK_EVENT(name) \
454TRACE_EVENT(name, \ 496DEFINE_EVENT(xfs_lock_class, name, \
455 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \ 497 TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \
456 unsigned long caller_ip), \ 498 unsigned long caller_ip), \
457 TP_ARGS(ip, lock_flags, caller_ip), \ 499 TP_ARGS(ip, lock_flags, caller_ip))
458 TP_STRUCT__entry( \
459 __field(dev_t, dev) \
460 __field(xfs_ino_t, ino) \
461 __field(int, lock_flags) \
462 __field(unsigned long, caller_ip) \
463 ), \
464 TP_fast_assign( \
465 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
466 __entry->ino = ip->i_ino; \
467 __entry->lock_flags = lock_flags; \
468 __entry->caller_ip = caller_ip; \
469 ), \
470 TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf", \
471 MAJOR(__entry->dev), MINOR(__entry->dev), \
472 __entry->ino, \
473 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), \
474 (void *)__entry->caller_ip) \
475)
476
477DEFINE_LOCK_EVENT(xfs_ilock); 500DEFINE_LOCK_EVENT(xfs_ilock);
478DEFINE_LOCK_EVENT(xfs_ilock_nowait); 501DEFINE_LOCK_EVENT(xfs_ilock_nowait);
479DEFINE_LOCK_EVENT(xfs_ilock_demote); 502DEFINE_LOCK_EVENT(xfs_ilock_demote);
480DEFINE_LOCK_EVENT(xfs_iunlock); 503DEFINE_LOCK_EVENT(xfs_iunlock);
481 504
505DECLARE_EVENT_CLASS(xfs_iget_class,
506 TP_PROTO(struct xfs_inode *ip),
507 TP_ARGS(ip),
508 TP_STRUCT__entry(
509 __field(dev_t, dev)
510 __field(xfs_ino_t, ino)
511 ),
512 TP_fast_assign(
513 __entry->dev = VFS_I(ip)->i_sb->s_dev;
514 __entry->ino = ip->i_ino;
515 ),
516 TP_printk("dev %d:%d ino 0x%llx",
517 MAJOR(__entry->dev), MINOR(__entry->dev),
518 __entry->ino)
519)
520
482#define DEFINE_IGET_EVENT(name) \ 521#define DEFINE_IGET_EVENT(name) \
483TRACE_EVENT(name, \ 522DEFINE_EVENT(xfs_iget_class, name, \
484 TP_PROTO(struct xfs_inode *ip), \ 523 TP_PROTO(struct xfs_inode *ip), \
485 TP_ARGS(ip), \ 524 TP_ARGS(ip))
486 TP_STRUCT__entry( \
487 __field(dev_t, dev) \
488 __field(xfs_ino_t, ino) \
489 ), \
490 TP_fast_assign( \
491 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
492 __entry->ino = ip->i_ino; \
493 ), \
494 TP_printk("dev %d:%d ino 0x%llx", \
495 MAJOR(__entry->dev), MINOR(__entry->dev), \
496 __entry->ino) \
497)
498DEFINE_IGET_EVENT(xfs_iget_skip); 525DEFINE_IGET_EVENT(xfs_iget_skip);
499DEFINE_IGET_EVENT(xfs_iget_reclaim); 526DEFINE_IGET_EVENT(xfs_iget_reclaim);
500DEFINE_IGET_EVENT(xfs_iget_found); 527DEFINE_IGET_EVENT(xfs_iget_found);
501DEFINE_IGET_EVENT(xfs_iget_alloc); 528DEFINE_IGET_EVENT(xfs_iget_alloc);
502 529
530DECLARE_EVENT_CLASS(xfs_inode_class,
531 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
532 TP_ARGS(ip, caller_ip),
533 TP_STRUCT__entry(
534 __field(dev_t, dev)
535 __field(xfs_ino_t, ino)
536 __field(int, count)
537 __field(unsigned long, caller_ip)
538 ),
539 TP_fast_assign(
540 __entry->dev = VFS_I(ip)->i_sb->s_dev;
541 __entry->ino = ip->i_ino;
542 __entry->count = atomic_read(&VFS_I(ip)->i_count);
543 __entry->caller_ip = caller_ip;
544 ),
545 TP_printk("dev %d:%d ino 0x%llx count %d caller %pf",
546 MAJOR(__entry->dev), MINOR(__entry->dev),
547 __entry->ino,
548 __entry->count,
549 (char *)__entry->caller_ip)
550)
551
503#define DEFINE_INODE_EVENT(name) \ 552#define DEFINE_INODE_EVENT(name) \
504TRACE_EVENT(name, \ 553DEFINE_EVENT(xfs_inode_class, name, \
505 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ 554 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
506 TP_ARGS(ip, caller_ip), \ 555 TP_ARGS(ip, caller_ip))
507 TP_STRUCT__entry( \
508 __field(dev_t, dev) \
509 __field(xfs_ino_t, ino) \
510 __field(int, count) \
511 __field(unsigned long, caller_ip) \
512 ), \
513 TP_fast_assign( \
514 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
515 __entry->ino = ip->i_ino; \
516 __entry->count = atomic_read(&VFS_I(ip)->i_count); \
517 __entry->caller_ip = caller_ip; \
518 ), \
519 TP_printk("dev %d:%d ino 0x%llx count %d caller %pf", \
520 MAJOR(__entry->dev), MINOR(__entry->dev), \
521 __entry->ino, \
522 __entry->count, \
523 (char *)__entry->caller_ip) \
524)
525DEFINE_INODE_EVENT(xfs_ihold); 556DEFINE_INODE_EVENT(xfs_ihold);
526DEFINE_INODE_EVENT(xfs_irele); 557DEFINE_INODE_EVENT(xfs_irele);
527/* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */ 558/* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */
@@ -529,55 +560,59 @@ DEFINE_INODE_EVENT(xfs_inode);
529#define xfs_itrace_entry(ip) \ 560#define xfs_itrace_entry(ip) \
530 trace_xfs_inode(ip, _THIS_IP_) 561 trace_xfs_inode(ip, _THIS_IP_)
531 562
532#define DEFINE_DQUOT_EVENT(tname) \ 563DECLARE_EVENT_CLASS(xfs_dquot_class,
533TRACE_EVENT(tname, \ 564 TP_PROTO(struct xfs_dquot *dqp),
534 TP_PROTO(struct xfs_dquot *dqp), \ 565 TP_ARGS(dqp),
535 TP_ARGS(dqp), \ 566 TP_STRUCT__entry(
536 TP_STRUCT__entry( \ 567 __field(dev_t, dev)
537 __field(dev_t, dev) \ 568 __field(__be32, id)
538 __field(__be32, id) \ 569 __field(unsigned, flags)
539 __field(unsigned, flags) \ 570 __field(unsigned, nrefs)
540 __field(unsigned, nrefs) \ 571 __field(unsigned long long, res_bcount)
541 __field(unsigned long long, res_bcount) \ 572 __field(unsigned long long, bcount)
542 __field(unsigned long long, bcount) \ 573 __field(unsigned long long, icount)
543 __field(unsigned long long, icount) \ 574 __field(unsigned long long, blk_hardlimit)
544 __field(unsigned long long, blk_hardlimit) \ 575 __field(unsigned long long, blk_softlimit)
545 __field(unsigned long long, blk_softlimit) \ 576 __field(unsigned long long, ino_hardlimit)
546 __field(unsigned long long, ino_hardlimit) \ 577 __field(unsigned long long, ino_softlimit)
547 __field(unsigned long long, ino_softlimit) \
548 ), \
549 TP_fast_assign( \
550 __entry->dev = dqp->q_mount->m_super->s_dev; \
551 __entry->id = dqp->q_core.d_id; \
552 __entry->flags = dqp->dq_flags; \
553 __entry->nrefs = dqp->q_nrefs; \
554 __entry->res_bcount = dqp->q_res_bcount; \
555 __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount); \
556 __entry->icount = be64_to_cpu(dqp->q_core.d_icount); \
557 __entry->blk_hardlimit = \
558 be64_to_cpu(dqp->q_core.d_blk_hardlimit); \
559 __entry->blk_softlimit = \
560 be64_to_cpu(dqp->q_core.d_blk_softlimit); \
561 __entry->ino_hardlimit = \
562 be64_to_cpu(dqp->q_core.d_ino_hardlimit); \
563 __entry->ino_softlimit = \
564 be64_to_cpu(dqp->q_core.d_ino_softlimit); \
565 ), \ 578 ), \
566 TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx " \ 579 TP_fast_assign(
567 "bcnt 0x%llx [hard 0x%llx | soft 0x%llx] " \ 580 __entry->dev = dqp->q_mount->m_super->s_dev;
568 "icnt 0x%llx [hard 0x%llx | soft 0x%llx]", \ 581 __entry->id = dqp->q_core.d_id;
569 MAJOR(__entry->dev), MINOR(__entry->dev), \ 582 __entry->flags = dqp->dq_flags;
570 be32_to_cpu(__entry->id), \ 583 __entry->nrefs = dqp->q_nrefs;
571 __print_flags(__entry->flags, "|", XFS_DQ_FLAGS), \ 584 __entry->res_bcount = dqp->q_res_bcount;
572 __entry->nrefs, \ 585 __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount);
573 __entry->res_bcount, \ 586 __entry->icount = be64_to_cpu(dqp->q_core.d_icount);
574 __entry->bcount, \ 587 __entry->blk_hardlimit =
575 __entry->blk_hardlimit, \ 588 be64_to_cpu(dqp->q_core.d_blk_hardlimit);
576 __entry->blk_softlimit, \ 589 __entry->blk_softlimit =
577 __entry->icount, \ 590 be64_to_cpu(dqp->q_core.d_blk_softlimit);
578 __entry->ino_hardlimit, \ 591 __entry->ino_hardlimit =
579 __entry->ino_softlimit) \ 592 be64_to_cpu(dqp->q_core.d_ino_hardlimit);
593 __entry->ino_softlimit =
594 be64_to_cpu(dqp->q_core.d_ino_softlimit);
595 ),
596 TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
597 "bcnt 0x%llx [hard 0x%llx | soft 0x%llx] "
598 "icnt 0x%llx [hard 0x%llx | soft 0x%llx]",
599 MAJOR(__entry->dev), MINOR(__entry->dev),
600 be32_to_cpu(__entry->id),
601 __print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
602 __entry->nrefs,
603 __entry->res_bcount,
604 __entry->bcount,
605 __entry->blk_hardlimit,
606 __entry->blk_softlimit,
607 __entry->icount,
608 __entry->ino_hardlimit,
609 __entry->ino_softlimit)
580) 610)
611
612#define DEFINE_DQUOT_EVENT(name) \
613DEFINE_EVENT(xfs_dquot_class, name, \
614 TP_PROTO(struct xfs_dquot *dqp), \
615 TP_ARGS(dqp))
581DEFINE_DQUOT_EVENT(xfs_dqadjust); 616DEFINE_DQUOT_EVENT(xfs_dqadjust);
582DEFINE_DQUOT_EVENT(xfs_dqshake_dirty); 617DEFINE_DQUOT_EVENT(xfs_dqshake_dirty);
583DEFINE_DQUOT_EVENT(xfs_dqshake_unlink); 618DEFINE_DQUOT_EVENT(xfs_dqshake_unlink);
@@ -610,72 +645,75 @@ DEFINE_DQUOT_EVENT(xfs_dqflush_done);
610DEFINE_IGET_EVENT(xfs_dquot_dqalloc); 645DEFINE_IGET_EVENT(xfs_dquot_dqalloc);
611DEFINE_IGET_EVENT(xfs_dquot_dqdetach); 646DEFINE_IGET_EVENT(xfs_dquot_dqdetach);
612 647
648DECLARE_EVENT_CLASS(xfs_loggrant_class,
649 TP_PROTO(struct log *log, struct xlog_ticket *tic),
650 TP_ARGS(log, tic),
651 TP_STRUCT__entry(
652 __field(dev_t, dev)
653 __field(unsigned, trans_type)
654 __field(char, ocnt)
655 __field(char, cnt)
656 __field(int, curr_res)
657 __field(int, unit_res)
658 __field(unsigned int, flags)
659 __field(void *, reserve_headq)
660 __field(void *, write_headq)
661 __field(int, grant_reserve_cycle)
662 __field(int, grant_reserve_bytes)
663 __field(int, grant_write_cycle)
664 __field(int, grant_write_bytes)
665 __field(int, curr_cycle)
666 __field(int, curr_block)
667 __field(xfs_lsn_t, tail_lsn)
668 ),
669 TP_fast_assign(
670 __entry->dev = log->l_mp->m_super->s_dev;
671 __entry->trans_type = tic->t_trans_type;
672 __entry->ocnt = tic->t_ocnt;
673 __entry->cnt = tic->t_cnt;
674 __entry->curr_res = tic->t_curr_res;
675 __entry->unit_res = tic->t_unit_res;
676 __entry->flags = tic->t_flags;
677 __entry->reserve_headq = log->l_reserve_headq;
678 __entry->write_headq = log->l_write_headq;
679 __entry->grant_reserve_cycle = log->l_grant_reserve_cycle;
680 __entry->grant_reserve_bytes = log->l_grant_reserve_bytes;
681 __entry->grant_write_cycle = log->l_grant_write_cycle;
682 __entry->grant_write_bytes = log->l_grant_write_bytes;
683 __entry->curr_cycle = log->l_curr_cycle;
684 __entry->curr_block = log->l_curr_block;
685 __entry->tail_lsn = log->l_tail_lsn;
686 ),
687 TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
688 "t_unit_res %u t_flags %s reserve_headq 0x%p "
689 "write_headq 0x%p grant_reserve_cycle %d "
690 "grant_reserve_bytes %d grant_write_cycle %d "
691 "grant_write_bytes %d curr_cycle %d curr_block %d "
692 "tail_cycle %d tail_block %d",
693 MAJOR(__entry->dev), MINOR(__entry->dev),
694 __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),
695 __entry->ocnt,
696 __entry->cnt,
697 __entry->curr_res,
698 __entry->unit_res,
699 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
700 __entry->reserve_headq,
701 __entry->write_headq,
702 __entry->grant_reserve_cycle,
703 __entry->grant_reserve_bytes,
704 __entry->grant_write_cycle,
705 __entry->grant_write_bytes,
706 __entry->curr_cycle,
707 __entry->curr_block,
708 CYCLE_LSN(__entry->tail_lsn),
709 BLOCK_LSN(__entry->tail_lsn)
710 )
711)
613 712
614#define DEFINE_LOGGRANT_EVENT(tname) \ 713#define DEFINE_LOGGRANT_EVENT(name) \
615TRACE_EVENT(tname, \ 714DEFINE_EVENT(xfs_loggrant_class, name, \
616 TP_PROTO(struct log *log, struct xlog_ticket *tic), \ 715 TP_PROTO(struct log *log, struct xlog_ticket *tic), \
617 TP_ARGS(log, tic), \ 716 TP_ARGS(log, tic))
618 TP_STRUCT__entry( \
619 __field(dev_t, dev) \
620 __field(unsigned, trans_type) \
621 __field(char, ocnt) \
622 __field(char, cnt) \
623 __field(int, curr_res) \
624 __field(int, unit_res) \
625 __field(unsigned int, flags) \
626 __field(void *, reserve_headq) \
627 __field(void *, write_headq) \
628 __field(int, grant_reserve_cycle) \
629 __field(int, grant_reserve_bytes) \
630 __field(int, grant_write_cycle) \
631 __field(int, grant_write_bytes) \
632 __field(int, curr_cycle) \
633 __field(int, curr_block) \
634 __field(xfs_lsn_t, tail_lsn) \
635 ), \
636 TP_fast_assign( \
637 __entry->dev = log->l_mp->m_super->s_dev; \
638 __entry->trans_type = tic->t_trans_type; \
639 __entry->ocnt = tic->t_ocnt; \
640 __entry->cnt = tic->t_cnt; \
641 __entry->curr_res = tic->t_curr_res; \
642 __entry->unit_res = tic->t_unit_res; \
643 __entry->flags = tic->t_flags; \
644 __entry->reserve_headq = log->l_reserve_headq; \
645 __entry->write_headq = log->l_write_headq; \
646 __entry->grant_reserve_cycle = log->l_grant_reserve_cycle; \
647 __entry->grant_reserve_bytes = log->l_grant_reserve_bytes; \
648 __entry->grant_write_cycle = log->l_grant_write_cycle; \
649 __entry->grant_write_bytes = log->l_grant_write_bytes; \
650 __entry->curr_cycle = log->l_curr_cycle; \
651 __entry->curr_block = log->l_curr_block; \
652 __entry->tail_lsn = log->l_tail_lsn; \
653 ), \
654 TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " \
655 "t_unit_res %u t_flags %s reserve_headq 0x%p " \
656 "write_headq 0x%p grant_reserve_cycle %d " \
657 "grant_reserve_bytes %d grant_write_cycle %d " \
658 "grant_write_bytes %d curr_cycle %d curr_block %d " \
659 "tail_cycle %d tail_block %d", \
660 MAJOR(__entry->dev), MINOR(__entry->dev), \
661 __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES), \
662 __entry->ocnt, \
663 __entry->cnt, \
664 __entry->curr_res, \
665 __entry->unit_res, \
666 __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), \
667 __entry->reserve_headq, \
668 __entry->write_headq, \
669 __entry->grant_reserve_cycle, \
670 __entry->grant_reserve_bytes, \
671 __entry->grant_write_cycle, \
672 __entry->grant_write_bytes, \
673 __entry->curr_cycle, \
674 __entry->curr_block, \
675 CYCLE_LSN(__entry->tail_lsn), \
676 BLOCK_LSN(__entry->tail_lsn) \
677 ) \
678)
679DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm); 717DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
680DEFINE_LOGGRANT_EVENT(xfs_log_done_perm); 718DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
681DEFINE_LOGGRANT_EVENT(xfs_log_reserve); 719DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
@@ -815,7 +853,7 @@ TRACE_EVENT(name, \
815 ), \ 853 ), \
816 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \ 854 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
817 "offset 0x%llx count %zd flags %s " \ 855 "offset 0x%llx count %zd flags %s " \
818 "startoff 0x%llx startblock 0x%llx blockcount 0x%llx", \ 856 "startoff 0x%llx startblock %s blockcount 0x%llx", \
819 MAJOR(__entry->dev), MINOR(__entry->dev), \ 857 MAJOR(__entry->dev), MINOR(__entry->dev), \
820 __entry->ino, \ 858 __entry->ino, \
821 __entry->size, \ 859 __entry->size, \
@@ -824,7 +862,7 @@ TRACE_EVENT(name, \
824 __entry->count, \ 862 __entry->count, \
825 __print_flags(__entry->flags, "|", BMAPI_FLAGS), \ 863 __print_flags(__entry->flags, "|", BMAPI_FLAGS), \
826 __entry->startoff, \ 864 __entry->startoff, \
827 __entry->startblock, \ 865 xfs_fmtfsblock(__entry->startblock), \
828 __entry->blockcount) \ 866 __entry->blockcount) \
829) 867)
830DEFINE_IOMAP_EVENT(xfs_iomap_enter); 868DEFINE_IOMAP_EVENT(xfs_iomap_enter);
@@ -897,28 +935,32 @@ TRACE_EVENT(xfs_itruncate_start,
897 __entry->toss_finish) 935 __entry->toss_finish)
898); 936);
899 937
938DECLARE_EVENT_CLASS(xfs_itrunc_class,
939 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
940 TP_ARGS(ip, new_size),
941 TP_STRUCT__entry(
942 __field(dev_t, dev)
943 __field(xfs_ino_t, ino)
944 __field(xfs_fsize_t, size)
945 __field(xfs_fsize_t, new_size)
946 ),
947 TP_fast_assign(
948 __entry->dev = VFS_I(ip)->i_sb->s_dev;
949 __entry->ino = ip->i_ino;
950 __entry->size = ip->i_d.di_size;
951 __entry->new_size = new_size;
952 ),
953 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx",
954 MAJOR(__entry->dev), MINOR(__entry->dev),
955 __entry->ino,
956 __entry->size,
957 __entry->new_size)
958)
959
900#define DEFINE_ITRUNC_EVENT(name) \ 960#define DEFINE_ITRUNC_EVENT(name) \
901TRACE_EVENT(name, \ 961DEFINE_EVENT(xfs_itrunc_class, name, \
902 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ 962 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
903 TP_ARGS(ip, new_size), \ 963 TP_ARGS(ip, new_size))
904 TP_STRUCT__entry( \
905 __field(dev_t, dev) \
906 __field(xfs_ino_t, ino) \
907 __field(xfs_fsize_t, size) \
908 __field(xfs_fsize_t, new_size) \
909 ), \
910 TP_fast_assign( \
911 __entry->dev = VFS_I(ip)->i_sb->s_dev; \
912 __entry->ino = ip->i_ino; \
913 __entry->size = ip->i_d.di_size; \
914 __entry->new_size = new_size; \
915 ), \
916 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx", \
917 MAJOR(__entry->dev), MINOR(__entry->dev), \
918 __entry->ino, \
919 __entry->size, \
920 __entry->new_size) \
921)
922DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start); 964DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start);
923DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end); 965DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end);
924 966
@@ -1037,28 +1079,28 @@ TRACE_EVENT(xfs_alloc_unbusy,
1037 1079
1038TRACE_EVENT(xfs_alloc_busysearch, 1080TRACE_EVENT(xfs_alloc_busysearch,
1039 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, 1081 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
1040 xfs_extlen_t len, int found), 1082 xfs_extlen_t len, xfs_lsn_t lsn),
1041 TP_ARGS(mp, agno, agbno, len, found), 1083 TP_ARGS(mp, agno, agbno, len, lsn),
1042 TP_STRUCT__entry( 1084 TP_STRUCT__entry(
1043 __field(dev_t, dev) 1085 __field(dev_t, dev)
1044 __field(xfs_agnumber_t, agno) 1086 __field(xfs_agnumber_t, agno)
1045 __field(xfs_agblock_t, agbno) 1087 __field(xfs_agblock_t, agbno)
1046 __field(xfs_extlen_t, len) 1088 __field(xfs_extlen_t, len)
1047 __field(int, found) 1089 __field(xfs_lsn_t, lsn)
1048 ), 1090 ),
1049 TP_fast_assign( 1091 TP_fast_assign(
1050 __entry->dev = mp->m_super->s_dev; 1092 __entry->dev = mp->m_super->s_dev;
1051 __entry->agno = agno; 1093 __entry->agno = agno;
1052 __entry->agbno = agbno; 1094 __entry->agbno = agbno;
1053 __entry->len = len; 1095 __entry->len = len;
1054 __entry->found = found; 1096 __entry->lsn = lsn;
1055 ), 1097 ),
1056 TP_printk("dev %d:%d agno %u agbno %u len %u %s", 1098 TP_printk("dev %d:%d agno %u agbno %u len %u force lsn 0x%llx",
1057 MAJOR(__entry->dev), MINOR(__entry->dev), 1099 MAJOR(__entry->dev), MINOR(__entry->dev),
1058 __entry->agno, 1100 __entry->agno,
1059 __entry->agbno, 1101 __entry->agbno,
1060 __entry->len, 1102 __entry->len,
1061 __print_symbolic(__entry->found, XFS_BUSY_STATES)) 1103 __entry->lsn)
1062); 1104);
1063 1105
1064TRACE_EVENT(xfs_agf, 1106TRACE_EVENT(xfs_agf,
@@ -1152,77 +1194,80 @@ TRACE_EVENT(xfs_free_extent,
1152 1194
1153); 1195);
1154 1196
1155#define DEFINE_ALLOC_EVENT(name) \ 1197DECLARE_EVENT_CLASS(xfs_alloc_class,
1156TRACE_EVENT(name, \ 1198 TP_PROTO(struct xfs_alloc_arg *args),
1157 TP_PROTO(struct xfs_alloc_arg *args), \ 1199 TP_ARGS(args),
1158 TP_ARGS(args), \ 1200 TP_STRUCT__entry(
1159 TP_STRUCT__entry( \ 1201 __field(dev_t, dev)
1160 __field(dev_t, dev) \ 1202 __field(xfs_agnumber_t, agno)
1161 __field(xfs_agnumber_t, agno) \ 1203 __field(xfs_agblock_t, agbno)
1162 __field(xfs_agblock_t, agbno) \ 1204 __field(xfs_extlen_t, minlen)
1163 __field(xfs_extlen_t, minlen) \ 1205 __field(xfs_extlen_t, maxlen)
1164 __field(xfs_extlen_t, maxlen) \ 1206 __field(xfs_extlen_t, mod)
1165 __field(xfs_extlen_t, mod) \ 1207 __field(xfs_extlen_t, prod)
1166 __field(xfs_extlen_t, prod) \ 1208 __field(xfs_extlen_t, minleft)
1167 __field(xfs_extlen_t, minleft) \ 1209 __field(xfs_extlen_t, total)
1168 __field(xfs_extlen_t, total) \ 1210 __field(xfs_extlen_t, alignment)
1169 __field(xfs_extlen_t, alignment) \ 1211 __field(xfs_extlen_t, minalignslop)
1170 __field(xfs_extlen_t, minalignslop) \ 1212 __field(xfs_extlen_t, len)
1171 __field(xfs_extlen_t, len) \ 1213 __field(short, type)
1172 __field(short, type) \ 1214 __field(short, otype)
1173 __field(short, otype) \ 1215 __field(char, wasdel)
1174 __field(char, wasdel) \ 1216 __field(char, wasfromfl)
1175 __field(char, wasfromfl) \ 1217 __field(char, isfl)
1176 __field(char, isfl) \ 1218 __field(char, userdata)
1177 __field(char, userdata) \ 1219 __field(xfs_fsblock_t, firstblock)
1178 __field(xfs_fsblock_t, firstblock) \ 1220 ),
1179 ), \ 1221 TP_fast_assign(
1180 TP_fast_assign( \ 1222 __entry->dev = args->mp->m_super->s_dev;
1181 __entry->dev = args->mp->m_super->s_dev; \ 1223 __entry->agno = args->agno;
1182 __entry->agno = args->agno; \ 1224 __entry->agbno = args->agbno;
1183 __entry->agbno = args->agbno; \ 1225 __entry->minlen = args->minlen;
1184 __entry->minlen = args->minlen; \ 1226 __entry->maxlen = args->maxlen;
1185 __entry->maxlen = args->maxlen; \ 1227 __entry->mod = args->mod;
1186 __entry->mod = args->mod; \ 1228 __entry->prod = args->prod;
1187 __entry->prod = args->prod; \ 1229 __entry->minleft = args->minleft;
1188 __entry->minleft = args->minleft; \ 1230 __entry->total = args->total;
1189 __entry->total = args->total; \ 1231 __entry->alignment = args->alignment;
1190 __entry->alignment = args->alignment; \ 1232 __entry->minalignslop = args->minalignslop;
1191 __entry->minalignslop = args->minalignslop; \ 1233 __entry->len = args->len;
1192 __entry->len = args->len; \ 1234 __entry->type = args->type;
1193 __entry->type = args->type; \ 1235 __entry->otype = args->otype;
1194 __entry->otype = args->otype; \ 1236 __entry->wasdel = args->wasdel;
1195 __entry->wasdel = args->wasdel; \ 1237 __entry->wasfromfl = args->wasfromfl;
1196 __entry->wasfromfl = args->wasfromfl; \ 1238 __entry->isfl = args->isfl;
1197 __entry->isfl = args->isfl; \ 1239 __entry->userdata = args->userdata;
1198 __entry->userdata = args->userdata; \ 1240 __entry->firstblock = args->firstblock;
1199 __entry->firstblock = args->firstblock; \ 1241 ),
1200 ), \ 1242 TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
1201 TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u " \ 1243 "prod %u minleft %u total %u alignment %u minalignslop %u "
1202 "prod %u minleft %u total %u alignment %u minalignslop %u " \ 1244 "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d "
1203 "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d " \ 1245 "userdata %d firstblock 0x%llx",
1204 "userdata %d firstblock 0x%llx", \ 1246 MAJOR(__entry->dev), MINOR(__entry->dev),
1205 MAJOR(__entry->dev), MINOR(__entry->dev), \ 1247 __entry->agno,
1206 __entry->agno, \ 1248 __entry->agbno,
1207 __entry->agbno, \ 1249 __entry->minlen,
1208 __entry->minlen, \ 1250 __entry->maxlen,
1209 __entry->maxlen, \ 1251 __entry->mod,
1210 __entry->mod, \ 1252 __entry->prod,
1211 __entry->prod, \ 1253 __entry->minleft,
1212 __entry->minleft, \ 1254 __entry->total,
1213 __entry->total, \ 1255 __entry->alignment,
1214 __entry->alignment, \ 1256 __entry->minalignslop,
1215 __entry->minalignslop, \ 1257 __entry->len,
1216 __entry->len, \ 1258 __print_symbolic(__entry->type, XFS_ALLOC_TYPES),
1217 __print_symbolic(__entry->type, XFS_ALLOC_TYPES), \ 1259 __print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
1218 __print_symbolic(__entry->otype, XFS_ALLOC_TYPES), \ 1260 __entry->wasdel,
1219 __entry->wasdel, \ 1261 __entry->wasfromfl,
1220 __entry->wasfromfl, \ 1262 __entry->isfl,
1221 __entry->isfl, \ 1263 __entry->userdata,
1222 __entry->userdata, \ 1264 __entry->firstblock)
1223 __entry->firstblock) \
1224) 1265)
1225 1266
1267#define DEFINE_ALLOC_EVENT(name) \
1268DEFINE_EVENT(xfs_alloc_class, name, \
1269 TP_PROTO(struct xfs_alloc_arg *args), \
1270 TP_ARGS(args))
1226DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); 1271DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
1227DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); 1272DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
1228DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); 1273DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
@@ -1245,92 +1290,100 @@ DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
1245DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed); 1290DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
1246DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed); 1291DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
1247 1292
1248#define DEFINE_DIR2_TRACE(tname) \ 1293DECLARE_EVENT_CLASS(xfs_dir2_class,
1249TRACE_EVENT(tname, \ 1294 TP_PROTO(struct xfs_da_args *args),
1295 TP_ARGS(args),
1296 TP_STRUCT__entry(
1297 __field(dev_t, dev)
1298 __field(xfs_ino_t, ino)
1299 __dynamic_array(char, name, args->namelen)
1300 __field(int, namelen)
1301 __field(xfs_dahash_t, hashval)
1302 __field(xfs_ino_t, inumber)
1303 __field(int, op_flags)
1304 ),
1305 TP_fast_assign(
1306 __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
1307 __entry->ino = args->dp->i_ino;
1308 if (args->namelen)
1309 memcpy(__get_str(name), args->name, args->namelen);
1310 __entry->namelen = args->namelen;
1311 __entry->hashval = args->hashval;
1312 __entry->inumber = args->inumber;
1313 __entry->op_flags = args->op_flags;
1314 ),
1315 TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x "
1316 "inumber 0x%llx op_flags %s",
1317 MAJOR(__entry->dev), MINOR(__entry->dev),
1318 __entry->ino,
1319 __entry->namelen,
1320 __entry->namelen ? __get_str(name) : NULL,
1321 __entry->namelen,
1322 __entry->hashval,
1323 __entry->inumber,
1324 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS))
1325)
1326
1327#define DEFINE_DIR2_EVENT(name) \
1328DEFINE_EVENT(xfs_dir2_class, name, \
1250 TP_PROTO(struct xfs_da_args *args), \ 1329 TP_PROTO(struct xfs_da_args *args), \
1251 TP_ARGS(args), \ 1330 TP_ARGS(args))
1252 TP_STRUCT__entry( \ 1331DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);
1253 __field(dev_t, dev) \ 1332DEFINE_DIR2_EVENT(xfs_dir2_sf_create);
1254 __field(xfs_ino_t, ino) \ 1333DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup);
1255 __dynamic_array(char, name, args->namelen) \ 1334DEFINE_DIR2_EVENT(xfs_dir2_sf_replace);
1256 __field(int, namelen) \ 1335DEFINE_DIR2_EVENT(xfs_dir2_sf_removename);
1257 __field(xfs_dahash_t, hashval) \ 1336DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4);
1258 __field(xfs_ino_t, inumber) \ 1337DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8);
1259 __field(int, op_flags) \ 1338DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block);
1260 ), \ 1339DEFINE_DIR2_EVENT(xfs_dir2_block_addname);
1261 TP_fast_assign( \ 1340DEFINE_DIR2_EVENT(xfs_dir2_block_lookup);
1262 __entry->dev = VFS_I(args->dp)->i_sb->s_dev; \ 1341DEFINE_DIR2_EVENT(xfs_dir2_block_replace);
1263 __entry->ino = args->dp->i_ino; \ 1342DEFINE_DIR2_EVENT(xfs_dir2_block_removename);
1264 if (args->namelen) \ 1343DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf);
1265 memcpy(__get_str(name), args->name, args->namelen); \ 1344DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf);
1266 __entry->namelen = args->namelen; \ 1345DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname);
1267 __entry->hashval = args->hashval; \ 1346DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup);
1268 __entry->inumber = args->inumber; \ 1347DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace);
1269 __entry->op_flags = args->op_flags; \ 1348DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename);
1270 ), \ 1349DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block);
1271 TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x " \ 1350DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node);
1272 "inumber 0x%llx op_flags %s", \ 1351DEFINE_DIR2_EVENT(xfs_dir2_node_addname);
1273 MAJOR(__entry->dev), MINOR(__entry->dev), \ 1352DEFINE_DIR2_EVENT(xfs_dir2_node_lookup);
1274 __entry->ino, \ 1353DEFINE_DIR2_EVENT(xfs_dir2_node_replace);
1275 __entry->namelen, \ 1354DEFINE_DIR2_EVENT(xfs_dir2_node_removename);
1276 __entry->namelen ? __get_str(name) : NULL, \ 1355DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf);
1277 __entry->namelen, \ 1356
1278 __entry->hashval, \ 1357DECLARE_EVENT_CLASS(xfs_dir2_space_class,
1279 __entry->inumber, \ 1358 TP_PROTO(struct xfs_da_args *args, int idx),
1280 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS)) \ 1359 TP_ARGS(args, idx),
1360 TP_STRUCT__entry(
1361 __field(dev_t, dev)
1362 __field(xfs_ino_t, ino)
1363 __field(int, op_flags)
1364 __field(int, idx)
1365 ),
1366 TP_fast_assign(
1367 __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
1368 __entry->ino = args->dp->i_ino;
1369 __entry->op_flags = args->op_flags;
1370 __entry->idx = idx;
1371 ),
1372 TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d",
1373 MAJOR(__entry->dev), MINOR(__entry->dev),
1374 __entry->ino,
1375 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
1376 __entry->idx)
1281) 1377)
1282DEFINE_DIR2_TRACE(xfs_dir2_sf_addname);
1283DEFINE_DIR2_TRACE(xfs_dir2_sf_create);
1284DEFINE_DIR2_TRACE(xfs_dir2_sf_lookup);
1285DEFINE_DIR2_TRACE(xfs_dir2_sf_replace);
1286DEFINE_DIR2_TRACE(xfs_dir2_sf_removename);
1287DEFINE_DIR2_TRACE(xfs_dir2_sf_toino4);
1288DEFINE_DIR2_TRACE(xfs_dir2_sf_toino8);
1289DEFINE_DIR2_TRACE(xfs_dir2_sf_to_block);
1290DEFINE_DIR2_TRACE(xfs_dir2_block_addname);
1291DEFINE_DIR2_TRACE(xfs_dir2_block_lookup);
1292DEFINE_DIR2_TRACE(xfs_dir2_block_replace);
1293DEFINE_DIR2_TRACE(xfs_dir2_block_removename);
1294DEFINE_DIR2_TRACE(xfs_dir2_block_to_sf);
1295DEFINE_DIR2_TRACE(xfs_dir2_block_to_leaf);
1296DEFINE_DIR2_TRACE(xfs_dir2_leaf_addname);
1297DEFINE_DIR2_TRACE(xfs_dir2_leaf_lookup);
1298DEFINE_DIR2_TRACE(xfs_dir2_leaf_replace);
1299DEFINE_DIR2_TRACE(xfs_dir2_leaf_removename);
1300DEFINE_DIR2_TRACE(xfs_dir2_leaf_to_block);
1301DEFINE_DIR2_TRACE(xfs_dir2_leaf_to_node);
1302DEFINE_DIR2_TRACE(xfs_dir2_node_addname);
1303DEFINE_DIR2_TRACE(xfs_dir2_node_lookup);
1304DEFINE_DIR2_TRACE(xfs_dir2_node_replace);
1305DEFINE_DIR2_TRACE(xfs_dir2_node_removename);
1306DEFINE_DIR2_TRACE(xfs_dir2_node_to_leaf);
1307 1378
1308#define DEFINE_DIR2_SPACE_TRACE(tname) \ 1379#define DEFINE_DIR2_SPACE_EVENT(name) \
1309TRACE_EVENT(tname, \ 1380DEFINE_EVENT(xfs_dir2_space_class, name, \
1310 TP_PROTO(struct xfs_da_args *args, int idx), \ 1381 TP_PROTO(struct xfs_da_args *args, int idx), \
1311 TP_ARGS(args, idx), \ 1382 TP_ARGS(args, idx))
1312 TP_STRUCT__entry( \ 1383DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add);
1313 __field(dev_t, dev) \ 1384DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove);
1314 __field(xfs_ino_t, ino) \ 1385DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode);
1315 __field(int, op_flags) \ 1386DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode);
1316 __field(int, idx) \
1317 ), \
1318 TP_fast_assign( \
1319 __entry->dev = VFS_I(args->dp)->i_sb->s_dev; \
1320 __entry->ino = args->dp->i_ino; \
1321 __entry->op_flags = args->op_flags; \
1322 __entry->idx = idx; \
1323 ), \
1324 TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d", \
1325 MAJOR(__entry->dev), MINOR(__entry->dev), \
1326 __entry->ino, \
1327 __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS), \
1328 __entry->idx) \
1329)
1330DEFINE_DIR2_SPACE_TRACE(xfs_dir2_leafn_add);
1331DEFINE_DIR2_SPACE_TRACE(xfs_dir2_leafn_remove);
1332DEFINE_DIR2_SPACE_TRACE(xfs_dir2_grow_inode);
1333DEFINE_DIR2_SPACE_TRACE(xfs_dir2_shrink_inode);
1334 1387
1335TRACE_EVENT(xfs_dir2_leafn_moveents, 1388TRACE_EVENT(xfs_dir2_leafn_moveents,
1336 TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count), 1389 TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count),
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 71af76fe8a23..873e07e29074 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -891,7 +891,7 @@ xfs_qm_dqrele_all_inodes(
891 uint flags) 891 uint flags)
892{ 892{
893 ASSERT(mp->m_quotainfo); 893 ASSERT(mp->m_quotainfo);
894 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG); 894 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0);
895} 895}
896 896
897/*------------------------------------------------------------------------*/ 897/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index a1c65fc6d9c4..275b1f4f9430 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2563,43 +2563,41 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
2563 xfs_mount_t *mp; 2563 xfs_mount_t *mp;
2564 xfs_perag_busy_t *bsy; 2564 xfs_perag_busy_t *bsy;
2565 xfs_agblock_t uend, bend; 2565 xfs_agblock_t uend, bend;
2566 xfs_lsn_t lsn; 2566 xfs_lsn_t lsn = 0;
2567 int cnt; 2567 int cnt;
2568 2568
2569 mp = tp->t_mountp; 2569 mp = tp->t_mountp;
2570 2570
2571 spin_lock(&mp->m_perag[agno].pagb_lock); 2571 spin_lock(&mp->m_perag[agno].pagb_lock);
2572 cnt = mp->m_perag[agno].pagb_count;
2573 2572
2574 uend = bno + len - 1; 2573 uend = bno + len - 1;
2575 2574
2576 /* search pagb_list for this slot, skipping open slots */ 2575 /*
2577 for (bsy = mp->m_perag[agno].pagb_list; cnt; bsy++) { 2576 * search pagb_list for this slot, skipping open slots. We have to
2577 * search the entire array as there may be multiple overlaps and
2578 * we have to get the most recent LSN for the log force to push out
2579 * all the transactions that span the range.
2580 */
2581 for (cnt = 0; cnt < mp->m_perag[agno].pagb_count; cnt++) {
2582 bsy = &mp->m_perag[agno].pagb_list[cnt];
2583 if (!bsy->busy_tp)
2584 continue;
2578 2585
2579 /* 2586 bend = bsy->busy_start + bsy->busy_length - 1;
2580 * (start1,length1) within (start2, length2) 2587 if (bno > bend || uend < bsy->busy_start)
2581 */ 2588 continue;
2582 if (bsy->busy_tp != NULL) {
2583 bend = bsy->busy_start + bsy->busy_length - 1;
2584 if ((bno > bend) || (uend < bsy->busy_start)) {
2585 cnt--;
2586 } else {
2587 break;
2588 }
2589 }
2590 }
2591 2589
2592 trace_xfs_alloc_busysearch(mp, agno, bno, len, !!cnt); 2590 /* (start1,length1) within (start2, length2) */
2591 if (XFS_LSN_CMP(bsy->busy_tp->t_commit_lsn, lsn) > 0)
2592 lsn = bsy->busy_tp->t_commit_lsn;
2593 }
2594 spin_unlock(&mp->m_perag[agno].pagb_lock);
2595 trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, lsn);
2593 2596
2594 /* 2597 /*
2595 * If a block was found, force the log through the LSN of the 2598 * If a block was found, force the log through the LSN of the
2596 * transaction that freed the block 2599 * transaction that freed the block
2597 */ 2600 */
2598 if (cnt) { 2601 if (lsn)
2599 lsn = bsy->busy_tp->t_commit_lsn;
2600 spin_unlock(&mp->m_perag[agno].pagb_lock);
2601 xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); 2602 xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
2602 } else {
2603 spin_unlock(&mp->m_perag[agno].pagb_lock);
2604 }
2605} 2603}
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index d1483a4f71b8..84ca1cf16a1e 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -114,10 +114,82 @@ xfs_swapext(
114 return error; 114 return error;
115} 115}
116 116
117/*
118 * We need to check that the format of the data fork in the temporary inode is
119 * valid for the target inode before doing the swap. This is not a problem with
120 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
121 * data fork depending on the space the attribute fork is taking so we can get
122 * invalid formats on the target inode.
123 *
124 * E.g. target has space for 7 extents in extent format, temp inode only has
125 * space for 6. If we defragment down to 7 extents, then the tmp format is a
126 * btree, but when swapped it needs to be in extent format. Hence we can't just
127 * blindly swap data forks on attr2 filesystems.
128 *
129 * Note that we check the swap in both directions so that we don't end up with
130 * a corrupt temporary inode, either.
131 *
132 * Note that fixing the way xfs_fsr sets up the attribute fork in the source
133 * inode will prevent this situation from occurring, so all we do here is
134 * reject and log the attempt. basically we are putting the responsibility on
135 * userspace to get this right.
136 */
137static int
138xfs_swap_extents_check_format(
139 xfs_inode_t *ip, /* target inode */
140 xfs_inode_t *tip) /* tmp inode */
141{
142
143 /* Should never get a local format */
144 if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
145 tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
146 return EINVAL;
147
148 /*
149 * if the target inode has less extents that then temporary inode then
150 * why did userspace call us?
151 */
152 if (ip->i_d.di_nextents < tip->i_d.di_nextents)
153 return EINVAL;
154
155 /*
156 * if the target inode is in extent form and the temp inode is in btree
157 * form then we will end up with the target inode in the wrong format
158 * as we already know there are less extents in the temp inode.
159 */
160 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
161 tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
162 return EINVAL;
163
164 /* Check temp in extent form to max in target */
165 if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
166 XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > ip->i_df.if_ext_max)
167 return EINVAL;
168
169 /* Check target in extent form to max in temp */
170 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
171 XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max)
172 return EINVAL;
173
174 /* Check root block of temp in btree form to max in target */
175 if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
176 XFS_IFORK_BOFF(ip) &&
177 tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip))
178 return EINVAL;
179
180 /* Check root block of target in btree form to max in temp */
181 if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
182 XFS_IFORK_BOFF(tip) &&
183 ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip))
184 return EINVAL;
185
186 return 0;
187}
188
117int 189int
118xfs_swap_extents( 190xfs_swap_extents(
119 xfs_inode_t *ip, 191 xfs_inode_t *ip, /* target inode */
120 xfs_inode_t *tip, 192 xfs_inode_t *tip, /* tmp inode */
121 xfs_swapext_t *sxp) 193 xfs_swapext_t *sxp)
122{ 194{
123 xfs_mount_t *mp; 195 xfs_mount_t *mp;
@@ -161,13 +233,6 @@ xfs_swap_extents(
161 goto out_unlock; 233 goto out_unlock;
162 } 234 }
163 235
164 /* Should never get a local format */
165 if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
166 tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
167 error = XFS_ERROR(EINVAL);
168 goto out_unlock;
169 }
170
171 if (VN_CACHED(VFS_I(tip)) != 0) { 236 if (VN_CACHED(VFS_I(tip)) != 0) {
172 error = xfs_flushinval_pages(tip, 0, -1, 237 error = xfs_flushinval_pages(tip, 0, -1,
173 FI_REMAPF_LOCKED); 238 FI_REMAPF_LOCKED);
@@ -189,13 +254,12 @@ xfs_swap_extents(
189 goto out_unlock; 254 goto out_unlock;
190 } 255 }
191 256
192 /* 257 /* check inode formats now that data is flushed */
193 * If the target has extended attributes, the tmp file 258 error = xfs_swap_extents_check_format(ip, tip);
194 * must also in order to ensure the correct data fork 259 if (error) {
195 * format. 260 xfs_fs_cmn_err(CE_NOTE, mp,
196 */ 261 "%s: inode 0x%llx format is incompatible for exchanging.",
197 if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) { 262 __FILE__, ip->i_ino);
198 error = XFS_ERROR(EINVAL);
199 goto out_unlock; 263 goto out_unlock;
200 } 264 }
201 265
@@ -276,6 +340,16 @@ xfs_swap_extents(
276 *tifp = *tempifp; /* struct copy */ 340 *tifp = *tempifp; /* struct copy */
277 341
278 /* 342 /*
343 * Fix the in-memory data fork values that are dependent on the fork
344 * offset in the inode. We can't assume they remain the same as attr2
345 * has dynamic fork offsets.
346 */
347 ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) /
348 (uint)sizeof(xfs_bmbt_rec_t);
349 tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) /
350 (uint)sizeof(xfs_bmbt_rec_t);
351
352 /*
279 * Fix the on-disk inode values 353 * Fix the on-disk inode values
280 */ 354 */
281 tmp = (__uint64_t)ip->i_d.di_nblocks; 355 tmp = (__uint64_t)ip->i_d.di_nblocks;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index fa402a6bbbcf..155e798f30a1 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -73,7 +73,6 @@ xfs_inode_alloc(
73 ASSERT(atomic_read(&ip->i_pincount) == 0); 73 ASSERT(atomic_read(&ip->i_pincount) == 0);
74 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 74 ASSERT(!spin_is_locked(&ip->i_flags_lock));
75 ASSERT(completion_done(&ip->i_flush)); 75 ASSERT(completion_done(&ip->i_flush));
76 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
77 76
78 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); 77 mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
79 78
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ce278b3ae7fc..ef77fd88c8e3 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2841,8 +2841,8 @@ xfs_iflush(
2841 mp = ip->i_mount; 2841 mp = ip->i_mount;
2842 2842
2843 /* 2843 /*
2844 * If the inode isn't dirty, then just release the inode 2844 * If the inode isn't dirty, then just release the inode flush lock and
2845 * flush lock and do nothing. 2845 * do nothing.
2846 */ 2846 */
2847 if (xfs_inode_clean(ip)) { 2847 if (xfs_inode_clean(ip)) {
2848 xfs_ifunlock(ip); 2848 xfs_ifunlock(ip);
@@ -2868,6 +2868,19 @@ xfs_iflush(
2868 xfs_iunpin_wait(ip); 2868 xfs_iunpin_wait(ip);
2869 2869
2870 /* 2870 /*
2871 * For stale inodes we cannot rely on the backing buffer remaining
2872 * stale in cache for the remaining life of the stale inode and so
2873 * xfs_itobp() below may give us a buffer that no longer contains
2874 * inodes below. We have to check this after ensuring the inode is
2875 * unpinned so that it is safe to reclaim the stale inode after the
2876 * flush call.
2877 */
2878 if (xfs_iflags_test(ip, XFS_ISTALE)) {
2879 xfs_ifunlock(ip);
2880 return 0;
2881 }
2882
2883 /*
2871 * This may have been unpinned because the filesystem is shutting 2884 * This may have been unpinned because the filesystem is shutting
2872 * down forcibly. If that's the case we must not write this inode 2885 * down forcibly. If that's the case we must not write this inode
2873 * to disk, because the log record didn't make it to disk! 2886 * to disk, because the log record didn't make it to disk!
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 9e15a1185362..6be05f756d59 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1517,6 +1517,8 @@ xfs_rtfree_range(
1517 */ 1517 */
1518 error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, 1518 error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
1519 &postblock); 1519 &postblock);
1520 if (error)
1521 return error;
1520 /* 1522 /*
1521 * If there are blocks not being freed at the front of the 1523 * If there are blocks not being freed at the front of the
1522 * old extent, add summary data for them to be allocated. 1524 * old extent, add summary data for them to be allocated.
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 6558ffd8d140..6f268756bf36 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -70,7 +70,6 @@ xfs_setattr(
70 uint commit_flags=0; 70 uint commit_flags=0;
71 uid_t uid=0, iuid=0; 71 uid_t uid=0, iuid=0;
72 gid_t gid=0, igid=0; 72 gid_t gid=0, igid=0;
73 int timeflags = 0;
74 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 73 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
75 int need_iolock = 1; 74 int need_iolock = 1;
76 75
@@ -135,16 +134,13 @@ xfs_setattr(
135 if (flags & XFS_ATTR_NOLOCK) 134 if (flags & XFS_ATTR_NOLOCK)
136 need_iolock = 0; 135 need_iolock = 0;
137 if (!(mask & ATTR_SIZE)) { 136 if (!(mask & ATTR_SIZE)) {
138 if ((mask != (ATTR_CTIME|ATTR_ATIME|ATTR_MTIME)) || 137 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
139 (mp->m_flags & XFS_MOUNT_WSYNC)) { 138 commit_flags = 0;
140 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 139 code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp),
141 commit_flags = 0; 140 0, 0, 0);
142 if ((code = xfs_trans_reserve(tp, 0, 141 if (code) {
143 XFS_ICHANGE_LOG_RES(mp), 0, 142 lock_flags = 0;
144 0, 0))) { 143 goto error_return;
145 lock_flags = 0;
146 goto error_return;
147 }
148 } 144 }
149 } else { 145 } else {
150 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) && 146 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
@@ -295,15 +291,23 @@ xfs_setattr(
295 * or we are explicitly asked to change it. This handles 291 * or we are explicitly asked to change it. This handles
296 * the semantic difference between truncate() and ftruncate() 292 * the semantic difference between truncate() and ftruncate()
297 * as implemented in the VFS. 293 * as implemented in the VFS.
294 *
295 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME
296 * is a special case where we need to update the times despite
297 * not having these flags set. For all other operations the
298 * VFS set these flags explicitly if it wants a timestamp
299 * update.
298 */ 300 */
299 if (iattr->ia_size != ip->i_size || (mask & ATTR_CTIME)) 301 if (iattr->ia_size != ip->i_size &&
300 timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 302 (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
303 iattr->ia_ctime = iattr->ia_mtime =
304 current_fs_time(inode->i_sb);
305 mask |= ATTR_CTIME | ATTR_MTIME;
306 }
301 307
302 if (iattr->ia_size > ip->i_size) { 308 if (iattr->ia_size > ip->i_size) {
303 ip->i_d.di_size = iattr->ia_size; 309 ip->i_d.di_size = iattr->ia_size;
304 ip->i_size = iattr->ia_size; 310 ip->i_size = iattr->ia_size;
305 if (!(flags & XFS_ATTR_DMI))
306 xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
307 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 311 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
308 } else if (iattr->ia_size <= ip->i_size || 312 } else if (iattr->ia_size <= ip->i_size ||
309 (iattr->ia_size == 0 && ip->i_d.di_nextents)) { 313 (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
@@ -374,9 +378,6 @@ xfs_setattr(
374 ip->i_d.di_gid = gid; 378 ip->i_d.di_gid = gid;
375 inode->i_gid = gid; 379 inode->i_gid = gid;
376 } 380 }
377
378 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
379 timeflags |= XFS_ICHGTIME_CHG;
380 } 381 }
381 382
382 /* 383 /*
@@ -393,51 +394,37 @@ xfs_setattr(
393 394
394 inode->i_mode &= S_IFMT; 395 inode->i_mode &= S_IFMT;
395 inode->i_mode |= mode & ~S_IFMT; 396 inode->i_mode |= mode & ~S_IFMT;
396
397 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
398 timeflags |= XFS_ICHGTIME_CHG;
399 } 397 }
400 398
401 /* 399 /*
402 * Change file access or modified times. 400 * Change file access or modified times.
403 */ 401 */
404 if (mask & (ATTR_ATIME|ATTR_MTIME)) { 402 if (mask & ATTR_ATIME) {
405 if (mask & ATTR_ATIME) { 403 inode->i_atime = iattr->ia_atime;
406 inode->i_atime = iattr->ia_atime; 404 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
407 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; 405 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
408 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; 406 ip->i_update_core = 1;
409 ip->i_update_core = 1;
410 }
411 if (mask & ATTR_MTIME) {
412 inode->i_mtime = iattr->ia_mtime;
413 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
414 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
415 timeflags &= ~XFS_ICHGTIME_MOD;
416 timeflags |= XFS_ICHGTIME_CHG;
417 }
418 if (tp && (mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)))
419 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
420 } 407 }
421 408 if (mask & ATTR_CTIME) {
422 /*
423 * Change file inode change time only if ATTR_CTIME set
424 * AND we have been called by a DMI function.
425 */
426
427 if ((flags & XFS_ATTR_DMI) && (mask & ATTR_CTIME)) {
428 inode->i_ctime = iattr->ia_ctime; 409 inode->i_ctime = iattr->ia_ctime;
429 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; 410 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
430 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; 411 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
431 ip->i_update_core = 1; 412 ip->i_update_core = 1;
432 timeflags &= ~XFS_ICHGTIME_CHG; 413 }
414 if (mask & ATTR_MTIME) {
415 inode->i_mtime = iattr->ia_mtime;
416 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
417 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
418 ip->i_update_core = 1;
433 } 419 }
434 420
435 /* 421 /*
436 * Send out timestamp changes that need to be set to the 422 * And finally, log the inode core if any attribute in it
437 * current time. Not done when called by a DMI function. 423 * has been changed.
438 */ 424 */
439 if (timeflags && !(flags & XFS_ATTR_DMI)) 425 if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE|
440 xfs_ichgtime(ip, timeflags); 426 ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
427 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
441 428
442 XFS_STATS_INC(xs_ig_attrchg); 429 XFS_STATS_INC(xs_ig_attrchg);
443 430
@@ -452,12 +439,10 @@ xfs_setattr(
452 * mix so this probably isn't worth the trouble to optimize. 439 * mix so this probably isn't worth the trouble to optimize.
453 */ 440 */
454 code = 0; 441 code = 0;
455 if (tp) { 442 if (mp->m_flags & XFS_MOUNT_WSYNC)
456 if (mp->m_flags & XFS_MOUNT_WSYNC) 443 xfs_trans_set_sync(tp);
457 xfs_trans_set_sync(tp);
458 444
459 code = xfs_trans_commit(tp, commit_flags); 445 code = xfs_trans_commit(tp, commit_flags);
460 }
461 446
462 xfs_iunlock(ip, lock_flags); 447 xfs_iunlock(ip, lock_flags);
463 448