Diffstat (limited to 'fs')
-rw-r--r--  fs/afs/write.c                    |    1
-rw-r--r--  fs/binfmt_flat.c                  |    6
-rw-r--r--  fs/btrfs/ctree.c                  |  129
-rw-r--r--  fs/btrfs/extent_io.c              |    2
-rw-r--r--  fs/btrfs/ioctl.c                  |   20
-rw-r--r--  fs/cifs/cifsfs.c                  |   16
-rw-r--r--  fs/cifs/cifsproto.h               |    1
-rw-r--r--  fs/cifs/dir.c                     |   76
-rw-r--r--  fs/cifs/file.c                    |  100
-rw-r--r--  fs/cifs/inode.c                   |    4
-rw-r--r--  fs/cifs/sess.c                    |   10
-rw-r--r--  fs/dcache.c                       |    4
-rw-r--r--  fs/ext2/acl.c                     |    1
-rw-r--r--  fs/ext3/acl.c                     |    1
-rw-r--r--  fs/fcntl.c                        |    6
-rw-r--r--  fs/fs-writeback.c                 |  465
-rw-r--r--  fs/gfs2/bmap.c                    |    1
-rw-r--r--  fs/gfs2/dir.c                     |    2
-rw-r--r--  fs/gfs2/glock.c                   |   12
-rw-r--r--  fs/gfs2/inode.c                   |   12
-rw-r--r--  fs/gfs2/quota.c                   |   10
-rw-r--r--  fs/gfs2/quota.h                   |    2
-rw-r--r--  fs/inode.c                        |    2
-rw-r--r--  fs/jbd2/journal.c                 |   15
-rw-r--r--  fs/jbd2/transaction.c             |    9
-rw-r--r--  fs/mbcache.c                      |    5
-rw-r--r--  fs/nfs/client.c                   |  122
-rw-r--r--  fs/nfs/dir.c                      |    2
-rw-r--r--  fs/nfs/getroot.c                  |    2
-rw-r--r--  fs/nfs/internal.h                 |    3
-rw-r--r--  fs/nfs/nfs4xdr.c                  |    4
-rw-r--r--  fs/nfs/super.c                    |   22
-rw-r--r--  fs/ocfs2/aops.c                   |   94
-rw-r--r--  fs/ocfs2/dlm/dlmdomain.c          |    3
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c          |   22
-rw-r--r--  fs/ocfs2/dlm/dlmrecovery.c        |    2
-rw-r--r--  fs/ocfs2/file.c                   |  309
-rw-r--r--  fs/ocfs2/file.h                   |    6
-rw-r--r--  fs/ocfs2/journal.c                |   30
-rw-r--r--  fs/ocfs2/localalloc.c             |    7
-rw-r--r--  fs/ocfs2/quota_global.c           |    2
-rw-r--r--  fs/ocfs2/quota_local.c            |    4
-rw-r--r--  fs/ocfs2/refcounttree.c           |   12
-rw-r--r--  fs/ocfs2/reservations.c           |    1
-rw-r--r--  fs/ocfs2/suballoc.c               |    2
-rw-r--r--  fs/ocfs2/xattr.c                  |  200
-rw-r--r--  fs/partitions/ibm.c               |   14
-rw-r--r--  fs/proc/proc_devtree.c            |    3
-rw-r--r--  fs/proc/task_nommu.c              |   20
-rw-r--r--  fs/quota/dquot.c                  |    2
-rw-r--r--  fs/splice.c                       |    9
-rw-r--r--  fs/super.c                        |    6
-rw-r--r--  fs/sysv/ialloc.c                  |    6
-rw-r--r--  fs/ubifs/budget.c                 |    2
-rw-r--r--  fs/ubifs/shrinker.c               |    2
-rw-r--r--  fs/ubifs/ubifs.h                  |    2
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c        |    5
-rw-r--r--  fs/xfs/linux-2.6/xfs_export.c     |   11
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl.c      |    7
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl32.c    |   15
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c      |    2
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c       |  130
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.h       |    2
-rw-r--r--  fs/xfs/linux-2.6/xfs_trace.h      |    3
-rw-r--r--  fs/xfs/quota/xfs_qm.c             |   25
-rw-r--r--  fs/xfs/quota/xfs_qm_syscalls.c    |   27
-rw-r--r--  fs/xfs/xfs_dfrag.c                |    5
-rw-r--r--  fs/xfs/xfs_ialloc.c               |  142
-rw-r--r--  fs/xfs/xfs_iget.c                 |   10
-rw-r--r--  fs/xfs/xfs_inode.c                |    5
-rw-r--r--  fs/xfs/xfs_inode.h                |    6
-rw-r--r--  fs/xfs/xfs_itable.c               |  285
-rw-r--r--  fs/xfs/xfs_itable.h               |   17
-rw-r--r--  fs/xfs/xfs_log_recover.c          |    2
-rw-r--r--  fs/xfs/xfs_mount.c                |    2
-rw-r--r--  fs/xfs/xfs_mount.h                |    2
-rw-r--r--  fs/xfs/xfs_rtalloc.c              |    4
-rw-r--r--  fs/xfs/xfs_trans_inode.c          |    2
-rw-r--r--  fs/xfs/xfs_vnodeops.c             |    2
79 files changed, 1304 insertions(+), 1234 deletions(-)
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 3dab9e9948d0..722743b152d8 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -680,7 +680,6 @@ int afs_writeback_all(struct afs_vnode *vnode)
 {
	struct address_space *mapping = vnode->vfs_inode.i_mapping;
	struct writeback_control wbc = {
-		.bdi		= mapping->backing_dev_info,
		.sync_mode	= WB_SYNC_ALL,
		.nr_to_write	= LONG_MAX,
		.range_cyclic	= 1,
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index b6ab27ccf214..811384bec8de 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -68,11 +68,7 @@
  * Here we can be a bit looser than the data sections since this
  * needs to only meet arch ABI requirements.
  */
-#ifdef ARCH_SLAB_MINALIGN
-#define FLAT_STACK_ALIGN	(ARCH_SLAB_MINALIGN)
-#else
-#define FLAT_STACK_ALIGN	(sizeof(void *))
-#endif
+#define FLAT_STACK_ALIGN	max_t(unsigned long, sizeof(void *), ARCH_SLAB_MINALIGN)
 
 #define RELOC_FAILED	0xff00ff01	/* Relocation incorrect somewhere */
 #define UNLOADED_LIB	0x7ff000ff	/* Placeholder for unused library */
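A note on the FLAT_STACK_ALIGN change above: max_t() casts both arguments to the named type before comparing, so the single definition always picks the stricter of the two alignment requirements. This assumes ARCH_SLAB_MINALIGN is unconditionally defined by this point; the numbers below are illustrative, not taken from any particular arch:

/*
 * On a hypothetical 32-bit arch with ARCH_SLAB_MINALIGN == 8:
 *
 *	FLAT_STACK_ALIGN == max_t(unsigned long, sizeof(void *), 8)
 *			 == max_t(unsigned long, 4, 8)
 *			 == 8
 *
 * Where ARCH_SLAB_MINALIGN is smaller than a pointer, sizeof(void *)
 * wins instead, matching the old #else branch.
 */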
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 0d1d966b0fe4..c3df14ce2cc2 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2304,12 +2304,17 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
	return ret;
 }
 
+/*
+ * min slot controls the lowest index we're willing to push to the
+ * right.  We'll push up to and including min_slot, but no lower
+ */
 static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      struct btrfs_path *path,
				      int data_size, int empty,
				      struct extent_buffer *right,
-				      int free_space, u32 left_nritems)
+				      int free_space, u32 left_nritems,
+				      u32 min_slot)
 {
	struct extent_buffer *left = path->nodes[0];
	struct extent_buffer *upper = path->nodes[1];
@@ -2327,7 +2332,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
	if (empty)
		nr = 0;
	else
-		nr = 1;
+		nr = max_t(u32, 1, min_slot);
 
	if (path->slots[0] >= left_nritems)
		push_space += data_size;
@@ -2469,10 +2474,14 @@ out_unlock:
  *
  * returns 1 if the push failed because the other node didn't have enough
  * room, 0 if everything worked out and < 0 if there were major errors.
+ *
+ * this will push starting from min_slot to the end of the leaf.  It won't
+ * push any slot lower than min_slot
  */
 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
-			   *root, struct btrfs_path *path, int data_size,
-			   int empty)
+			   *root, struct btrfs_path *path,
+			   int min_data_size, int data_size,
+			   int empty, u32 min_slot)
 {
	struct extent_buffer *left = path->nodes[0];
	struct extent_buffer *right;
@@ -2514,8 +2523,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
	if (left_nritems == 0)
		goto out_unlock;
 
-	return __push_leaf_right(trans, root, path, data_size, empty,
-				right, free_space, left_nritems);
+	return __push_leaf_right(trans, root, path, min_data_size, empty,
+				right, free_space, left_nritems, min_slot);
 out_unlock:
	btrfs_tree_unlock(right);
	free_extent_buffer(right);
@@ -2525,12 +2534,17 @@ out_unlock:
 /*
  * push some data in the path leaf to the left, trying to free up at
  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items.  The
+ * item at 'max_slot' won't be touched.  Use (u32)-1 to make us do all the
+ * items
  */
 static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct btrfs_path *path, int data_size,
				     int empty, struct extent_buffer *left,
-				     int free_space, int right_nritems)
+				     int free_space, u32 right_nritems,
+				     u32 max_slot)
 {
	struct btrfs_disk_key disk_key;
	struct extent_buffer *right = path->nodes[0];
@@ -2549,9 +2563,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
	slot = path->slots[1];
 
	if (empty)
-		nr = right_nritems;
+		nr = min(right_nritems, max_slot);
	else
-		nr = right_nritems - 1;
+		nr = min(right_nritems - 1, max_slot);
 
	for (i = 0; i < nr; i++) {
		item = btrfs_item_nr(right, i);
@@ -2712,10 +2726,14 @@ out:
 /*
  * push some data in the path leaf to the left, trying to free up at
  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ *
+ * max_slot can put a limit on how far into the leaf we'll push items.  The
+ * item at 'max_slot' won't be touched.  Use (u32)-1 to make us push all the
+ * items
  */
 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
-			  *root, struct btrfs_path *path, int data_size,
-			  int empty)
+			  *root, struct btrfs_path *path, int min_data_size,
+			  int data_size, int empty, u32 max_slot)
 {
	struct extent_buffer *right = path->nodes[0];
	struct extent_buffer *left;
@@ -2761,8 +2779,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
		goto out;
	}
 
-	return __push_leaf_left(trans, root, path, data_size,
-				empty, left, free_space, right_nritems);
+	return __push_leaf_left(trans, root, path, min_data_size,
+				empty, left, free_space, right_nritems,
+				max_slot);
 out:
	btrfs_tree_unlock(left);
	free_extent_buffer(left);
@@ -2855,6 +2874,64 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
 }
 
 /*
+ * double splits happen when we need to insert a big item in the middle
+ * of a leaf.  A double split can leave us with 3 mostly empty leaves:
+ * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
+ *          A                 B                 C
+ *
+ * We avoid this by trying to push the items on either side of our target
+ * into the adjacent leaves.  If all goes well we can avoid the double split
+ * completely.
+ */
+static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
+					  struct btrfs_root *root,
+					  struct btrfs_path *path,
+					  int data_size)
+{
+	int ret;
+	int progress = 0;
+	int slot;
+	u32 nritems;
+
+	slot = path->slots[0];
+
+	/*
+	 * try to push all the items after our slot into the
+	 * right leaf
+	 */
+	ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
+	if (ret < 0)
+		return ret;
+
+	if (ret == 0)
+		progress++;
+
+	nritems = btrfs_header_nritems(path->nodes[0]);
+	/*
+	 * our goal is to get our slot at the start or end of a leaf.  If
+	 * we've done so we're done
+	 */
+	if (path->slots[0] == 0 || path->slots[0] == nritems)
+		return 0;
+
+	if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+		return 0;
+
+	/* try to push all the items before our slot into the next leaf */
+	slot = path->slots[0];
+	ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
+	if (ret < 0)
+		return ret;
+
+	if (ret == 0)
+		progress++;
+
+	if (progress)
+		return 0;
+	return 1;
+}
+
+/*
  * split the path's leaf in two, making sure there is at least data_size
  * available for the resulting leaf level of the path.
  *
@@ -2876,6 +2953,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
	int wret;
	int split;
	int num_doubles = 0;
+	int tried_avoid_double = 0;
 
	l = path->nodes[0];
	slot = path->slots[0];
@@ -2884,12 +2962,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
		return -EOVERFLOW;
 
	/* first try to make some room by pushing left and right */
-	if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
-		wret = push_leaf_right(trans, root, path, data_size, 0);
+	if (data_size) {
+		wret = push_leaf_right(trans, root, path, data_size,
+				       data_size, 0, 0);
		if (wret < 0)
			return wret;
		if (wret) {
-			wret = push_leaf_left(trans, root, path, data_size, 0);
+			wret = push_leaf_left(trans, root, path, data_size,
+					      data_size, 0, (u32)-1);
			if (wret < 0)
				return wret;
		}
@@ -2923,6 +3003,8 @@ again:
		if (mid != nritems &&
		    leaf_space_used(l, mid, nritems - mid) +
		    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+			if (data_size && !tried_avoid_double)
+				goto push_for_double;
			split = 2;
		}
	}
@@ -2939,6 +3021,8 @@ again:
			if (mid != nritems &&
			    leaf_space_used(l, mid, nritems - mid) +
			    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+				if (data_size && !tried_avoid_double)
+					goto push_for_double;
				split = 2 ;
			}
		}
@@ -3019,6 +3103,13 @@ again:
	}
 
	return ret;
+
+push_for_double:
+	push_for_double_split(trans, root, path, data_size);
+	tried_avoid_double = 1;
+	if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
+		return 0;
+	goto again;
 }
 
 static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
@@ -3915,13 +4006,15 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		extent_buffer_get(leaf);
 
		btrfs_set_path_blocking(path);
-		wret = push_leaf_left(trans, root, path, 1, 1);
+		wret = push_leaf_left(trans, root, path, 1, 1,
+				      1, (u32)-1);
		if (wret < 0 && wret != -ENOSPC)
			ret = wret;
 
		if (path->nodes[0] == leaf &&
		    btrfs_header_nritems(leaf)) {
-			wret = push_leaf_right(trans, root, path, 1, 1);
+			wret = push_leaf_right(trans, root, path, 1,
+					       1, 1, 0);
			if (wret < 0 && wret != -ENOSPC)
				ret = wret;
		}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a4080c21ec55..d74e6af9b53a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2594,7 +2594,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
	};
	struct writeback_control wbc_writepages = {
-		.bdi		= wbc->bdi,
		.sync_mode	= wbc->sync_mode,
		.older_than_this = NULL,
		.nr_to_write	= 64,
@@ -2628,7 +2627,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
		.sync_io = mode == WB_SYNC_ALL,
	};
	struct writeback_control wbc_writepages = {
-		.bdi		= inode->i_mapping->backing_dev_info,
		.sync_mode	= mode,
		.older_than_this = NULL,
		.nr_to_write	= nr_pages * 2,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4dbaf89b1337..9254b3d58dbe 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1458,7 +1458,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
	 */
 
	/* the destination must be opened for writing */
-	if (!(file->f_mode & FMODE_WRITE))
+	if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
		return -EINVAL;
 
	ret = mnt_want_write(file->f_path.mnt);
@@ -1511,7 +1511,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 
	/* determine range to clone */
	ret = -EINVAL;
-	if (off >= src->i_size || off + len > src->i_size)
+	if (off + len > src->i_size || off + len < off)
		goto out_unlock;
	if (len == 0)
		olen = len = src->i_size - off;
@@ -1578,6 +1578,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
			u64 disko = 0, diskl = 0;
			u64 datao = 0, datal = 0;
			u8 comp;
+			u64 endoff;
 
			size = btrfs_item_size_nr(leaf, slot);
			read_extent_buffer(leaf, buf,
@@ -1712,9 +1713,18 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
			btrfs_release_path(root, path);
 
			inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-			if (new_key.offset + datal > inode->i_size)
-				btrfs_i_size_write(inode,
-					new_key.offset + datal);
+
+			/*
+			 * we round up to the block size at eof when
+			 * determining which extents to clone above,
+			 * but shouldn't round up the file size
+			 */
+			endoff = new_key.offset + datal;
+			if (endoff > off+olen)
+				endoff = off+olen;
+			if (endoff > inode->i_size)
+				btrfs_i_size_write(inode, endoff);
+
			BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
			ret = btrfs_update_inode(trans, root, inode);
			BUG_ON(ret);
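The endoff clamp above is easiest to see with concrete numbers; the values below are illustrative and assume a 4K block size:

/*
 * off = 0, olen = 6000: the caller asked to clone bytes [0, 6000).
 * Extents are cloned in whole blocks, so the final extent is rounded
 * up and datal = 8192.
 *
 *	endoff = new_key.offset + datal = 8192
 *	endoff > off + olen, so endoff is clamped to 6000
 *
 * i_size grows to 6000, what the caller asked for, instead of the
 * rounded-up 8192 that the old code would have written.
 */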
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 78c02eb4cb1f..484e52bb40bb 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -473,14 +473,24 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data)
	return 0;
 }
 
+void cifs_drop_inode(struct inode *inode)
+{
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
+		return generic_drop_inode(inode);
+
+	return generic_delete_inode(inode);
+}
+
 static const struct super_operations cifs_super_ops = {
	.put_super = cifs_put_super,
	.statfs = cifs_statfs,
	.alloc_inode = cifs_alloc_inode,
	.destroy_inode = cifs_destroy_inode,
-/*	.drop_inode	= generic_delete_inode,
-	.delete_inode	= cifs_delete_inode, */	/* Do not need above two
-	functions unless later we add lazy close of inodes or unless the
+	.drop_inode	= cifs_drop_inode,
+/*	.delete_inode	= cifs_delete_inode, */	/* Do not need above
+	function unless later we add lazy close of inodes or unless the
	kernel forgets to call us with the same number of releases (closes)
	as opens */
	.show_options = cifs_show_options,
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index fb1657e0fdb8..fb6318b81509 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -106,7 +106,6 @@ extern struct cifsFileInfo *cifs_new_fileinfo(struct inode *newinode,
			__u16 fileHandle, struct file *file,
			struct vfsmount *mnt, unsigned int oflags);
 extern int cifs_posix_open(char *full_path, struct inode **pinode,
-			struct vfsmount *mnt,
			struct super_block *sb,
			int mode, int oflags,
			__u32 *poplock, __u16 *pnetfid, int xid);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 391816b461ca..e7ae78b66fa1 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -25,6 +25,7 @@
 #include <linux/slab.h>
 #include <linux/namei.h>
 #include <linux/mount.h>
+#include <linux/file.h>
 #include "cifsfs.h"
 #include "cifspdu.h"
 #include "cifsglob.h"
@@ -184,12 +185,13 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
	}
	write_unlock(&GlobalSMBSeslock);
 
+	file->private_data = pCifsFile;
+
	return pCifsFile;
 }
 
 int cifs_posix_open(char *full_path, struct inode **pinode,
-		    struct vfsmount *mnt, struct super_block *sb,
-		    int mode, int oflags,
+		    struct super_block *sb, int mode, int oflags,
		    __u32 *poplock, __u16 *pnetfid, int xid)
 {
	int rc;
@@ -258,19 +260,6 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
		cifs_fattr_to_inode(*pinode, &fattr);
	}
 
-	/*
-	 * cifs_fill_filedata() takes care of setting cifsFileInfo pointer to
-	 * file->private_data.
-	 */
-	if (mnt) {
-		struct cifsFileInfo *pfile_info;
-
-		pfile_info = cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt,
-					       oflags);
-		if (pfile_info == NULL)
-			rc = -ENOMEM;
-	}
-
 posix_open_ret:
	kfree(presp_data);
	return rc;
@@ -298,7 +287,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
	int create_options = CREATE_NOT_DIR;
	__u32 oplock = 0;
	int oflags;
-	bool posix_create = false;
	/*
	 * BB below access is probably too much for mknod to request
	 * but we have to do query and setpathinfo so requesting
@@ -339,7 +327,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
			le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		rc = cifs_posix_open(full_path, &newinode,
-				nd ? nd->path.mnt : NULL,
				inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
		/* EIO could indicate that (posix open) operation is not
		   supported, despite what server claimed in capability
@@ -347,7 +334,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
		   handled in posix open */
 
		if (rc == 0) {
-			posix_create = true;
			if (newinode == NULL) /* query inode info */
				goto cifs_create_get_file_info;
			else /* success, no need to query */
@@ -478,21 +464,28 @@ cifs_create_set_dentry:
	else
		cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
 
-	/* nfsd case - nfs srv does not set nd */
-	if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) {
-		/* mknod case - do not leave file open */
-		CIFSSMBClose(xid, tcon, fileHandle);
-	} else if (!(posix_create) && (newinode)) {
+	if (newinode && nd && (nd->flags & LOOKUP_OPEN)) {
		struct cifsFileInfo *pfile_info;
-		/*
-		 * cifs_fill_filedata() takes care of setting cifsFileInfo
-		 * pointer to file->private_data.
-		 */
-		pfile_info = cifs_new_fileinfo(newinode, fileHandle, NULL,
+		struct file *filp;
+
+		filp = lookup_instantiate_filp(nd, direntry, generic_file_open);
+		if (IS_ERR(filp)) {
+			rc = PTR_ERR(filp);
+			CIFSSMBClose(xid, tcon, fileHandle);
+			goto cifs_create_out;
+		}
+
+		pfile_info = cifs_new_fileinfo(newinode, fileHandle, filp,
					       nd->path.mnt, oflags);
-		if (pfile_info == NULL)
+		if (pfile_info == NULL) {
+			fput(filp);
+			CIFSSMBClose(xid, tcon, fileHandle);
			rc = -ENOMEM;
+		}
+	} else {
+		CIFSSMBClose(xid, tcon, fileHandle);
	}
+
 cifs_create_out:
	kfree(buf);
	kfree(full_path);
@@ -636,6 +629,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
	bool posix_open = false;
	struct cifs_sb_info *cifs_sb;
	struct cifsTconInfo *pTcon;
+	struct cifsFileInfo *cfile;
	struct inode *newInode = NULL;
	char *full_path = NULL;
	struct file *filp;
@@ -703,7 +697,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
	if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) &&
	    (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
	    (nd->intent.open.flags & O_CREAT)) {
-		rc = cifs_posix_open(full_path, &newInode, nd->path.mnt,
+		rc = cifs_posix_open(full_path, &newInode,
				     parent_dir_inode->i_sb,
				     nd->intent.open.create_mode,
				     nd->intent.open.flags, &oplock,
@@ -733,8 +727,25 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
		else
			direntry->d_op = &cifs_dentry_ops;
		d_add(direntry, newInode);
-		if (posix_open)
-			filp = lookup_instantiate_filp(nd, direntry, NULL);
+		if (posix_open) {
+			filp = lookup_instantiate_filp(nd, direntry,
+						       generic_file_open);
+			if (IS_ERR(filp)) {
+				rc = PTR_ERR(filp);
+				CIFSSMBClose(xid, pTcon, fileHandle);
+				goto lookup_out;
+			}
+
+			cfile = cifs_new_fileinfo(newInode, fileHandle, filp,
+						  nd->path.mnt,
+						  nd->intent.open.flags);
+			if (cfile == NULL) {
+				fput(filp);
+				CIFSSMBClose(xid, pTcon, fileHandle);
+				rc = -ENOMEM;
+				goto lookup_out;
+			}
+		}
		/* since paths are not looked up by component - the parent
		   directories are presumed to be good here */
		renew_parental_timestamps(direntry);
@@ -755,6 +766,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
		   is a common return code */
	}
 
+lookup_out:
	kfree(full_path);
	FreeXid(xid);
	return ERR_PTR(rc);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 75541af4b3db..409e4f523e61 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -162,44 +162,12 @@ psx_client_can_cache:
	return 0;
 }
 
-static struct cifsFileInfo *
-cifs_fill_filedata(struct file *file)
-{
-	struct list_head *tmp;
-	struct cifsFileInfo *pCifsFile = NULL;
-	struct cifsInodeInfo *pCifsInode = NULL;
-
-	/* search inode for this file and fill in file->private_data */
-	pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
-	read_lock(&GlobalSMBSeslock);
-	list_for_each(tmp, &pCifsInode->openFileList) {
-		pCifsFile = list_entry(tmp, struct cifsFileInfo, flist);
-		if ((pCifsFile->pfile == NULL) &&
-		    (pCifsFile->pid == current->tgid)) {
-			/* mode set in cifs_create */
-
-			/* needed for writepage */
-			pCifsFile->pfile = file;
-			file->private_data = pCifsFile;
-			break;
-		}
-	}
-	read_unlock(&GlobalSMBSeslock);
-
-	if (file->private_data != NULL) {
-		return pCifsFile;
-	} else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
-		cERROR(1, "could not find file instance for "
-			   "new file %p", file);
-	return NULL;
-}
-
 /* all arguments to this function must be checked for validity in caller */
-static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
-	struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
+static inline int cifs_open_inode_helper(struct inode *inode,
	struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
	char *full_path, int xid)
 {
+	struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
	struct timespec temp;
	int rc;
 
@@ -213,36 +181,35 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
	/* if not oplocked, invalidate inode pages if mtime or file
	   size changed */
	temp = cifs_NTtimeToUnix(buf->LastWriteTime);
-	if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
-	    (file->f_path.dentry->d_inode->i_size ==
+	if (timespec_equal(&inode->i_mtime, &temp) &&
+	    (inode->i_size ==
	     (loff_t)le64_to_cpu(buf->EndOfFile))) {
		cFYI(1, "inode unchanged on server");
	} else {
-		if (file->f_path.dentry->d_inode->i_mapping) {
+		if (inode->i_mapping) {
			/* BB no need to lock inode until after invalidate
			   since namei code should already have it locked? */
-			rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
+			rc = filemap_write_and_wait(inode->i_mapping);
			if (rc != 0)
-				CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
+				pCifsInode->write_behind_rc = rc;
		}
		cFYI(1, "invalidating remote inode since open detected it "
			 "changed");
-		invalidate_remote_inode(file->f_path.dentry->d_inode);
+		invalidate_remote_inode(inode);
	}
 
 client_can_cache:
	if (pTcon->unix_ext)
-		rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode,
-			full_path, inode->i_sb, xid);
+		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
+					      xid);
	else
-		rc = cifs_get_inode_info(&file->f_path.dentry->d_inode,
-			full_path, buf, inode->i_sb, xid, NULL);
+		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
+					 xid, NULL);
 
	if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
		pCifsInode->clientCanCacheAll = true;
		pCifsInode->clientCanCacheRead = true;
-		cFYI(1, "Exclusive Oplock granted on inode %p",
-			file->f_path.dentry->d_inode);
+		cFYI(1, "Exclusive Oplock granted on inode %p", inode);
	} else if ((*oplock & 0xF) == OPLOCK_READ)
		pCifsInode->clientCanCacheRead = true;
 
@@ -256,7 +223,7 @@ int cifs_open(struct inode *inode, struct file *file)
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifsTconInfo *tcon;
-	struct cifsFileInfo *pCifsFile;
+	struct cifsFileInfo *pCifsFile = NULL;
	struct cifsInodeInfo *pCifsInode;
	char *full_path = NULL;
	int desiredAccess;
@@ -270,12 +237,6 @@ int cifs_open(struct inode *inode, struct file *file)
	tcon = cifs_sb->tcon;
 
	pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
-	pCifsFile = cifs_fill_filedata(file);
-	if (pCifsFile) {
-		rc = 0;
-		FreeXid(xid);
-		return rc;
-	}
 
	full_path = build_path_from_dentry(file->f_path.dentry);
	if (full_path == NULL) {
@@ -299,8 +260,7 @@ int cifs_open(struct inode *inode, struct file *file)
		int oflags = (int) cifs_posix_convert_flags(file->f_flags);
		oflags |= SMB_O_CREAT;
		/* can not refresh inode info since size could be stale */
-		rc = cifs_posix_open(full_path, &inode, file->f_path.mnt,
-				     inode->i_sb,
+		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &netfid, xid);
		if (rc == 0) {
@@ -308,9 +268,20 @@ int cifs_open(struct inode *inode, struct file *file)
			/* no need for special case handling of setting mode
			   on read only files needed here */
 
-			pCifsFile = cifs_fill_filedata(file);
-			cifs_posix_open_inode_helper(inode, file, pCifsInode,
-						     oplock, netfid);
+			rc = cifs_posix_open_inode_helper(inode, file,
+					pCifsInode, oplock, netfid);
+			if (rc != 0) {
+				CIFSSMBClose(xid, tcon, netfid);
+				goto out;
+			}
+
+			pCifsFile = cifs_new_fileinfo(inode, netfid, file,
+							file->f_path.mnt,
+							oflags);
+			if (pCifsFile == NULL) {
+				CIFSSMBClose(xid, tcon, netfid);
+				rc = -ENOMEM;
+			}
			goto out;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
@@ -391,17 +362,17 @@ int cifs_open(struct inode *inode, struct file *file)
		goto out;
	}
 
+	rc = cifs_open_inode_helper(inode, tcon, &oplock, buf, full_path, xid);
+	if (rc != 0)
+		goto out;
+
	pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt,
					file->f_flags);
-	file->private_data = pCifsFile;
-	if (file->private_data == NULL) {
+	if (pCifsFile == NULL) {
		rc = -ENOMEM;
		goto out;
	}
 
-	rc = cifs_open_inode_helper(inode, file, pCifsInode, pCifsFile, tcon,
-				    &oplock, buf, full_path, xid);
-
	if (oplock & CIFS_CREATE_ACTION) {
		/* time to set mode which we can not set earlier due to
		   problems creating new read-only files */
@@ -513,8 +484,7 @@ reopen_error_exit:
			le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		int oflags = (int) cifs_posix_convert_flags(file->f_flags);
		/* can not refresh inode info since size could be stale */
-		rc = cifs_posix_open(full_path, NULL, file->f_path.mnt,
-				     inode->i_sb,
+		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &netfid, xid);
		if (rc == 0) {
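One pattern worth noting in the cifs_open() rework above: once the server-side open has succeeded, every subsequent local failure must close the remote handle, or the server leaks an open file. A minimal sketch of that shape; server_open(), refresh_metadata() and alloc_private() are hypothetical stand-ins, not the real cifs calls:

/* Sketch of the unwind order; the extern helpers are invented stand-ins. */
extern int server_open(struct file *file, __u16 *netfid);
extern void server_close(__u16 netfid);
extern int refresh_metadata(struct inode *inode);
extern void *alloc_private(struct inode *inode, __u16 netfid,
			   struct file *file);

static int open_sketch(struct inode *inode, struct file *file)
{
	__u16 netfid;
	void *priv;
	int rc;

	rc = server_open(file, &netfid);
	if (rc)
		return rc;			/* nothing to undo yet */

	rc = refresh_metadata(inode);
	if (rc) {
		server_close(netfid);		/* don't leak the handle */
		return rc;
	}

	priv = alloc_private(inode, netfid, file);
	if (priv == NULL) {
		server_close(netfid);
		return -ENOMEM;
	}

	return 0;	/* from here on, release() closes netfid */
}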
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 62b324f26a56..6f0683c68952 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1401,6 +1401,10 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath,
	if (rc == 0 || rc != -ETXTBSY)
		return rc;
 
+	/* open-file renames don't work across directories */
+	if (to_dentry->d_parent != from_dentry->d_parent)
+		return rc;
+
	/* open the file to be renamed -- we need DELETE perms */
	rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE,
			 CREATE_NOT_DIR, &srcfid, &oplock, NULL,
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7707389bdf2c..0a57cb7db5dd 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -730,15 +730,7 @@ ssetup_ntlmssp_authenticate:
 
		/* calculate session key */
		setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
-		if (first_time) /* should this be moved into common code
-				   with similar ntlmv2 path? */
-		/* cifs_calculate_ntlmv2_mac_key(ses->server->mac_signing_key,
-			response BB FIXME, v2_sess_key); */
-
-		/* copy session key */
-
-	/*	memcpy(bcc_ptr, (char *)ntlm_session_key,LM2_SESS_KEY_SIZE);
-		bcc_ptr += LM2_SESS_KEY_SIZE; */
+		/* FIXME: calculate MAC key */
		memcpy(bcc_ptr, (char *)v2_sess_key,
		       sizeof(struct ntlmv2_resp));
		bcc_ptr += sizeof(struct ntlmv2_resp);
diff --git a/fs/dcache.c b/fs/dcache.c
index d96047b4a633..86d4db15473e 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -590,6 +590,8 @@ static void prune_dcache(int count)
			up_read(&sb->s_umount);
		}
		spin_lock(&sb_lock);
+		/* lock was dropped, must reset next */
+		list_safe_reset_next(sb, n, s_list);
		count -= pruned;
		__put_super(sb);
		/* more work left to do? */
@@ -894,7 +896,7 @@ EXPORT_SYMBOL(shrink_dcache_parent);
  *
  * In this case we return -1 to tell the caller that we baled.
  */
-static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
+static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
	if (nr) {
		if (!(gfp_mask & __GFP_FS))
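The list_safe_reset_next() call above addresses a general hazard: list_for_each_entry_safe() caches the next entry, and that cache goes stale whenever the lock protecting the list is dropped mid-walk. Below is a generic sketch of the pattern, not the dcache code itself; it assumes the current entry stays pinned across the unlock, the way prune_dcache() pins the superblock via s_count (needs_slow_work() and do_slow_work() are invented names):

#include <linux/list.h>
#include <linux/spinlock.h>

struct item {
	struct list_head link;
};

static LIST_HEAD(item_list);
static DEFINE_SPINLOCK(item_lock);

extern bool needs_slow_work(struct item *pos);
extern void do_slow_work(struct item *pos);

static void walk_items(void)
{
	struct item *pos, *n;

	spin_lock(&item_lock);
	list_for_each_entry_safe(pos, n, &item_list, link) {
		if (needs_slow_work(pos)) {
			/* 'pos' must be pinned before dropping the lock */
			spin_unlock(&item_lock);
			do_slow_work(pos);	/* list may change here */
			spin_lock(&item_lock);
			/* 'n' may have been freed meanwhile; recompute it */
			list_safe_reset_next(pos, n, link);
		}
	}
	spin_unlock(&item_lock);
}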
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index ca7e2a0ed98a..2bcc0431bada 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -200,6 +200,7 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
					return error;
				else {
					inode->i_mode = mode;
+					inode->i_ctime = CURRENT_TIME_SEC;
					mark_inode_dirty(inode);
					if (error == 0)
						acl = NULL;
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 01552abbca3c..8a11fe212183 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -205,6 +205,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
					return error;
				else {
					inode->i_mode = mode;
+					inode->i_ctime = CURRENT_TIME_SEC;
					ext3_mark_inode_dirty(handle, inode);
					if (error == 0)
						acl = NULL;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 51e11bf5708f..9d175d623aab 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -733,12 +733,14 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
 {
	while (fa) {
		struct fown_struct *fown;
+		unsigned long flags;
+
		if (fa->magic != FASYNC_MAGIC) {
			printk(KERN_ERR "kill_fasync: bad magic number in "
			       "fasync_struct!\n");
			return;
		}
-		spin_lock(&fa->fa_lock);
+		spin_lock_irqsave(&fa->fa_lock, flags);
		if (fa->fa_file) {
			fown = &fa->fa_file->f_owner;
			/* Don't send SIGURG to processes which have not set a
@@ -747,7 +749,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
			if (!(sig == SIGURG && fown->signum == 0))
				send_sigio(fown, fa->fa_fd, band);
		}
-		spin_unlock(&fa->fa_lock);
+		spin_unlock_irqrestore(&fa->fa_lock, flags);
		fa = rcu_dereference(fa->fa_next);
	}
 }
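Context for the irqsave conversion above: kill_fasync() is callable from interrupt context, since drivers commonly raise SIGIO toward async readers from their interrupt handlers; a plain spin_lock() on fa_lock could therefore deadlock against an IRQ taking the same lock. An illustrative driver fragment (not from the tree):

#include <linux/fs.h>
#include <linux/interrupt.h>

static struct fasync_struct *async_queue;	/* managed via fasync_helper() */

static irqreturn_t sample_irq_handler(int irq, void *dev_id)
{
	/* ... acknowledge the hardware, queue the new data ... */

	/* runs in hard-IRQ context, so fa_lock must be IRQ-safe */
	kill_fasync(&async_queue, SIGIO, POLL_IN);
	return IRQ_HANDLED;
}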
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1d1088f48bc2..d5be1693ac93 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -38,51 +38,18 @@ int nr_pdflush_threads;
 /*
  * Passed into wb_writeback(), essentially a subset of writeback_control
  */
-struct wb_writeback_args {
+struct wb_writeback_work {
	long nr_pages;
	struct super_block *sb;
	enum writeback_sync_modes sync_mode;
	unsigned int for_kupdate:1;
	unsigned int range_cyclic:1;
	unsigned int for_background:1;
-};
 
-/*
- * Work items for the bdi_writeback threads
- */
-struct bdi_work {
	struct list_head list;		/* pending work list */
-	struct rcu_head rcu_head;	/* for RCU free/clear of work */
-
-	unsigned long seen;		/* threads that have seen this work */
-	atomic_t pending;		/* number of threads still to do work */
-
-	struct wb_writeback_args args;	/* writeback arguments */
-
-	unsigned long state;		/* flag bits, see WS_* */
+	struct completion *done;	/* set if the caller waits */
 };
 
-enum {
-	WS_USED_B = 0,
-	WS_ONSTACK_B,
-};
-
-#define WS_USED (1 << WS_USED_B)
-#define WS_ONSTACK (1 << WS_ONSTACK_B)
-
-static inline bool bdi_work_on_stack(struct bdi_work *work)
-{
-	return test_bit(WS_ONSTACK_B, &work->state);
-}
-
-static inline void bdi_work_init(struct bdi_work *work,
-				 struct wb_writeback_args *args)
-{
-	INIT_RCU_HEAD(&work->rcu_head);
-	work->args = *args;
-	work->state = WS_USED;
-}
-
 /**
  * writeback_in_progress - determine whether there is writeback in progress
  * @bdi: the device's backing_dev_info structure.
@@ -95,76 +62,11 @@ int writeback_in_progress(struct backing_dev_info *bdi)
	return !list_empty(&bdi->work_list);
 }
 
-static void bdi_work_clear(struct bdi_work *work)
-{
-	clear_bit(WS_USED_B, &work->state);
-	smp_mb__after_clear_bit();
-	/*
-	 * work can have disappeared at this point. bit waitq functions
-	 * should be able to tolerate this, provided bdi_sched_wait does
-	 * not dereference it's pointer argument.
-	*/
-	wake_up_bit(&work->state, WS_USED_B);
-}
-
-static void bdi_work_free(struct rcu_head *head)
-{
-	struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
-
-	if (!bdi_work_on_stack(work))
-		kfree(work);
-	else
-		bdi_work_clear(work);
-}
-
-static void wb_work_complete(struct bdi_work *work)
-{
-	const enum writeback_sync_modes sync_mode = work->args.sync_mode;
-	int onstack = bdi_work_on_stack(work);
-
-	/*
-	 * For allocated work, we can clear the done/seen bit right here.
-	 * For on-stack work, we need to postpone both the clear and free
-	 * to after the RCU grace period, since the stack could be invalidated
-	 * as soon as bdi_work_clear() has done the wakeup.
-	 */
-	if (!onstack)
-		bdi_work_clear(work);
-	if (sync_mode == WB_SYNC_NONE || onstack)
-		call_rcu(&work->rcu_head, bdi_work_free);
-}
-
-static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
-{
-	/*
-	 * The caller has retrieved the work arguments from this work,
-	 * drop our reference. If this is the last ref, delete and free it
-	 */
-	if (atomic_dec_and_test(&work->pending)) {
-		struct backing_dev_info *bdi = wb->bdi;
-
-		spin_lock(&bdi->wb_lock);
-		list_del_rcu(&work->list);
-		spin_unlock(&bdi->wb_lock);
-
-		wb_work_complete(work);
-	}
-}
-
-static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
+static void bdi_queue_work(struct backing_dev_info *bdi,
+			   struct wb_writeback_work *work)
 {
-	work->seen = bdi->wb_mask;
-	BUG_ON(!work->seen);
-	atomic_set(&work->pending, bdi->wb_cnt);
-	BUG_ON(!bdi->wb_cnt);
-
-	/*
-	 * list_add_tail_rcu() contains the necessary barriers to
-	 * make sure the above stores are seen before the item is
-	 * noticed on the list
-	 */
	spin_lock(&bdi->wb_lock);
-	list_add_tail_rcu(&work->list, &bdi->work_list);
+	list_add_tail(&work->list, &bdi->work_list);
	spin_unlock(&bdi->wb_lock);
 
	/*
@@ -181,97 +83,59 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
	}
 }
 
-/*
- * Used for on-stack allocated work items. The caller needs to wait until
- * the wb threads have acked the work before it's safe to continue.
- */
-static void bdi_wait_on_work_clear(struct bdi_work *work)
-{
-	wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait,
-		    TASK_UNINTERRUPTIBLE);
-}
-
-static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
-				 struct wb_writeback_args *args)
+static void
+__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
+		      bool range_cyclic, bool for_background)
 {
-	struct bdi_work *work;
+	struct wb_writeback_work *work;
 
	/*
	 * This is WB_SYNC_NONE writeback, so if allocation fails just
	 * wakeup the thread for old dirty data writeback
	 */
-	work = kmalloc(sizeof(*work), GFP_ATOMIC);
-	if (work) {
-		bdi_work_init(work, args);
-		bdi_queue_work(bdi, work);
-	} else {
-		struct bdi_writeback *wb = &bdi->wb;
-
-		if (wb->task)
-			wake_up_process(wb->task);
+	work = kzalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work) {
+		if (bdi->wb.task)
+			wake_up_process(bdi->wb.task);
+		return;
	}
+
+	work->sync_mode = WB_SYNC_NONE;
+	work->nr_pages = nr_pages;
+	work->range_cyclic = range_cyclic;
+	work->for_background = for_background;
+
+	bdi_queue_work(bdi, work);
 }
 
 /**
- * bdi_sync_writeback - start and wait for writeback
+ * bdi_start_writeback - start writeback
  * @bdi: the backing device to write from
- * @sb: write inodes from this super_block
+ * @nr_pages: the number of pages to write
  *
  * Description:
- *   This does WB_SYNC_ALL data integrity writeback and waits for the
- *   IO to complete. Callers must hold the sb s_umount semaphore for
- *   reading, to avoid having the super disappear before we are done.
+ *   This does WB_SYNC_NONE opportunistic writeback. The IO is only
+ *   started when this function returns, we make no guarentees on
+ *   completion. Caller need not hold sb s_umount semaphore.
+ *
  */
-static void bdi_sync_writeback(struct backing_dev_info *bdi,
-			       struct super_block *sb)
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
 {
-	struct wb_writeback_args args = {
-		.sb		= sb,
-		.sync_mode	= WB_SYNC_ALL,
-		.nr_pages	= LONG_MAX,
-		.range_cyclic	= 0,
-	};
-	struct bdi_work work;
-
-	bdi_work_init(&work, &args);
-	work.state |= WS_ONSTACK;
-
-	bdi_queue_work(bdi, &work);
-	bdi_wait_on_work_clear(&work);
+	__bdi_start_writeback(bdi, nr_pages, true, false);
 }
 
 /**
- * bdi_start_writeback - start writeback
+ * bdi_start_background_writeback - start background writeback
  * @bdi: the backing device to write from
- * @sb: write inodes from this super_block
- * @nr_pages: the number of pages to write
  *
  * Description:
- *   This does WB_SYNC_NONE opportunistic writeback. The IO is only
+ *   This does WB_SYNC_NONE background writeback. The IO is only
  *   started when this function returns, we make no guarentees on
  *   completion. Caller need not hold sb s_umount semaphore.
- *
  */
-void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
-			 long nr_pages)
+void bdi_start_background_writeback(struct backing_dev_info *bdi)
 {
-	struct wb_writeback_args args = {
-		.sb		= sb,
-		.sync_mode	= WB_SYNC_NONE,
-		.nr_pages	= nr_pages,
-		.range_cyclic	= 1,
-	};
-
-	/*
-	 * We treat @nr_pages=0 as the special case to do background writeback,
-	 * ie. to sync pages until the background dirty threshold is reached.
-	 */
-	if (!nr_pages) {
-		args.nr_pages = LONG_MAX;
-		args.for_background = 1;
-	}
-
-	bdi_alloc_queue_work(bdi, &args);
+	__bdi_start_writeback(bdi, LONG_MAX, true, true);
 }
 
 /*
@@ -561,75 +425,69 @@ select_queue:
	return ret;
 }
 
-static void unpin_sb_for_writeback(struct super_block *sb)
-{
-	up_read(&sb->s_umount);
-	put_super(sb);
-}
-
-enum sb_pin_state {
-	SB_PINNED,
-	SB_NOT_PINNED,
-	SB_PIN_FAILED
-};
-
 /*
- * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
+ * For background writeback the caller does not have the sb pinned
  * before calling writeback. So make sure that we do pin it, so it doesn't
  * go away while we are writing inodes from it.
  */
-static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
-					      struct super_block *sb)
+static bool pin_sb_for_writeback(struct super_block *sb)
 {
-	/*
-	 * Caller must already hold the ref for this
-	 */
-	if (wbc->sync_mode == WB_SYNC_ALL) {
-		WARN_ON(!rwsem_is_locked(&sb->s_umount));
-		return SB_NOT_PINNED;
-	}
	spin_lock(&sb_lock);
+	if (list_empty(&sb->s_instances)) {
+		spin_unlock(&sb_lock);
+		return false;
+	}
+
	sb->s_count++;
+	spin_unlock(&sb_lock);
+
	if (down_read_trylock(&sb->s_umount)) {
-		if (sb->s_root) {
-			spin_unlock(&sb_lock);
-			return SB_PINNED;
-		}
-		/*
-		 * umounted, drop rwsem again and fall through to failure
-		 */
+		if (sb->s_root)
+			return true;
		up_read(&sb->s_umount);
	}
-	sb->s_count--;
-	spin_unlock(&sb_lock);
-	return SB_PIN_FAILED;
+
+	put_super(sb);
+	return false;
 }
 
 /*
  * Write a portion of b_io inodes which belong to @sb.
- * If @wbc->sb != NULL, then find and write all such
+ *
+ * If @only_this_sb is true, then find and write all such
  * inodes. Otherwise write only ones which go sequentially
  * in reverse order.
+ *
  * Return 1, if the caller writeback routine should be
  * interrupted. Otherwise return 0.
  */
-static int writeback_sb_inodes(struct super_block *sb,
-			       struct bdi_writeback *wb,
-			       struct writeback_control *wbc)
+static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
		struct writeback_control *wbc, bool only_this_sb)
 {
	while (!list_empty(&wb->b_io)) {
		long pages_skipped;
		struct inode *inode = list_entry(wb->b_io.prev,
						 struct inode, i_list);
-		if (wbc->sb && sb != inode->i_sb) {
-			/* super block given and doesn't
-			   match, skip this inode */
-			redirty_tail(inode);
-			continue;
-		}
-		if (sb != inode->i_sb)
-			/* finish with this superblock */
+
+		if (inode->i_sb != sb) {
+			if (only_this_sb) {
+				/*
+				 * We only want to write back data for this
+				 * superblock, move all inodes not belonging
+				 * to it back onto the dirty list.
+				 */
+				redirty_tail(inode);
+				continue;
+			}
+
+			/*
+			 * The inode belongs to a different superblock.
+			 * Bounce back to the caller to unpin this and
+			 * pin the next superblock.
+			 */
			return 0;
+		}
+
		if (inode->i_state & (I_NEW | I_WILL_FREE)) {
			requeue_io(inode);
			continue;
@@ -667,8 +525,8 @@ static int writeback_sb_inodes(struct super_block *sb,
	return 1;
 }
 
-static void writeback_inodes_wb(struct bdi_writeback *wb,
-				struct writeback_control *wbc)
+void writeback_inodes_wb(struct bdi_writeback *wb,
			 struct writeback_control *wbc)
 {
	int ret = 0;
 
@@ -681,24 +539,14 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
		struct inode *inode = list_entry(wb->b_io.prev,
						 struct inode, i_list);
		struct super_block *sb = inode->i_sb;
-		enum sb_pin_state state;
 
-		if (wbc->sb && sb != wbc->sb) {
-			/* super block given and doesn't
-			   match, skip this inode */
-			redirty_tail(inode);
-			continue;
-		}
-		state = pin_sb_for_writeback(wbc, sb);
-
-		if (state == SB_PIN_FAILED) {
+		if (!pin_sb_for_writeback(sb)) {
			requeue_io(inode);
			continue;
		}
-		ret = writeback_sb_inodes(sb, wb, wbc);
+		ret = writeback_sb_inodes(sb, wb, wbc, false);
+		drop_super(sb);
 
-		if (state == SB_PINNED)
-			unpin_sb_for_writeback(sb);
		if (ret)
			break;
	}
@@ -706,11 +554,17 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
	/* Leave any unwritten inodes on b_io */
 }
 
-void writeback_inodes_wbc(struct writeback_control *wbc)
+static void __writeback_inodes_sb(struct super_block *sb,
		struct bdi_writeback *wb, struct writeback_control *wbc)
 {
-	struct backing_dev_info *bdi = wbc->bdi;
+	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
-	writeback_inodes_wb(&bdi->wb, wbc);
+	wbc->wb_start = jiffies; /* livelock avoidance */
+	spin_lock(&inode_lock);
+	if (!wbc->for_kupdate || list_empty(&wb->b_io))
+		queue_io(wb, wbc->older_than_this);
+	writeback_sb_inodes(sb, wb, wbc, true);
+	spin_unlock(&inode_lock);
 }
 
 /*
@@ -748,16 +602,14 @@ static inline bool over_bground_thresh(void)
  * all dirty pages if they are all attached to "old" mappings.
  */
 static long wb_writeback(struct bdi_writeback *wb,
-			 struct wb_writeback_args *args)
+			 struct wb_writeback_work *work)
 {
	struct writeback_control wbc = {
-		.bdi			= wb->bdi,
-		.sb			= args->sb,
-		.sync_mode		= args->sync_mode,
+		.sync_mode		= work->sync_mode,
		.older_than_this	= NULL,
-		.for_kupdate		= args->for_kupdate,
-		.for_background		= args->for_background,
-		.range_cyclic		= args->range_cyclic,
+		.for_kupdate		= work->for_kupdate,
+		.for_background		= work->for_background,
+		.range_cyclic		= work->range_cyclic,
	};
	unsigned long oldest_jif;
	long wrote = 0;
@@ -777,21 +629,24 @@ static long wb_writeback(struct bdi_writeback *wb,
		/*
		 * Stop writeback when nr_pages has been consumed
		 */
-		if (args->nr_pages <= 0)
+		if (work->nr_pages <= 0)
			break;
 
		/*
		 * For background writeout, stop when we are below the
		 * background dirty threshold
		 */
-		if (args->for_background && !over_bground_thresh())
+		if (work->for_background && !over_bground_thresh())
			break;
 
		wbc.more_io = 0;
		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
		wbc.pages_skipped = 0;
-		writeback_inodes_wb(wb, &wbc);
-		args->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
+		if (work->sb)
+			__writeback_inodes_sb(work->sb, wb, &wbc);
+		else
+			writeback_inodes_wb(wb, &wbc);
+		work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
795 wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write; 650 wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
796 651
797 /* 652 /*
@@ -827,31 +682,21 @@ static long wb_writeback(struct bdi_writeback *wb,
827} 682}
828 683
829/* 684/*
830 * Return the next bdi_work struct that hasn't been processed by this 685 * Return the next wb_writeback_work struct that hasn't been processed yet.
831 * wb thread yet. ->seen is initially set for each thread that exists
832 * for this device, when a thread first notices a piece of work it
833 * clears its bit. Depending on writeback type, the thread will notify
834 * completion on either receiving the work (WB_SYNC_NONE) or after
835 * it is done (WB_SYNC_ALL).
836 */ 686 */
837static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi, 687static struct wb_writeback_work *
838 struct bdi_writeback *wb) 688get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb)
839{ 689{
840 struct bdi_work *work, *ret = NULL; 690 struct wb_writeback_work *work = NULL;
841 691
842 rcu_read_lock(); 692 spin_lock(&bdi->wb_lock);
843 693 if (!list_empty(&bdi->work_list)) {
844 list_for_each_entry_rcu(work, &bdi->work_list, list) { 694 work = list_entry(bdi->work_list.next,
845 if (!test_bit(wb->nr, &work->seen)) 695 struct wb_writeback_work, list);
846 continue; 696 list_del_init(&work->list);
847 clear_bit(wb->nr, &work->seen);
848
849 ret = work;
850 break;
851 } 697 }
852 698 spin_unlock(&bdi->wb_lock);
853 rcu_read_unlock(); 699 return work;
854 return ret;
855} 700}
856 701
857static long wb_check_old_data_flush(struct bdi_writeback *wb) 702static long wb_check_old_data_flush(struct bdi_writeback *wb)
@@ -876,14 +721,14 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
876 (inodes_stat.nr_inodes - inodes_stat.nr_unused); 721 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
877 722
878 if (nr_pages) { 723 if (nr_pages) {
879 struct wb_writeback_args args = { 724 struct wb_writeback_work work = {
880 .nr_pages = nr_pages, 725 .nr_pages = nr_pages,
881 .sync_mode = WB_SYNC_NONE, 726 .sync_mode = WB_SYNC_NONE,
882 .for_kupdate = 1, 727 .for_kupdate = 1,
883 .range_cyclic = 1, 728 .range_cyclic = 1,
884 }; 729 };
885 730
886 return wb_writeback(wb, &args); 731 return wb_writeback(wb, &work);
887 } 732 }
888 733
889 return 0; 734 return 0;
@@ -895,33 +740,27 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
895long wb_do_writeback(struct bdi_writeback *wb, int force_wait) 740long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
896{ 741{
897 struct backing_dev_info *bdi = wb->bdi; 742 struct backing_dev_info *bdi = wb->bdi;
898 struct bdi_work *work; 743 struct wb_writeback_work *work;
899 long wrote = 0; 744 long wrote = 0;
900 745
901 while ((work = get_next_work_item(bdi, wb)) != NULL) { 746 while ((work = get_next_work_item(bdi, wb)) != NULL) {
902 struct wb_writeback_args args = work->args;
903
904 /* 747 /*
905 * Override sync mode, in case we must wait for completion 748 * Override sync mode, in case we must wait for completion
749 * because this thread is exiting now.
906 */ 750 */
907 if (force_wait) 751 if (force_wait)
908 work->args.sync_mode = args.sync_mode = WB_SYNC_ALL; 752 work->sync_mode = WB_SYNC_ALL;
909
910 /*
911 * If this isn't a data integrity operation, just notify
912 * that we have seen this work and we are now starting it.
913 */
914 if (args.sync_mode == WB_SYNC_NONE)
915 wb_clear_pending(wb, work);
916 753
917 wrote += wb_writeback(wb, &args); 754 wrote += wb_writeback(wb, work);
918 755
919 /* 756 /*
920 * This is a data integrity writeback, so only do the 757 * Notify the caller of completion if this is a synchronous
921 * notification when we have completed the work. 758 * work item, otherwise just free it.
922 */ 759 */
923 if (args.sync_mode == WB_SYNC_ALL) 760 if (work->done)
924 wb_clear_pending(wb, work); 761 complete(work->done);
762 else
763 kfree(work);
925 } 764 }
926 765
927 /* 766 /*
@@ -978,42 +817,27 @@ int bdi_writeback_task(struct bdi_writeback *wb)
978} 817}
979 818
980/* 819/*
981 * Schedule writeback for all backing devices. This does WB_SYNC_NONE 820 * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
982 * writeback, for integrity writeback see bdi_sync_writeback(). 821 * the whole world.
983 */ 822 */
984static void bdi_writeback_all(struct super_block *sb, long nr_pages) 823void wakeup_flusher_threads(long nr_pages)
985{ 824{
986 struct wb_writeback_args args = {
987 .sb = sb,
988 .nr_pages = nr_pages,
989 .sync_mode = WB_SYNC_NONE,
990 };
991 struct backing_dev_info *bdi; 825 struct backing_dev_info *bdi;
992 826
993 rcu_read_lock(); 827 if (!nr_pages) {
828 nr_pages = global_page_state(NR_FILE_DIRTY) +
829 global_page_state(NR_UNSTABLE_NFS);
830 }
994 831
832 rcu_read_lock();
995 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { 833 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
996 if (!bdi_has_dirty_io(bdi)) 834 if (!bdi_has_dirty_io(bdi))
997 continue; 835 continue;
998 836 __bdi_start_writeback(bdi, nr_pages, false, false);
999 bdi_alloc_queue_work(bdi, &args);
1000 } 837 }
1001
1002 rcu_read_unlock(); 838 rcu_read_unlock();
1003} 839}
1004 840
1005/*
1006 * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
1007 * the whole world.
1008 */
1009void wakeup_flusher_threads(long nr_pages)
1010{
1011 if (nr_pages == 0)
1012 nr_pages = global_page_state(NR_FILE_DIRTY) +
1013 global_page_state(NR_UNSTABLE_NFS);
1014 bdi_writeback_all(NULL, nr_pages);
1015}
1016
1017static noinline void block_dump___mark_inode_dirty(struct inode *inode) 841static noinline void block_dump___mark_inode_dirty(struct inode *inode)
1018{ 842{
1019 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { 843 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1218,12 +1042,20 @@ void writeback_inodes_sb(struct super_block *sb)
1218{ 1042{
1219 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY); 1043 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
1220 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS); 1044 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
1221 long nr_to_write; 1045 DECLARE_COMPLETION_ONSTACK(done);
1046 struct wb_writeback_work work = {
1047 .sb = sb,
1048 .sync_mode = WB_SYNC_NONE,
1049 .done = &done,
1050 };
1051
1052 WARN_ON(!rwsem_is_locked(&sb->s_umount));
1222 1053
1223 nr_to_write = nr_dirty + nr_unstable + 1054 work.nr_pages = nr_dirty + nr_unstable +
1224 (inodes_stat.nr_inodes - inodes_stat.nr_unused); 1055 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
1225 1056
1226 bdi_start_writeback(sb->s_bdi, sb, nr_to_write); 1057 bdi_queue_work(sb->s_bdi, &work);
1058 wait_for_completion(&done);
1227} 1059}
1228EXPORT_SYMBOL(writeback_inodes_sb); 1060EXPORT_SYMBOL(writeback_inodes_sb);
1229 1061
@@ -1237,7 +1069,9 @@ EXPORT_SYMBOL(writeback_inodes_sb);
1237int writeback_inodes_sb_if_idle(struct super_block *sb) 1069int writeback_inodes_sb_if_idle(struct super_block *sb)
1238{ 1070{
1239 if (!writeback_in_progress(sb->s_bdi)) { 1071 if (!writeback_in_progress(sb->s_bdi)) {
1072 down_read(&sb->s_umount);
1240 writeback_inodes_sb(sb); 1073 writeback_inodes_sb(sb);
1074 up_read(&sb->s_umount);
1241 return 1; 1075 return 1;
1242 } else 1076 } else
1243 return 0; 1077 return 0;
@@ -1253,7 +1087,20 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
1253 */ 1087 */
1254void sync_inodes_sb(struct super_block *sb) 1088void sync_inodes_sb(struct super_block *sb)
1255{ 1089{
1256 bdi_sync_writeback(sb->s_bdi, sb); 1090 DECLARE_COMPLETION_ONSTACK(done);
1091 struct wb_writeback_work work = {
1092 .sb = sb,
1093 .sync_mode = WB_SYNC_ALL,
1094 .nr_pages = LONG_MAX,
1095 .range_cyclic = 0,
1096 .done = &done,
1097 };
1098
1099 WARN_ON(!rwsem_is_locked(&sb->s_umount));
1100
1101 bdi_queue_work(sb->s_bdi, &work);
1102 wait_for_completion(&done);
1103
1257 wait_sb_inodes(sb); 1104 wait_sb_inodes(sb);
1258} 1105}
1259EXPORT_SYMBOL(sync_inodes_sb); 1106EXPORT_SYMBOL(sync_inodes_sb);
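
A note on the shape of the new scheme: every writeback request above is now a
plain wb_writeback_work, queued on bdi->work_list under wb_lock. Synchronous
callers stack-allocate the work and wait on an on-stack completion;
fire-and-forget callers must kmalloc() it, which is why wb_do_writeback()
kfree()s items that have no ->done. A minimal sketch of the producer side,
assuming the structure layout shown above (the example_* names are
illustrative, and the wake-up is an assumption about what the real
bdi_queue_work() helper does internally):

static void example_queue_work(struct backing_dev_info *bdi,
                               struct wb_writeback_work *work)
{
        spin_lock(&bdi->wb_lock);
        list_add_tail(&work->list, &bdi->work_list); /* consumed by get_next_work_item() */
        spin_unlock(&bdi->wb_lock);
        wake_up_process(bdi->wb.task);               /* kick the per-bdi flusher thread */
}

static void example_sync_caller(struct backing_dev_info *bdi)
{
        DECLARE_COMPLETION_ONSTACK(done);
        struct wb_writeback_work work = {
                .sync_mode = WB_SYNC_ALL,
                .nr_pages  = LONG_MAX,
                .done      = &done,
        };

        example_queue_work(bdi, &work);
        wait_for_completion(&done);     /* wb_do_writeback() signals ->done */
}

Stack allocation is only safe because the caller blocks until the flusher has
finished with the item, exactly as writeback_inodes_sb() does above.
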
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 4a48c0f4b402..84da64b551b2 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1041,6 +1041,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
1041 1041
1042 if (gfs2_is_stuffed(ip)) { 1042 if (gfs2_is_stuffed(ip)) {
1043 u64 dsize = size + sizeof(struct gfs2_inode); 1043 u64 dsize = size + sizeof(struct gfs2_inode);
1044 ip->i_disksize = size;
1044 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 1045 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
1045 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 1046 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1046 gfs2_dinode_out(ip, dibh->b_data); 1047 gfs2_dinode_out(ip, dibh->b_data);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 8295c5b5d4a9..26ca3361a8bc 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -392,7 +392,7 @@ static int gfs2_dirent_find_space(const struct gfs2_dirent *dent,
392 unsigned totlen = be16_to_cpu(dent->de_rec_len); 392 unsigned totlen = be16_to_cpu(dent->de_rec_len);
393 393
394 if (gfs2_dirent_sentinel(dent)) 394 if (gfs2_dirent_sentinel(dent))
395 actual = GFS2_DIRENT_SIZE(0); 395 actual = 0;
396 if (totlen - actual >= required) 396 if (totlen - actual >= required)
397 return 1; 397 return 1;
398 return 0; 398 return 0;
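
The dir.c hunk is an accounting fix: a sentinel dirent carries no name, so
the whole record is reusable when searching for space, and subtracting
GFS2_DIRENT_SIZE(0) made such a slot look smaller than it really is. A worked
check with made-up sizes (the helper is illustrative, not gfs2 code):

/* Suppose de_rec_len == 48 and the new entry needs 48 bytes.  The old
 * code tested 48 - GFS2_DIRENT_SIZE(0) >= 48 and rejected a slot that
 * fits exactly; with actual = 0 the test becomes 48 >= 48 and passes. */
static int example_fits(unsigned totlen, unsigned actual, unsigned required)
{
        return totlen - actual >= required;
}
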
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index ddcdbf493536..0898f3ec8212 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -706,8 +706,18 @@ static void glock_work_func(struct work_struct *work)
706{ 706{
707 unsigned long delay = 0; 707 unsigned long delay = 0;
708 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); 708 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
709 struct gfs2_holder *gh;
709 int drop_ref = 0; 710 int drop_ref = 0;
710 711
712 if (unlikely(test_bit(GLF_FROZEN, &gl->gl_flags))) {
713 spin_lock(&gl->gl_spin);
714 gh = find_first_waiter(gl);
715 if (gh && (gh->gh_flags & LM_FLAG_NOEXP) &&
716 test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))
717 set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
718 spin_unlock(&gl->gl_spin);
719 }
720
711 if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) { 721 if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) {
712 finish_xmote(gl, gl->gl_reply); 722 finish_xmote(gl, gl->gl_reply);
713 drop_ref = 1; 723 drop_ref = 1;
@@ -1348,7 +1358,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1348} 1358}
1349 1359
1350 1360
1351static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) 1361static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
1352{ 1362{
1353 struct gfs2_glock *gl; 1363 struct gfs2_glock *gl;
1354 int may_demote; 1364 int may_demote;
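
This is the first of several hunks in the patch (gfs2/quota.c, fs/inode.c,
fs/mbcache.c and fs/nfs/dir.c below make the same change) adapting to the
shrinker callback gaining a struct shrinker * argument, which lets one
callback recover per-instance state via container_of(). A sketch of the
updated shape, with illustrative example_* names; registration still goes
through register_shrinker()/unregister_shrinker():

static int example_cache_count;

static int example_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
{
        if (nr_to_scan) {
                if (!(gfp_mask & __GFP_FS))
                        return -1;      /* refuse rather than recurse into the fs */
                /* ... free up to nr_to_scan cached objects ... */
        }
        return example_cache_count;     /* remaining reclaimable objects */
}

static struct shrinker example_shrinker = {
        .shrink = example_shrink,
        .seeks  = DEFAULT_SEEKS,
};
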
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index b5612cbb62a5..f03afd9c44bc 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -169,7 +169,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb,
169{ 169{
170 struct inode *inode; 170 struct inode *inode;
171 struct gfs2_inode *ip; 171 struct gfs2_inode *ip;
172 struct gfs2_glock *io_gl; 172 struct gfs2_glock *io_gl = NULL;
173 int error; 173 int error;
174 174
175 inode = gfs2_iget(sb, no_addr); 175 inode = gfs2_iget(sb, no_addr);
@@ -198,6 +198,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb,
198 ip->i_iopen_gh.gh_gl->gl_object = ip; 198 ip->i_iopen_gh.gh_gl->gl_object = ip;
199 199
200 gfs2_glock_put(io_gl); 200 gfs2_glock_put(io_gl);
201 io_gl = NULL;
201 202
202 if ((type == DT_UNKNOWN) && (no_formal_ino == 0)) 203 if ((type == DT_UNKNOWN) && (no_formal_ino == 0))
203 goto gfs2_nfsbypass; 204 goto gfs2_nfsbypass;
@@ -228,7 +229,8 @@ gfs2_nfsbypass:
228fail_glock: 229fail_glock:
229 gfs2_glock_dq(&ip->i_iopen_gh); 230 gfs2_glock_dq(&ip->i_iopen_gh);
230fail_iopen: 231fail_iopen:
231 gfs2_glock_put(io_gl); 232 if (io_gl)
233 gfs2_glock_put(io_gl);
232fail_put: 234fail_put:
233 if (inode->i_state & I_NEW) 235 if (inode->i_state & I_NEW)
234 ip->i_gl->gl_object = NULL; 236 ip->i_gl->gl_object = NULL;
@@ -256,7 +258,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
256{ 258{
257 struct gfs2_sbd *sdp; 259 struct gfs2_sbd *sdp;
258 struct gfs2_inode *ip; 260 struct gfs2_inode *ip;
259 struct gfs2_glock *io_gl; 261 struct gfs2_glock *io_gl = NULL;
260 int error; 262 int error;
261 struct gfs2_holder gh; 263 struct gfs2_holder gh;
262 struct inode *inode; 264 struct inode *inode;
@@ -293,6 +295,7 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
293 295
294 ip->i_iopen_gh.gh_gl->gl_object = ip; 296 ip->i_iopen_gh.gh_gl->gl_object = ip;
295 gfs2_glock_put(io_gl); 297 gfs2_glock_put(io_gl);
298 io_gl = NULL;
296 299
297 inode->i_mode = DT2IF(DT_UNKNOWN); 300 inode->i_mode = DT2IF(DT_UNKNOWN);
298 301
@@ -319,7 +322,8 @@ void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
319fail_glock: 322fail_glock:
320 gfs2_glock_dq(&ip->i_iopen_gh); 323 gfs2_glock_dq(&ip->i_iopen_gh);
321fail_iopen: 324fail_iopen:
322 gfs2_glock_put(io_gl); 325 if (io_gl)
326 gfs2_glock_put(io_gl);
323fail_put: 327fail_put:
324 ip->i_gl->gl_object = NULL; 328 ip->i_gl->gl_object = NULL;
325 gfs2_glock_put(ip->i_gl); 329 gfs2_glock_put(ip->i_gl);
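
Both inode.c hunks apply the same defensive idiom: once the iopen glock
reference has been handed over, drop it and NULL the local so the shared
fail_iopen: label cannot put it a second time. Reduced to its core (all
example_* helpers are hypothetical):

struct kref;
extern struct kref *example_get(void);
extern void example_put(struct kref *ref);
extern int example_step(void);

static int example_setup(void)
{
        struct kref *ref = example_get();

        if (example_step())     /* early failure: we still own ref */
                goto fail;

        example_put(ref);       /* done with our reference */
        ref = NULL;             /* so the shared error path skips the put */

        if (example_step())     /* late failure: ref already dropped */
                goto fail;
        return 0;
fail:
        if (ref)
                example_put(ref);
        return -EIO;
}
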
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 49667d68769e..8f02d3db8f42 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -77,7 +77,7 @@ static LIST_HEAD(qd_lru_list);
77static atomic_t qd_lru_count = ATOMIC_INIT(0); 77static atomic_t qd_lru_count = ATOMIC_INIT(0);
78static DEFINE_SPINLOCK(qd_lru_lock); 78static DEFINE_SPINLOCK(qd_lru_lock);
79 79
80int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask) 80int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
81{ 81{
82 struct gfs2_quota_data *qd; 82 struct gfs2_quota_data *qd;
83 struct gfs2_sbd *sdp; 83 struct gfs2_sbd *sdp;
@@ -694,10 +694,8 @@ get_a_page:
694 if (!buffer_mapped(bh)) 694 if (!buffer_mapped(bh))
695 goto unlock_out; 695 goto unlock_out;
696 /* If it's a newly allocated disk block for quota, zero it */ 696 /* If it's a newly allocated disk block for quota, zero it */
697 if (buffer_new(bh)) { 697 if (buffer_new(bh))
698 memset(bh->b_data, 0, bh->b_size); 698 zero_user(page, pos - blocksize, bh->b_size);
699 set_buffer_uptodate(bh);
700 }
701 } 699 }
702 700
703 if (PageUptodate(page)) 701 if (PageUptodate(page))
@@ -723,7 +721,7 @@ get_a_page:
723 721
724 /* If quota straddles page boundary, we need to update the rest of the 722 /* If quota straddles page boundary, we need to update the rest of the
725 * quota at the beginning of the next page */ 723 * quota at the beginning of the next page */
726 if (offset != 0) { /* first page, offset is closer to PAGE_CACHE_SIZE */ 724 if ((offset + sizeof(struct gfs2_quota)) > PAGE_CACHE_SIZE) {
727 ptr = ptr + nbytes; 725 ptr = ptr + nbytes;
728 nbytes = sizeof(struct gfs2_quota) - nbytes; 726 nbytes = sizeof(struct gfs2_quota) - nbytes;
729 offset = 0; 727 offset = 0;
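
Two separate fixes in the quota.c hunk: zero_user() replaces the open-coded
memset() (it kmaps the page before zeroing the byte range, so it is correct
for highmem pages too), and the page-straddle test now asks whether the
record actually spills over rather than firing whenever the record does not
start at a page boundary. Worked numbers for the new test, with an
illustrative record size:

/* With PAGE_CACHE_SIZE == 4096 and an 88-byte on-disk record:
 *   offset 4040: 4040 + 88 = 4128 > 4096 -> tail continues on the next page
 *   offset 4000: 4000 + 88 = 4088        -> fits, no second pass needed
 * The old "offset != 0" test took the second pass even in the second case. */
static int example_straddles(unsigned offset, size_t reclen)
{
        return offset + reclen > 4096;
}
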
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 195f60c8bd14..e7d236ca48bd 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -51,7 +51,7 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
51 return ret; 51 return ret;
52} 52}
53 53
54extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask); 54extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask);
55extern const struct quotactl_ops gfs2_quotactl_ops; 55extern const struct quotactl_ops gfs2_quotactl_ops;
56 56
57#endif /* __QUOTA_DOT_H__ */ 57#endif /* __QUOTA_DOT_H__ */
diff --git a/fs/inode.c b/fs/inode.c
index 2bee20ae3d65..722860b323a9 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -512,7 +512,7 @@ static void prune_icache(int nr_to_scan)
512 * This function is passed the number of inodes to scan, and it returns the 512 * This function is passed the number of inodes to scan, and it returns the
513 * total number of remaining possibly-reclaimable inodes. 513 * total number of remaining possibly-reclaimable inodes.
514 */ 514 */
515static int shrink_icache_memory(int nr, gfp_t gfp_mask) 515static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
516{ 516{
517 if (nr) { 517 if (nr) {
518 /* 518 /*
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index bc2ff5932769..036880895bfc 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -297,7 +297,6 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
297 struct page *new_page; 297 struct page *new_page;
298 unsigned int new_offset; 298 unsigned int new_offset;
299 struct buffer_head *bh_in = jh2bh(jh_in); 299 struct buffer_head *bh_in = jh2bh(jh_in);
300 struct jbd2_buffer_trigger_type *triggers;
301 journal_t *journal = transaction->t_journal; 300 journal_t *journal = transaction->t_journal;
302 301
303 /* 302 /*
@@ -328,21 +327,21 @@ repeat:
328 done_copy_out = 1; 327 done_copy_out = 1;
329 new_page = virt_to_page(jh_in->b_frozen_data); 328 new_page = virt_to_page(jh_in->b_frozen_data);
330 new_offset = offset_in_page(jh_in->b_frozen_data); 329 new_offset = offset_in_page(jh_in->b_frozen_data);
331 triggers = jh_in->b_frozen_triggers;
332 } else { 330 } else {
333 new_page = jh2bh(jh_in)->b_page; 331 new_page = jh2bh(jh_in)->b_page;
334 new_offset = offset_in_page(jh2bh(jh_in)->b_data); 332 new_offset = offset_in_page(jh2bh(jh_in)->b_data);
335 triggers = jh_in->b_triggers;
336 } 333 }
337 334
338 mapped_data = kmap_atomic(new_page, KM_USER0); 335 mapped_data = kmap_atomic(new_page, KM_USER0);
339 /* 336 /*
340 * Fire any commit trigger. Do this before checking for escaping, 337 * Fire the data frozen trigger if the data wasn't already frozen. Do this
341 * as the trigger may modify the magic offset. If a copy-out 338 * before checking for escaping, as the trigger may modify the magic
342 * happens afterwards, it will have the correct data in the buffer. 339 * offset. If a copy-out happens afterwards, it will have the correct
340 * data in the buffer.
343 */ 341 */
344 jbd2_buffer_commit_trigger(jh_in, mapped_data + new_offset, 342 if (!done_copy_out)
345 triggers); 343 jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset,
344 jh_in->b_triggers);
346 345
347 /* 346 /*
348 * Check for escaping 347 * Check for escaping
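
The rename from t_commit to t_frozen reflects when the hook now runs: once,
at the moment the buffer's journalled image is frozen (in place here, or at
copy-out time in the transaction.c hunk below), instead of on every pass
through the commit path. A sketch of a client, loosely modelled on how ocfs2
uses the trigger to recompute block checksums; the ecc helper is
hypothetical:

extern void example_recompute_ecc(void *data, size_t size);

static void example_frozen(struct jbd2_buffer_trigger_type *type,
                           struct buffer_head *bh, void *mapped_data,
                           size_t size)
{
        /* mapped_data is the image the journal will actually write */
        example_recompute_ecc(mapped_data, size);
}

static struct jbd2_buffer_trigger_type example_triggers = {
        .t_frozen = example_frozen,
};

A buffer opts in with jbd2_journal_set_triggers(bh, &example_triggers).
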
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e214d68620ac..b8e0806681bb 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -725,6 +725,9 @@ done:
725 page = jh2bh(jh)->b_page; 725 page = jh2bh(jh)->b_page;
726 offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK; 726 offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
727 source = kmap_atomic(page, KM_USER0); 727 source = kmap_atomic(page, KM_USER0);
728 /* Fire the data frozen trigger just before we copy the data */
729 jbd2_buffer_frozen_trigger(jh, source + offset,
730 jh->b_triggers);
728 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); 731 memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
729 kunmap_atomic(source, KM_USER0); 732 kunmap_atomic(source, KM_USER0);
730 733
@@ -963,15 +966,15 @@ void jbd2_journal_set_triggers(struct buffer_head *bh,
963 jh->b_triggers = type; 966 jh->b_triggers = type;
964} 967}
965 968
966void jbd2_buffer_commit_trigger(struct journal_head *jh, void *mapped_data, 969void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
967 struct jbd2_buffer_trigger_type *triggers) 970 struct jbd2_buffer_trigger_type *triggers)
968{ 971{
969 struct buffer_head *bh = jh2bh(jh); 972 struct buffer_head *bh = jh2bh(jh);
970 973
971 if (!triggers || !triggers->t_commit) 974 if (!triggers || !triggers->t_frozen)
972 return; 975 return;
973 976
974 triggers->t_commit(triggers, bh, mapped_data, bh->b_size); 977 triggers->t_frozen(triggers, bh, mapped_data, bh->b_size);
975} 978}
976 979
977void jbd2_buffer_abort_trigger(struct journal_head *jh, 980void jbd2_buffer_abort_trigger(struct journal_head *jh,
diff --git a/fs/mbcache.c b/fs/mbcache.c
index ec88ff3d04a9..e28f21b95344 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -115,7 +115,7 @@ mb_cache_indexes(struct mb_cache *cache)
115 * What the mbcache registers as to get shrunk dynamically. 115 * What the mbcache registers as to get shrunk dynamically.
116 */ 116 */
117 117
118static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask); 118static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
119 119
120static struct shrinker mb_cache_shrinker = { 120static struct shrinker mb_cache_shrinker = {
121 .shrink = mb_cache_shrink_fn, 121 .shrink = mb_cache_shrink_fn,
@@ -191,13 +191,14 @@ forget:
191 * This function is called by the kernel memory management when memory 191 * This function is called by the kernel memory management when memory
192 * gets low. 192 * gets low.
193 * 193 *
194 * @shrink: (ignored)
194 * @nr_to_scan: Number of objects to scan 195 * @nr_to_scan: Number of objects to scan
195 * @gfp_mask: (ignored) 196 * @gfp_mask: (ignored)
196 * 197 *
197 * Returns the number of objects which are present in the cache. 198 * Returns the number of objects which are present in the cache.
198 */ 199 */
199static int 200static int
200mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask) 201mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
201{ 202{
202 LIST_HEAD(free_list); 203 LIST_HEAD(free_list);
203 struct list_head *l, *ltmp; 204 struct list_head *l, *ltmp;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 7ec9b34a59f8..d25b5257b7a1 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1286,6 +1286,55 @@ static void nfs4_session_set_rwsize(struct nfs_server *server)
1286#endif /* CONFIG_NFS_V4_1 */ 1286#endif /* CONFIG_NFS_V4_1 */
1287} 1287}
1288 1288
1289static int nfs4_server_common_setup(struct nfs_server *server,
1290 struct nfs_fh *mntfh)
1291{
1292 struct nfs_fattr *fattr;
1293 int error;
1294
1295 BUG_ON(!server->nfs_client);
1296 BUG_ON(!server->nfs_client->rpc_ops);
1297 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1298
1299 fattr = nfs_alloc_fattr();
1300 if (fattr == NULL)
1301 return -ENOMEM;
1302
1303 /* We must ensure the session is initialised first */
1304 error = nfs4_init_session(server);
1305 if (error < 0)
1306 goto out;
1307
1308 /* Probe the root fh to retrieve its FSID and filehandle */
1309 error = nfs4_get_rootfh(server, mntfh);
1310 if (error < 0)
1311 goto out;
1312
1313 dprintk("Server FSID: %llx:%llx\n",
1314 (unsigned long long) server->fsid.major,
1315 (unsigned long long) server->fsid.minor);
1316 dprintk("Mount FH: %d\n", mntfh->size);
1317
1318 nfs4_session_set_rwsize(server);
1319
1320 error = nfs_probe_fsinfo(server, mntfh, fattr);
1321 if (error < 0)
1322 goto out;
1323
1324 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
1325 server->namelen = NFS4_MAXNAMLEN;
1326
1327 spin_lock(&nfs_client_lock);
1328 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1329 list_add_tail(&server->master_link, &nfs_volume_list);
1330 spin_unlock(&nfs_client_lock);
1331
1332 server->mount_time = jiffies;
1333out:
1334 nfs_free_fattr(fattr);
1335 return error;
1336}
1337
1289/* 1338/*
1290 * Create a version 4 volume record 1339 * Create a version 4 volume record
1291 */ 1340 */
@@ -1346,7 +1395,6 @@ error:
1346struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, 1395struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1347 struct nfs_fh *mntfh) 1396 struct nfs_fh *mntfh)
1348{ 1397{
1349 struct nfs_fattr *fattr;
1350 struct nfs_server *server; 1398 struct nfs_server *server;
1351 int error; 1399 int error;
1352 1400
@@ -1356,55 +1404,19 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1356 if (!server) 1404 if (!server)
1357 return ERR_PTR(-ENOMEM); 1405 return ERR_PTR(-ENOMEM);
1358 1406
1359 error = -ENOMEM;
1360 fattr = nfs_alloc_fattr();
1361 if (fattr == NULL)
1362 goto error;
1363
1364 /* set up the general RPC client */ 1407 /* set up the general RPC client */
1365 error = nfs4_init_server(server, data); 1408 error = nfs4_init_server(server, data);
1366 if (error < 0) 1409 if (error < 0)
1367 goto error; 1410 goto error;
1368 1411
1369 BUG_ON(!server->nfs_client); 1412 error = nfs4_server_common_setup(server, mntfh);
1370 BUG_ON(!server->nfs_client->rpc_ops);
1371 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1372
1373 error = nfs4_init_session(server);
1374 if (error < 0)
1375 goto error;
1376
1377 /* Probe the root fh to retrieve its FSID */
1378 error = nfs4_get_rootfh(server, mntfh);
1379 if (error < 0) 1413 if (error < 0)
1380 goto error; 1414 goto error;
1381 1415
1382 dprintk("Server FSID: %llx:%llx\n",
1383 (unsigned long long) server->fsid.major,
1384 (unsigned long long) server->fsid.minor);
1385 dprintk("Mount FH: %d\n", mntfh->size);
1386
1387 nfs4_session_set_rwsize(server);
1388
1389 error = nfs_probe_fsinfo(server, mntfh, fattr);
1390 if (error < 0)
1391 goto error;
1392
1393 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
1394 server->namelen = NFS4_MAXNAMLEN;
1395
1396 spin_lock(&nfs_client_lock);
1397 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1398 list_add_tail(&server->master_link, &nfs_volume_list);
1399 spin_unlock(&nfs_client_lock);
1400
1401 server->mount_time = jiffies;
1402 dprintk("<-- nfs4_create_server() = %p\n", server); 1416 dprintk("<-- nfs4_create_server() = %p\n", server);
1403 nfs_free_fattr(fattr);
1404 return server; 1417 return server;
1405 1418
1406error: 1419error:
1407 nfs_free_fattr(fattr);
1408 nfs_free_server(server); 1420 nfs_free_server(server);
1409 dprintk("<-- nfs4_create_server() = error %d\n", error); 1421 dprintk("<-- nfs4_create_server() = error %d\n", error);
1410 return ERR_PTR(error); 1422 return ERR_PTR(error);
@@ -1418,7 +1430,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1418{ 1430{
1419 struct nfs_client *parent_client; 1431 struct nfs_client *parent_client;
1420 struct nfs_server *server, *parent_server; 1432 struct nfs_server *server, *parent_server;
1421 struct nfs_fattr *fattr;
1422 int error; 1433 int error;
1423 1434
1424 dprintk("--> nfs4_create_referral_server()\n"); 1435 dprintk("--> nfs4_create_referral_server()\n");
@@ -1427,11 +1438,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1427 if (!server) 1438 if (!server)
1428 return ERR_PTR(-ENOMEM); 1439 return ERR_PTR(-ENOMEM);
1429 1440
1430 error = -ENOMEM;
1431 fattr = nfs_alloc_fattr();
1432 if (fattr == NULL)
1433 goto error;
1434
1435 parent_server = NFS_SB(data->sb); 1441 parent_server = NFS_SB(data->sb);
1436 parent_client = parent_server->nfs_client; 1442 parent_client = parent_server->nfs_client;
1437 1443
@@ -1456,40 +1462,14 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1456 if (error < 0) 1462 if (error < 0)
1457 goto error; 1463 goto error;
1458 1464
1459 BUG_ON(!server->nfs_client); 1465 error = nfs4_server_common_setup(server, mntfh);
1460 BUG_ON(!server->nfs_client->rpc_ops);
1461 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1462
1463 /* Probe the root fh to retrieve its FSID and filehandle */
1464 error = nfs4_get_rootfh(server, mntfh);
1465 if (error < 0)
1466 goto error;
1467
1468 /* probe the filesystem info for this server filesystem */
1469 error = nfs_probe_fsinfo(server, mntfh, fattr);
1470 if (error < 0) 1466 if (error < 0)
1471 goto error; 1467 goto error;
1472 1468
1473 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
1474 server->namelen = NFS4_MAXNAMLEN;
1475
1476 dprintk("Referral FSID: %llx:%llx\n",
1477 (unsigned long long) server->fsid.major,
1478 (unsigned long long) server->fsid.minor);
1479
1480 spin_lock(&nfs_client_lock);
1481 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1482 list_add_tail(&server->master_link, &nfs_volume_list);
1483 spin_unlock(&nfs_client_lock);
1484
1485 server->mount_time = jiffies;
1486
1487 nfs_free_fattr(fattr);
1488 dprintk("<-- nfs_create_referral_server() = %p\n", server); 1469 dprintk("<-- nfs_create_referral_server() = %p\n", server);
1489 return server; 1470 return server;
1490 1471
1491error: 1472error:
1492 nfs_free_fattr(fattr);
1493 nfs_free_server(server); 1473 nfs_free_server(server);
1494 dprintk("<-- nfs4_create_referral_server() = error %d\n", error); 1474 dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
1495 return ERR_PTR(error); 1475 return ERR_PTR(error);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 782b431ef91c..e60416d3f818 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1710,7 +1710,7 @@ static void nfs_access_free_list(struct list_head *head)
1710 } 1710 }
1711} 1711}
1712 1712
1713int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) 1713int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
1714{ 1714{
1715 LIST_HEAD(head); 1715 LIST_HEAD(head);
1716 struct nfs_inode *nfsi; 1716 struct nfs_inode *nfsi;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 7428f7d6273b..a70e446e1605 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -146,7 +146,7 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
146 goto out; 146 goto out;
147 } 147 }
148 148
149 if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_MODE) 149 if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE)
150 || !S_ISDIR(fsinfo.fattr->mode)) { 150 || !S_ISDIR(fsinfo.fattr->mode)) {
151 printk(KERN_ERR "nfs4_get_rootfh:" 151 printk(KERN_ERR "nfs4_get_rootfh:"
152 " getroot encountered non-directory\n"); 152 " getroot encountered non-directory\n");
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index d8bd619e386c..e70f44b9b3f4 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -205,7 +205,8 @@ extern struct rpc_procinfo nfs4_procedures[];
205void nfs_close_context(struct nfs_open_context *ctx, int is_sync); 205void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
206 206
207/* dir.c */ 207/* dir.c */
208extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); 208extern int nfs_access_cache_shrinker(struct shrinker *shrink,
209 int nr_to_scan, gfp_t gfp_mask);
209 210
210/* inode.c */ 211/* inode.c */
211extern struct workqueue_struct *nfsiod_workqueue; 212extern struct workqueue_struct *nfsiod_workqueue;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 6bdef28efa33..65c8dae4b267 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -862,8 +862,8 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
862 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; 862 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
863 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME); 863 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
864 *p++ = cpu_to_be32(0); 864 *p++ = cpu_to_be32(0);
865 *p++ = cpu_to_be32(iap->ia_mtime.tv_sec); 865 *p++ = cpu_to_be32(iap->ia_atime.tv_sec);
866 *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec); 866 *p++ = cpu_to_be32(iap->ia_atime.tv_nsec);
867 } 867 }
868 else if (iap->ia_valid & ATTR_ATIME) { 868 else if (iap->ia_valid & ATTR_ATIME) {
869 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; 869 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 04214fc5c304..f9df16de4a56 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -570,6 +570,22 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
570 nfs_show_mountd_netid(m, nfss, showdefaults); 570 nfs_show_mountd_netid(m, nfss, showdefaults);
571} 571}
572 572
573#ifdef CONFIG_NFS_V4
574static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
575 int showdefaults)
576{
577 struct nfs_client *clp = nfss->nfs_client;
578
579 seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
580 seq_printf(m, ",minorversion=%u", clp->cl_minorversion);
581}
582#else
583static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
584 int showdefaults)
585{
586}
587#endif
588
573/* 589/*
574 * Describe the mount options in force on this server representation 590 * Describe the mount options in force on this server representation
575 */ 591 */
@@ -631,11 +647,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
631 647
632 if (version != 4) 648 if (version != 4)
633 nfs_show_mountd_options(m, nfss, showdefaults); 649 nfs_show_mountd_options(m, nfss, showdefaults);
650 else
651 nfs_show_nfsv4_options(m, nfss, showdefaults);
634 652
635#ifdef CONFIG_NFS_V4
636 if (clp->rpc_ops->version == 4)
637 seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
638#endif
639 if (nfss->options & NFS_OPTION_FSCACHE) 653 if (nfss->options & NFS_OPTION_FSCACHE)
640 seq_printf(m, ",fsc"); 654 seq_printf(m, ",fsc");
641} 655}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 3623ca20cc18..356e976772bf 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -196,15 +196,14 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
196 dump_stack(); 196 dump_stack();
197 goto bail; 197 goto bail;
198 } 198 }
199
200 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
201 mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
202 (unsigned long long)past_eof);
203
204 if (create && (iblock >= past_eof))
205 set_buffer_new(bh_result);
206 } 199 }
207 200
201 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
202 mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
203 (unsigned long long)past_eof);
204 if (create && (iblock >= past_eof))
205 set_buffer_new(bh_result);
206
208bail: 207bail:
209 if (err < 0) 208 if (err < 0)
210 err = -EIO; 209 err = -EIO;
@@ -459,36 +458,6 @@ int walk_page_buffers( handle_t *handle,
459 return ret; 458 return ret;
460} 459}
461 460
462handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
463 struct page *page,
464 unsigned from,
465 unsigned to)
466{
467 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
468 handle_t *handle;
469 int ret = 0;
470
471 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
472 if (IS_ERR(handle)) {
473 ret = -ENOMEM;
474 mlog_errno(ret);
475 goto out;
476 }
477
478 if (ocfs2_should_order_data(inode)) {
479 ret = ocfs2_jbd2_file_inode(handle, inode);
480 if (ret < 0)
481 mlog_errno(ret);
482 }
483out:
484 if (ret) {
485 if (!IS_ERR(handle))
486 ocfs2_commit_trans(osb, handle);
487 handle = ERR_PTR(ret);
488 }
489 return handle;
490}
491
492static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) 461static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
493{ 462{
494 sector_t status; 463 sector_t status;
@@ -1131,23 +1100,37 @@ out:
1131 */ 1100 */
1132static int ocfs2_grab_pages_for_write(struct address_space *mapping, 1101static int ocfs2_grab_pages_for_write(struct address_space *mapping,
1133 struct ocfs2_write_ctxt *wc, 1102 struct ocfs2_write_ctxt *wc,
1134 u32 cpos, loff_t user_pos, int new, 1103 u32 cpos, loff_t user_pos,
1104 unsigned user_len, int new,
1135 struct page *mmap_page) 1105 struct page *mmap_page)
1136{ 1106{
1137 int ret = 0, i; 1107 int ret = 0, i;
1138 unsigned long start, target_index, index; 1108 unsigned long start, target_index, end_index, index;
1139 struct inode *inode = mapping->host; 1109 struct inode *inode = mapping->host;
1110 loff_t last_byte;
1140 1111
1141 target_index = user_pos >> PAGE_CACHE_SHIFT; 1112 target_index = user_pos >> PAGE_CACHE_SHIFT;
1142 1113
1143 /* 1114 /*
1144 * Figure out how many pages we'll be manipulating here. For 1115 * Figure out how many pages we'll be manipulating here. For
1145 * non-allocating write, we just change the one 1116 * non-allocating write, we just change the one
1146 * page. Otherwise, we'll need a whole cluster's worth. 1117 * page. Otherwise, we'll need a whole cluster's worth. If we're
1118 * writing past i_size, we only need enough pages to cover the
1119 * last page of the write.
1147 */ 1120 */
1148 if (new) { 1121 if (new) {
1149 wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb); 1122 wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb);
1150 start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos); 1123 start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos);
1124 /*
1125 * We need the index *past* the last page we could possibly
1126 * touch. This is the page past the end of the write or
1127 * i_size, whichever is greater.
1128 */
1129 last_byte = max(user_pos + user_len, i_size_read(inode));
1130 BUG_ON(last_byte < 1);
1131 end_index = ((last_byte - 1) >> PAGE_CACHE_SHIFT) + 1;
1132 if ((start + wc->w_num_pages) > end_index)
1133 wc->w_num_pages = end_index - start;
1151 } else { 1134 } else {
1152 wc->w_num_pages = 1; 1135 wc->w_num_pages = 1;
1153 start = target_index; 1136 start = target_index;
@@ -1620,21 +1603,20 @@ out:
1620 * write path can treat it as a non-allocating write, which has no 1603 * write path can treat it as a non-allocating write, which has no
1621 * special case code for sparse/nonsparse files. 1604 * special case code for sparse/nonsparse files.
1622 */ 1605 */
1623static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos, 1606static int ocfs2_expand_nonsparse_inode(struct inode *inode,
1624 unsigned len, 1607 struct buffer_head *di_bh,
1608 loff_t pos, unsigned len,
1625 struct ocfs2_write_ctxt *wc) 1609 struct ocfs2_write_ctxt *wc)
1626{ 1610{
1627 int ret; 1611 int ret;
1628 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1629 loff_t newsize = pos + len; 1612 loff_t newsize = pos + len;
1630 1613
1631 if (ocfs2_sparse_alloc(osb)) 1614 BUG_ON(ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
1632 return 0;
1633 1615
1634 if (newsize <= i_size_read(inode)) 1616 if (newsize <= i_size_read(inode))
1635 return 0; 1617 return 0;
1636 1618
1637 ret = ocfs2_extend_no_holes(inode, newsize, pos); 1619 ret = ocfs2_extend_no_holes(inode, di_bh, newsize, pos);
1638 if (ret) 1620 if (ret)
1639 mlog_errno(ret); 1621 mlog_errno(ret);
1640 1622
@@ -1644,6 +1626,18 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
1644 return ret; 1626 return ret;
1645} 1627}
1646 1628
1629static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
1630 loff_t pos)
1631{
1632 int ret = 0;
1633
1634 BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
1635 if (pos > i_size_read(inode))
1636 ret = ocfs2_zero_extend(inode, di_bh, pos);
1637
1638 return ret;
1639}
1640
1647int ocfs2_write_begin_nolock(struct address_space *mapping, 1641int ocfs2_write_begin_nolock(struct address_space *mapping,
1648 loff_t pos, unsigned len, unsigned flags, 1642 loff_t pos, unsigned len, unsigned flags,
1649 struct page **pagep, void **fsdata, 1643 struct page **pagep, void **fsdata,
@@ -1679,7 +1673,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1679 } 1673 }
1680 } 1674 }
1681 1675
1682 ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc); 1676 if (ocfs2_sparse_alloc(osb))
1677 ret = ocfs2_zero_tail(inode, di_bh, pos);
1678 else
1679 ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, len,
1680 wc);
1683 if (ret) { 1681 if (ret) {
1684 mlog_errno(ret); 1682 mlog_errno(ret);
1685 goto out; 1683 goto out;
@@ -1789,7 +1787,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1789 * that we can zero and flush if we error after adding the 1787 * that we can zero and flush if we error after adding the
1790 * extent. 1788 * extent.
1791 */ 1789 */
1792 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, 1790 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
1793 cluster_of_pages, mmap_page); 1791 cluster_of_pages, mmap_page);
1794 if (ret) { 1792 if (ret) {
1795 mlog_errno(ret); 1793 mlog_errno(ret);
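
The heart of the aops.c change is the new page bound in
ocfs2_grab_pages_for_write(): rather than always taking a full cluster of
pages, stop at the page past max(end of write, i_size). A worked computation
with 4K pages (PAGE_CACHE_SHIFT == 12), 8 pages per cluster and illustrative
sizes:

/* A 100-byte append at offset 20480 with i_size == 20480, cluster
 * starting at page index 0:
 *   last_byte   = max(20480 + 100, 20480)  = 20580
 *   end_index   = ((20580 - 1) >> 12) + 1  = 6
 *   w_num_pages = min(8, 6 - 0)            = 6
 * Pages 6 and 7 lie wholly past both the write and i_size, so they are
 * no longer grabbed (or zeroed). */
static unsigned long example_num_pages(loff_t pos, unsigned len, loff_t i_size,
                                       unsigned long start,
                                       unsigned long pages_per_cluster)
{
        loff_t last_byte = max_t(loff_t, pos + len, i_size);
        unsigned long end_index = ((last_byte - 1) >> PAGE_CACHE_SHIFT) + 1;

        if (start + pages_per_cluster > end_index)
                return end_index - start;
        return pages_per_cluster;
}
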
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 6b5a492e1749..153abb5abef0 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1671,7 +1671,7 @@ struct dlm_ctxt * dlm_register_domain(const char *domain,
1671 struct dlm_ctxt *dlm = NULL; 1671 struct dlm_ctxt *dlm = NULL;
1672 struct dlm_ctxt *new_ctxt = NULL; 1672 struct dlm_ctxt *new_ctxt = NULL;
1673 1673
1674 if (strlen(domain) > O2NM_MAX_NAME_LEN) { 1674 if (strlen(domain) >= O2NM_MAX_NAME_LEN) {
1675 ret = -ENAMETOOLONG; 1675 ret = -ENAMETOOLONG;
1676 mlog(ML_ERROR, "domain name length too long\n"); 1676 mlog(ML_ERROR, "domain name length too long\n");
1677 goto leave; 1677 goto leave;
@@ -1709,6 +1709,7 @@ retry:
1709 } 1709 }
1710 1710
1711 if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) { 1711 if (dlm_protocol_compare(&dlm->fs_locking_proto, fs_proto)) {
1712 spin_unlock(&dlm_domain_lock);
1712 mlog(ML_ERROR, 1713 mlog(ML_ERROR,
1713 "Requested locking protocol version is not " 1714 "Requested locking protocol version is not "
1714 "compatible with already registered domain " 1715 "compatible with already registered domain "
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 4a7506a4e314..94b97fc6a88e 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2808,14 +2808,8 @@ again:
2808 mlog(0, "trying again...\n"); 2808 mlog(0, "trying again...\n");
2809 goto again; 2809 goto again;
2810 } 2810 }
2811 /* now that we are sure the MIGRATING state is there, drop
2812 * the unneeded state which blocked threads trying to DIRTY */
2813 spin_lock(&res->spinlock);
2814 BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
2815 BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
2816 res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
2817 spin_unlock(&res->spinlock);
2818 2811
2812 ret = 0;
2819 /* did the target go down or die? */ 2813 /* did the target go down or die? */
2820 spin_lock(&dlm->spinlock); 2814 spin_lock(&dlm->spinlock);
2821 if (!test_bit(target, dlm->domain_map)) { 2815 if (!test_bit(target, dlm->domain_map)) {
@@ -2826,9 +2820,21 @@ again:
2826 spin_unlock(&dlm->spinlock); 2820 spin_unlock(&dlm->spinlock);
2827 2821
2828 /* 2822 /*
2823 * if target is down, we need to clear DLM_LOCK_RES_BLOCK_DIRTY for
2824 * another try; otherwise, we are sure the MIGRATING state is there,
2825 * drop the unneeded state which blocked threads trying to DIRTY
2826 */
2827 spin_lock(&res->spinlock);
2828 BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
2829 res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
2830 if (!ret)
2831 BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
2832 spin_unlock(&res->spinlock);
2833
2834 /*
2829 * at this point: 2835 * at this point:
2830 * 2836 *
2831 * o the DLM_LOCK_RES_MIGRATING flag is set 2837 * o the DLM_LOCK_RES_MIGRATING flag is set if target not down
2832 * o there are no pending asts on this lockres 2838 * o there are no pending asts on this lockres
2833 * o all processes trying to reserve an ast on this 2839 * o all processes trying to reserve an ast on this
2834 * lockres must wait for the MIGRATING flag to clear 2840 * lockres must wait for the MIGRATING flag to clear
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index f8b75ce4be70..9dfaac73b36d 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -463,7 +463,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
463 if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { 463 if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
464 int bit; 464 int bit;
465 465
466 bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES+1, 0); 466 bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0);
467 if (bit >= O2NM_MAX_NODES || bit < 0) 467 if (bit >= O2NM_MAX_NODES || bit < 0)
468 dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM); 468 dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM);
469 else 469 else
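
The dlmrecovery.c fix comes down to find_next_bit()'s contract: the size
argument is the number of valid bits, i.e. the scan covers indices
[offset, size), so a bitmap sized for O2NM_MAX_NODES nodes must be scanned
with exactly that size; O2NM_MAX_NODES + 1 let the search read one bit past
the map. In miniature (illustrative helper):

static int example_first_dead(const unsigned long *map, unsigned nbits)
{
        int bit = find_next_bit(map, nbits, 0);

        /* find_next_bit() returns nbits when no bit in [0, nbits) is set */
        return bit >= nbits ? -1 : bit;
}
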
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6a13ea64c447..2b10b36d1577 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -724,28 +724,55 @@ leave:
724 return status; 724 return status;
725} 725}
726 726
727/*
728 * While a write will already be ordering the data, a truncate will not.
729 * Thus, we need to explicitly order the zeroed pages.
730 */
731static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
732{
733 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
734 handle_t *handle = NULL;
735 int ret = 0;
736
737 if (!ocfs2_should_order_data(inode))
738 goto out;
739
740 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
741 if (IS_ERR(handle)) {
742 ret = -ENOMEM;
743 mlog_errno(ret);
744 goto out;
745 }
746
747 ret = ocfs2_jbd2_file_inode(handle, inode);
748 if (ret < 0)
749 mlog_errno(ret);
750
751out:
752 if (ret) {
753 if (!IS_ERR(handle))
754 ocfs2_commit_trans(osb, handle);
755 handle = ERR_PTR(ret);
756 }
757 return handle;
758}
759
727/* Some parts of this were taken from generic_cont_expand, which turned out 760/* Some parts of this were taken from generic_cont_expand, which turned out
728 * to be too fragile to do exactly what we need without us having to 761 * to be too fragile to do exactly what we need without us having to
729 * worry about recursive locking in ->write_begin() and ->write_end(). */ 762 * worry about recursive locking in ->write_begin() and ->write_end(). */
730static int ocfs2_write_zero_page(struct inode *inode, 763static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
731 u64 size) 764 u64 abs_to)
732{ 765{
733 struct address_space *mapping = inode->i_mapping; 766 struct address_space *mapping = inode->i_mapping;
734 struct page *page; 767 struct page *page;
735 unsigned long index; 768 unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
736 unsigned int offset;
737 handle_t *handle = NULL; 769 handle_t *handle = NULL;
738 int ret; 770 int ret = 0;
771 unsigned zero_from, zero_to, block_start, block_end;
739 772
740 offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ 773 BUG_ON(abs_from >= abs_to);
741 /* ugh. in prepare/commit_write, if from==to==start of block, we 774 BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
742 ** skip the prepare. make sure we never send an offset for the start 775 BUG_ON(abs_from & (inode->i_blkbits - 1));
743 ** of a block
744 */
745 if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
746 offset++;
747 }
748 index = size >> PAGE_CACHE_SHIFT;
749 776
750 page = grab_cache_page(mapping, index); 777 page = grab_cache_page(mapping, index);
751 if (!page) { 778 if (!page) {
@@ -754,31 +781,56 @@ static int ocfs2_write_zero_page(struct inode *inode,
754 goto out; 781 goto out;
755 } 782 }
756 783
757 ret = ocfs2_prepare_write_nolock(inode, page, offset, offset); 784 /* Get the offsets within the page that we want to zero */
758 if (ret < 0) { 785 zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
759 mlog_errno(ret); 786 zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
760 goto out_unlock; 787 if (!zero_to)
761 } 788 zero_to = PAGE_CACHE_SIZE;
762 789
763 if (ocfs2_should_order_data(inode)) { 790 mlog(0,
764 handle = ocfs2_start_walk_page_trans(inode, page, offset, 791 "abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n",
765 offset); 792 (unsigned long long)abs_from, (unsigned long long)abs_to,
766 if (IS_ERR(handle)) { 793 index, zero_from, zero_to);
767 ret = PTR_ERR(handle); 794
768 handle = NULL; 795 /* We know that zero_from is block aligned */
796 for (block_start = zero_from; block_start < zero_to;
797 block_start = block_end) {
798 block_end = block_start + (1 << inode->i_blkbits);
799
800 /*
801 * block_start is block-aligned. Bump it by one to
802 * force ocfs2_{prepare,commit}_write() to zero the
803 * whole block.
804 */
805 ret = ocfs2_prepare_write_nolock(inode, page,
806 block_start + 1,
807 block_start + 1);
808 if (ret < 0) {
809 mlog_errno(ret);
769 goto out_unlock; 810 goto out_unlock;
770 } 811 }
771 }
772 812
773 /* must not update i_size! */ 813 if (!handle) {
774 ret = block_commit_write(page, offset, offset); 814 handle = ocfs2_zero_start_ordered_transaction(inode);
775 if (ret < 0) 815 if (IS_ERR(handle)) {
776 mlog_errno(ret); 816 ret = PTR_ERR(handle);
777 else 817 handle = NULL;
778 ret = 0; 818 break;
819 }
820 }
821
822 /* must not update i_size! */
823 ret = block_commit_write(page, block_start + 1,
824 block_start + 1);
825 if (ret < 0)
826 mlog_errno(ret);
827 else
828 ret = 0;
829 }
779 830
780 if (handle) 831 if (handle)
781 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 832 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
833
782out_unlock: 834out_unlock:
783 unlock_page(page); 835 unlock_page(page);
784 page_cache_release(page); 836 page_cache_release(page);
@@ -786,22 +838,114 @@ out:
786 return ret; 838 return ret;
787} 839}
788 840
789static int ocfs2_zero_extend(struct inode *inode, 841/*
790 u64 zero_to_size) 842 * Find the next range to zero. We do this in terms of bytes because
843 * that's what ocfs2_zero_extend() wants, and it is dealing with the
844 * pagecache. The returned range may span multiple extents.
845 *
846 * zero_start and zero_end are ocfs2_zero_extend()'s current idea of what
847 * needs to be zeroed. range_start and range_end return the next zeroing
848 * range. A subsequent call should pass the previous range_end as its
849 * zero_start. If range_end is 0, there's nothing to do.
850 *
851 * Unwritten extents are skipped over. Refcounted extents are CoW'd.
852 */
853static int ocfs2_zero_extend_get_range(struct inode *inode,
854 struct buffer_head *di_bh,
855 u64 zero_start, u64 zero_end,
856 u64 *range_start, u64 *range_end)
791{ 857{
792 int ret = 0; 858 int rc = 0, needs_cow = 0;
793 u64 start_off; 859 u32 p_cpos, zero_clusters = 0;
794 struct super_block *sb = inode->i_sb; 860 u32 zero_cpos =
861 zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
862 u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
863 unsigned int num_clusters = 0;
864 unsigned int ext_flags = 0;
795 865
796 start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode)); 866 while (zero_cpos < last_cpos) {
797 while (start_off < zero_to_size) { 867 rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
798 ret = ocfs2_write_zero_page(inode, start_off); 868 &num_clusters, &ext_flags);
799 if (ret < 0) { 869 if (rc) {
800 mlog_errno(ret); 870 mlog_errno(rc);
871 goto out;
872 }
873
874 if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
875 zero_clusters = num_clusters;
876 if (ext_flags & OCFS2_EXT_REFCOUNTED)
877 needs_cow = 1;
878 break;
879 }
880
881 zero_cpos += num_clusters;
882 }
883 if (!zero_clusters) {
884 *range_end = 0;
885 goto out;
886 }
887
888 while ((zero_cpos + zero_clusters) < last_cpos) {
889 rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
890 &p_cpos, &num_clusters,
891 &ext_flags);
892 if (rc) {
893 mlog_errno(rc);
801 goto out; 894 goto out;
802 } 895 }
803 896
804 start_off += sb->s_blocksize; 897 if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN))
898 break;
899 if (ext_flags & OCFS2_EXT_REFCOUNTED)
900 needs_cow = 1;
901 zero_clusters += num_clusters;
902 }
903 if ((zero_cpos + zero_clusters) > last_cpos)
904 zero_clusters = last_cpos - zero_cpos;
905
906 if (needs_cow) {
907 rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters,
908 UINT_MAX);
909 if (rc) {
910 mlog_errno(rc);
911 goto out;
912 }
913 }
914
915 *range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
916 *range_end = ocfs2_clusters_to_bytes(inode->i_sb,
917 zero_cpos + zero_clusters);
918
919out:
920 return rc;
921}
922
923/*
924 * Zero one range returned from ocfs2_zero_extend_get_range(). The caller
925 * has made sure that the entire range needs zeroing.
926 */
927static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
928 u64 range_end)
929{
930 int rc = 0;
931 u64 next_pos;
932 u64 zero_pos = range_start;
933
934 mlog(0, "range_start = %llu, range_end = %llu\n",
935 (unsigned long long)range_start,
936 (unsigned long long)range_end);
937 BUG_ON(range_start >= range_end);
938
939 while (zero_pos < range_end) {
940 next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
941 if (next_pos > range_end)
942 next_pos = range_end;
943 rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
944 if (rc < 0) {
945 mlog_errno(rc);
946 break;
947 }
948 zero_pos = next_pos;
805 949
806 /* 950 /*
807 * Very large extends have the potential to lock up 951 * Very large extends have the potential to lock up
@@ -810,16 +954,63 @@ static int ocfs2_zero_extend(struct inode *inode,
810 cond_resched(); 954 cond_resched();
811 } 955 }
812 956
813out: 957 return rc;
958}
959
960int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
961 loff_t zero_to_size)
962{
963 int ret = 0;
964 u64 zero_start, range_start = 0, range_end = 0;
965 struct super_block *sb = inode->i_sb;
966
967 zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
968 mlog(0, "zero_start %llu for i_size %llu\n",
969 (unsigned long long)zero_start,
970 (unsigned long long)i_size_read(inode));
971 while (zero_start < zero_to_size) {
972 ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
973 zero_to_size,
974 &range_start,
975 &range_end);
976 if (ret) {
977 mlog_errno(ret);
978 break;
979 }
980 if (!range_end)
981 break;
982 /* Trim the ends */
983 if (range_start < zero_start)
984 range_start = zero_start;
985 if (range_end > zero_to_size)
986 range_end = zero_to_size;
987
988 ret = ocfs2_zero_extend_range(inode, range_start,
989 range_end);
990 if (ret) {
991 mlog_errno(ret);
992 break;
993 }
994 zero_start = range_end;
995 }
996
814 return ret; 997 return ret;
815} 998}
816 999
817int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to) 1000int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
1001 u64 new_i_size, u64 zero_to)
818{ 1002{
819 int ret; 1003 int ret;
820 u32 clusters_to_add; 1004 u32 clusters_to_add;
821 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1005 struct ocfs2_inode_info *oi = OCFS2_I(inode);
822 1006
1007 /*
1008 * Only quota files call this without a bh, and they can't be
1009 * refcounted.
1010 */
1011 BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
1012 BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
1013
823 clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size); 1014 clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
824 if (clusters_to_add < oi->ip_clusters) 1015 if (clusters_to_add < oi->ip_clusters)
825 clusters_to_add = 0; 1016 clusters_to_add = 0;
@@ -840,7 +1031,7 @@ int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
840 * still need to zero the area between the old i_size and the 1031 * still need to zero the area between the old i_size and the
841 * new i_size. 1032 * new i_size.
842 */ 1033 */
843 ret = ocfs2_zero_extend(inode, zero_to); 1034 ret = ocfs2_zero_extend(inode, di_bh, zero_to);
844 if (ret < 0) 1035 if (ret < 0)
845 mlog_errno(ret); 1036 mlog_errno(ret);
846 1037
@@ -862,27 +1053,15 @@ static int ocfs2_extend_file(struct inode *inode,
862 goto out; 1053 goto out;
863 1054
864 if (i_size_read(inode) == new_i_size) 1055 if (i_size_read(inode) == new_i_size)
865 goto out; 1056 goto out;
866 BUG_ON(new_i_size < i_size_read(inode)); 1057 BUG_ON(new_i_size < i_size_read(inode));
867 1058
868 /* 1059 /*
869 * Fall through for converting inline data, even if the fs
870 * supports sparse files.
871 *
872 * The check for inline data here is legal - nobody can add
873 * the feature since we have i_mutex. We must check it again
874 * after acquiring ip_alloc_sem though, as paths like mmap
875 * might have raced us to converting the inode to extents.
876 */
877 if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
878 && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
879 goto out_update_size;
880
881 /*
882 * The alloc sem blocks people in read/write from reading our 1060 * The alloc sem blocks people in read/write from reading our
883 * allocation until we're done changing it. We depend on 1061 * allocation until we're done changing it. We depend on
884 * i_mutex to block other extend/truncate calls while we're 1062 * i_mutex to block other extend/truncate calls while we're
885 * here. 1063 * here. We even have to hold it for sparse files because there
1064 * might be some tail zeroing.
886 */ 1065 */
887 down_write(&oi->ip_alloc_sem); 1066 down_write(&oi->ip_alloc_sem);
888 1067
@@ -899,14 +1078,16 @@ static int ocfs2_extend_file(struct inode *inode,
899 ret = ocfs2_convert_inline_data_to_extents(inode, di_bh); 1078 ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
900 if (ret) { 1079 if (ret) {
901 up_write(&oi->ip_alloc_sem); 1080 up_write(&oi->ip_alloc_sem);
902
903 mlog_errno(ret); 1081 mlog_errno(ret);
904 goto out; 1082 goto out;
905 } 1083 }
906 } 1084 }
907 1085
908 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) 1086 if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
909 ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size); 1087 ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
1088 else
1089 ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
1090 new_i_size);
910 1091
911 up_write(&oi->ip_alloc_sem); 1092 up_write(&oi->ip_alloc_sem);
912 1093
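
[Editor's note] The file.c hunks above restructure tail zeroing: ocfs2_zero_extend() now asks ocfs2_zero_extend_get_range() for the next allocated, non-unwritten range (CoWing refcounted extents first), trims it to [old i_size, zero_to), and hands it to ocfs2_zero_extend_range(), which zeroes page by page. Holes and unwritten extents already read back as zeroes, so only written allocations past i_size need touching. A minimal user-space sketch of that control flow, with a toy extent table standing in for ocfs2_get_clusters() and invented sizes:

/* Illustrative sketch only; the extent table and sizes are invented, and
 * just the control flow (range lookup, end trimming, page-sized zeroing)
 * mirrors the patch. */
#include <stdio.h>

#define PAGE_SIZE 4096ULL
#define PAGE_MASK (~(PAGE_SIZE - 1))

struct range { unsigned long long start, end; };   /* written allocations */

static const struct range extents[] = { { 8192, 20480 }, { 32768, 40960 } };

static void zero_extend_range(unsigned long long start, unsigned long long end)
{
        unsigned long long pos = start, next;

        while (pos < end) {
                next = (pos & PAGE_MASK) + PAGE_SIZE;   /* one page at a time */
                if (next > end)
                        next = end;
                printf("zeroing [%llu, %llu)\n", pos, next);
                pos = next;
        }
}

int main(void)
{
        unsigned long long old_size = 10000, zero_to = 36000;
        unsigned int i;

        for (i = 0; i < sizeof(extents) / sizeof(extents[0]); i++) {
                unsigned long long s = extents[i].start, e = extents[i].end;

                if (e <= old_size || s >= zero_to)
                        continue;
                if (s < old_size)       /* "Trim the ends", as in the patch */
                        s = old_size;
                if (e > zero_to)
                        e = zero_to;
                zero_extend_range(s, e);
        }
        return 0;
}
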
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index d66cf4f7c70e..97bf761c9e7c 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -54,8 +54,10 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
54int ocfs2_simple_size_update(struct inode *inode, 54int ocfs2_simple_size_update(struct inode *inode,
55 struct buffer_head *di_bh, 55 struct buffer_head *di_bh,
56 u64 new_i_size); 56 u64 new_i_size);
57int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, 57int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
58 u64 zero_to); 58 u64 new_i_size, u64 zero_to);
59int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
60 loff_t zero_to);
59int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); 61int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
60int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, 62int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
61 struct kstat *stat); 63 struct kstat *stat);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 47878cf16418..625de9d7088c 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -472,7 +472,7 @@ static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger
472 return container_of(triggers, struct ocfs2_triggers, ot_triggers); 472 return container_of(triggers, struct ocfs2_triggers, ot_triggers);
473} 473}
474 474
475static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers, 475static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
476 struct buffer_head *bh, 476 struct buffer_head *bh,
477 void *data, size_t size) 477 void *data, size_t size)
478{ 478{
@@ -491,7 +491,7 @@ static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
491 * Quota blocks have their own trigger because the struct ocfs2_block_check 491 * Quota blocks have their own trigger because the struct ocfs2_block_check
492 * offset depends on the blocksize. 492 * offset depends on the blocksize.
493 */ 493 */
494static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers, 494static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
495 struct buffer_head *bh, 495 struct buffer_head *bh,
496 void *data, size_t size) 496 void *data, size_t size)
497{ 497{
@@ -511,7 +511,7 @@ static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers,
511 * Directory blocks also have their own trigger because the 511 * Directory blocks also have their own trigger because the
512 * struct ocfs2_block_check offset depends on the blocksize. 512 * struct ocfs2_block_check offset depends on the blocksize.
513 */ 513 */
514static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers, 514static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
515 struct buffer_head *bh, 515 struct buffer_head *bh,
516 void *data, size_t size) 516 void *data, size_t size)
517{ 517{
@@ -544,7 +544,7 @@ static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
544 544
545static struct ocfs2_triggers di_triggers = { 545static struct ocfs2_triggers di_triggers = {
546 .ot_triggers = { 546 .ot_triggers = {
547 .t_commit = ocfs2_commit_trigger, 547 .t_frozen = ocfs2_frozen_trigger,
548 .t_abort = ocfs2_abort_trigger, 548 .t_abort = ocfs2_abort_trigger,
549 }, 549 },
550 .ot_offset = offsetof(struct ocfs2_dinode, i_check), 550 .ot_offset = offsetof(struct ocfs2_dinode, i_check),
@@ -552,7 +552,7 @@ static struct ocfs2_triggers di_triggers = {
552 552
553static struct ocfs2_triggers eb_triggers = { 553static struct ocfs2_triggers eb_triggers = {
554 .ot_triggers = { 554 .ot_triggers = {
555 .t_commit = ocfs2_commit_trigger, 555 .t_frozen = ocfs2_frozen_trigger,
556 .t_abort = ocfs2_abort_trigger, 556 .t_abort = ocfs2_abort_trigger,
557 }, 557 },
558 .ot_offset = offsetof(struct ocfs2_extent_block, h_check), 558 .ot_offset = offsetof(struct ocfs2_extent_block, h_check),
@@ -560,7 +560,7 @@ static struct ocfs2_triggers eb_triggers = {
560 560
561static struct ocfs2_triggers rb_triggers = { 561static struct ocfs2_triggers rb_triggers = {
562 .ot_triggers = { 562 .ot_triggers = {
563 .t_commit = ocfs2_commit_trigger, 563 .t_frozen = ocfs2_frozen_trigger,
564 .t_abort = ocfs2_abort_trigger, 564 .t_abort = ocfs2_abort_trigger,
565 }, 565 },
566 .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check), 566 .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check),
@@ -568,7 +568,7 @@ static struct ocfs2_triggers rb_triggers = {
568 568
569static struct ocfs2_triggers gd_triggers = { 569static struct ocfs2_triggers gd_triggers = {
570 .ot_triggers = { 570 .ot_triggers = {
571 .t_commit = ocfs2_commit_trigger, 571 .t_frozen = ocfs2_frozen_trigger,
572 .t_abort = ocfs2_abort_trigger, 572 .t_abort = ocfs2_abort_trigger,
573 }, 573 },
574 .ot_offset = offsetof(struct ocfs2_group_desc, bg_check), 574 .ot_offset = offsetof(struct ocfs2_group_desc, bg_check),
@@ -576,14 +576,14 @@ static struct ocfs2_triggers gd_triggers = {
576 576
577static struct ocfs2_triggers db_triggers = { 577static struct ocfs2_triggers db_triggers = {
578 .ot_triggers = { 578 .ot_triggers = {
579 .t_commit = ocfs2_db_commit_trigger, 579 .t_frozen = ocfs2_db_frozen_trigger,
580 .t_abort = ocfs2_abort_trigger, 580 .t_abort = ocfs2_abort_trigger,
581 }, 581 },
582}; 582};
583 583
584static struct ocfs2_triggers xb_triggers = { 584static struct ocfs2_triggers xb_triggers = {
585 .ot_triggers = { 585 .ot_triggers = {
586 .t_commit = ocfs2_commit_trigger, 586 .t_frozen = ocfs2_frozen_trigger,
587 .t_abort = ocfs2_abort_trigger, 587 .t_abort = ocfs2_abort_trigger,
588 }, 588 },
589 .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check), 589 .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check),
@@ -591,14 +591,14 @@ static struct ocfs2_triggers xb_triggers = {
591 591
592static struct ocfs2_triggers dq_triggers = { 592static struct ocfs2_triggers dq_triggers = {
593 .ot_triggers = { 593 .ot_triggers = {
594 .t_commit = ocfs2_dq_commit_trigger, 594 .t_frozen = ocfs2_dq_frozen_trigger,
595 .t_abort = ocfs2_abort_trigger, 595 .t_abort = ocfs2_abort_trigger,
596 }, 596 },
597}; 597};
598 598
599static struct ocfs2_triggers dr_triggers = { 599static struct ocfs2_triggers dr_triggers = {
600 .ot_triggers = { 600 .ot_triggers = {
601 .t_commit = ocfs2_commit_trigger, 601 .t_frozen = ocfs2_frozen_trigger,
602 .t_abort = ocfs2_abort_trigger, 602 .t_abort = ocfs2_abort_trigger,
603 }, 603 },
604 .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check), 604 .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check),
@@ -606,7 +606,7 @@ static struct ocfs2_triggers dr_triggers = {
606 606
607static struct ocfs2_triggers dl_triggers = { 607static struct ocfs2_triggers dl_triggers = {
608 .ot_triggers = { 608 .ot_triggers = {
609 .t_commit = ocfs2_commit_trigger, 609 .t_frozen = ocfs2_frozen_trigger,
610 .t_abort = ocfs2_abort_trigger, 610 .t_abort = ocfs2_abort_trigger,
611 }, 611 },
612 .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check), 612 .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check),
@@ -1936,7 +1936,7 @@ void ocfs2_orphan_scan_work(struct work_struct *work)
1936 mutex_lock(&os->os_lock); 1936 mutex_lock(&os->os_lock);
1937 ocfs2_queue_orphan_scan(osb); 1937 ocfs2_queue_orphan_scan(osb);
1938 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) 1938 if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
1939 schedule_delayed_work(&os->os_orphan_scan_work, 1939 queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
1940 ocfs2_orphan_scan_timeout()); 1940 ocfs2_orphan_scan_timeout());
1941 mutex_unlock(&os->os_lock); 1941 mutex_unlock(&os->os_lock);
1942} 1942}
@@ -1976,8 +1976,8 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
1976 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); 1976 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1977 else { 1977 else {
1978 atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE); 1978 atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
1979 schedule_delayed_work(&os->os_orphan_scan_work, 1979 queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
1980 ocfs2_orphan_scan_timeout()); 1980 ocfs2_orphan_scan_timeout());
1981 } 1981 }
1982} 1982}
1983 1983
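
[Editor's note] The journal.c hunk renames the ocfs2 trigger callbacks from t_commit to t_frozen, matching the jbd2 semantics: the hook runs when the buffer's frozen copy is produced for journal writeout, which is when the block checksum must be recomputed. It also queues the orphan scan on ocfs2_wq rather than the system workqueue so it can be flushed with the rest of ocfs2's work. A toy model of the trigger-table shape; the check-word offset and the checksum are invented stand-ins for ocfs2_block_check:

/* Toy model: each block type carries a trigger table, and the frozen
 * callback recomputes a checksum over the frozen buffer.  The offset
 * (64) and toy_sum() are stand-ins, not ocfs2's layout. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct trigger_type {
        void (*t_frozen)(struct trigger_type *t, void *data, size_t size);
};

struct my_triggers {
        struct trigger_type ot_triggers;
        size_t ot_offset;               /* where the check word lives */
};

static uint32_t toy_sum(const uint8_t *p, size_t n)
{
        uint32_t c = 5381;
        while (n--)
                c = c * 33 + *p++;      /* djb2-style stand-in */
        return c;
}

static void frozen_trigger(struct trigger_type *t, void *data, size_t size)
{
        struct my_triggers *mt = (struct my_triggers *)
                ((char *)t - offsetof(struct my_triggers, ot_triggers));
        uint32_t *check = (uint32_t *)((char *)data + mt->ot_offset);

        *check = 0;                     /* zero the field, then sum the block */
        *check = toy_sum(data, size);
}

static struct my_triggers di_triggers = {
        .ot_triggers = { .t_frozen = frozen_trigger },
        .ot_offset   = 64,
};

int main(void)
{
        uint8_t block[512] = { 1, 2, 3 };

        di_triggers.ot_triggers.t_frozen(&di_triggers.ot_triggers,
                                         block, sizeof(block));
        printf("check word: %u\n", (unsigned)*(uint32_t *)(block + 64));
        return 0;
}
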
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 3d7419682dc0..ec6adbf8f551 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -118,6 +118,7 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
118{ 118{
119 unsigned int la_mb; 119 unsigned int la_mb;
120 unsigned int gd_mb; 120 unsigned int gd_mb;
121 unsigned int la_max_mb;
121 unsigned int megs_per_slot; 122 unsigned int megs_per_slot;
122 struct super_block *sb = osb->sb; 123 struct super_block *sb = osb->sb;
123 124
@@ -182,6 +183,12 @@ unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
182 if (megs_per_slot < la_mb) 183 if (megs_per_slot < la_mb)
183 la_mb = megs_per_slot; 184 la_mb = megs_per_slot;
184 185
186 /* We can't store more bits than fit in a block. */
187 la_max_mb = ocfs2_clusters_to_megabytes(osb->sb,
188 ocfs2_local_alloc_size(sb) * 8);
189 if (la_mb > la_max_mb)
190 la_mb = la_max_mb;
191
185 return la_mb; 192 return la_mb;
186} 193}
187 194
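
[Editor's note] The localalloc.c hunk caps the default local alloc window: the inline bitmap can track at most ocfs2_local_alloc_size(sb) * 8 clusters, so any larger default is clamped to that many clusters expressed in megabytes. A back-of-the-envelope check with assumed numbers (the real byte count depends on block size and dinode layout):

/* Assumed numbers only: ~3900 usable bitmap bytes and 4K clusters. */
#include <stdio.h>

int main(void)
{
        unsigned long long bitmap_bytes = 3900;         /* assumption */
        unsigned long long cluster_size = 4096;         /* 4K clusters */
        unsigned long long max_bits = bitmap_bytes * 8; /* trackable clusters */
        unsigned long long la_max_mb =
                max_bits * cluster_size / (1024 * 1024);

        printf("bitmap covers %llu clusters -> cap at %llu MB\n",
               max_bits, la_max_mb);
        return 0;
}
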
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 2bb35fe00511..4607923eb24c 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -775,7 +775,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
775 * locking allocators ranks above a transaction start 775 * locking allocators ranks above a transaction start
776 */ 776 */
777 WARN_ON(journal_current_handle()); 777 WARN_ON(journal_current_handle());
778 status = ocfs2_extend_no_holes(gqinode, 778 status = ocfs2_extend_no_holes(gqinode, NULL,
779 gqinode->i_size + (need_alloc << sb->s_blocksize_bits), 779 gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
780 gqinode->i_size); 780 gqinode->i_size);
781 if (status < 0) 781 if (status < 0)
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 8bd70d4d184d..dc78764ccc4c 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -971,7 +971,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
971 u64 p_blkno; 971 u64 p_blkno;
972 972
973 /* We are protected by dqio_sem so no locking needed */ 973 /* We are protected by dqio_sem so no locking needed */
974 status = ocfs2_extend_no_holes(lqinode, 974 status = ocfs2_extend_no_holes(lqinode, NULL,
975 lqinode->i_size + 2 * sb->s_blocksize, 975 lqinode->i_size + 2 * sb->s_blocksize,
976 lqinode->i_size); 976 lqinode->i_size);
977 if (status < 0) { 977 if (status < 0) {
@@ -1114,7 +1114,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1114 return ocfs2_local_quota_add_chunk(sb, type, offset); 1114 return ocfs2_local_quota_add_chunk(sb, type, offset);
1115 1115
1116 /* We are protected by dqio_sem so no locking needed */ 1116 /* We are protected by dqio_sem so no locking needed */
1117 status = ocfs2_extend_no_holes(lqinode, 1117 status = ocfs2_extend_no_holes(lqinode, NULL,
1118 lqinode->i_size + sb->s_blocksize, 1118 lqinode->i_size + sb->s_blocksize,
1119 lqinode->i_size); 1119 lqinode->i_size);
1120 if (status < 0) { 1120 if (status < 0) {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 4793f36f6518..3ac5aa733e9c 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2931,6 +2931,12 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2931 2931
2932 offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; 2932 offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
2933 end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); 2933 end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
2934 /*
2935 * We only duplicate pages until we reach the page that contains i_size - 1.
2936 * So trim 'end' to i_size.
2937 */
2938 if (end > i_size_read(context->inode))
2939 end = i_size_read(context->inode);
2934 2940
2935 while (offset < end) { 2941 while (offset < end) {
2936 page_index = offset >> PAGE_CACHE_SHIFT; 2942 page_index = offset >> PAGE_CACHE_SHIFT;
@@ -4166,6 +4172,12 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
4166 struct inode *inode = old_dentry->d_inode; 4172 struct inode *inode = old_dentry->d_inode;
4167 struct buffer_head *new_bh = NULL; 4173 struct buffer_head *new_bh = NULL;
4168 4174
4175 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
4176 ret = -EINVAL;
4177 mlog_errno(ret);
4178 goto out;
4179 }
4180
4169 ret = filemap_fdatawrite(inode->i_mapping); 4181 ret = filemap_fdatawrite(inode->i_mapping);
4170 if (ret) { 4182 if (ret) {
4171 mlog_errno(ret); 4183 mlog_errno(ret);
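
[Editor's note] Two refcounttree.c fixes above: page-level CoW stops at the page holding i_size - 1, so the byte range computed from whole clusters is clamped to i_size before the copy loop, and __ocfs2_reflink() now rejects system files outright. The clamp in isolation, with illustrative constants:

/* Illustrative constants; only the clamp itself comes from the patch. */
#include <stdio.h>

int main(void)
{
        unsigned long long cluster_bytes = 32768;       /* 32K clusters */
        unsigned long long cpos = 2, new_len = 3;       /* clusters to copy */
        unsigned long long i_size = 90000;

        unsigned long long offset = cpos * cluster_bytes;
        unsigned long long end = offset + new_len * cluster_bytes;

        if (end > i_size)       /* the new check: don't copy past EOF pages */
                end = i_size;

        printf("duplicate pages in [%llu, %llu)\n", offset, end);
        return 0;
}
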
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
index 40650021fc24..d8b6e4259b80 100644
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -26,7 +26,6 @@
26 26
27#include <linux/fs.h> 27#include <linux/fs.h>
28#include <linux/types.h> 28#include <linux/types.h>
29#include <linux/slab.h>
30#include <linux/highmem.h> 29#include <linux/highmem.h>
31#include <linux/bitops.h> 30#include <linux/bitops.h>
32#include <linux/list.h> 31#include <linux/list.h>
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index f4c2a9eb8c4d..a8e6a95a353f 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -741,7 +741,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
741 le16_to_cpu(bg->bg_free_bits_count)); 741 le16_to_cpu(bg->bg_free_bits_count));
742 le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, 742 le32_add_cpu(&cl->cl_recs[alloc_rec].c_total,
743 le16_to_cpu(bg->bg_bits)); 743 le16_to_cpu(bg->bg_bits));
744 cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg->bg_blkno); 744 cl->cl_recs[alloc_rec].c_blkno = bg->bg_blkno;
745 if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count)) 745 if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
746 le16_add_cpu(&cl->cl_next_free_rec, 1); 746 le16_add_cpu(&cl->cl_next_free_rec, 1);
747 747
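
[Editor's note] The suballoc.c one-liner is an endianness fix: bg->bg_blkno is already a little-endian on-disk value, so wrapping it in cpu_to_le64() swapped it a second time on big-endian hosts. A stand-alone demo of the double-conversion effect, assuming GCC/Clang's __builtin_bswap64 and faking a big-endian cpu_to_le64():

/* Fakes a big-endian cpu_to_le64() so the effect is visible on any host;
 * the block number is arbitrary. */
#include <stdint.h>
#include <stdio.h>

#define fake_cpu_to_le64(x) __builtin_bswap64(x)    /* as if big-endian */

int main(void)
{
        uint64_t blkno = 4096;
        uint64_t disk_le = fake_cpu_to_le64(blkno);     /* correct on disk */
        uint64_t doubled = fake_cpu_to_le64(disk_le);   /* the old bug */

        printf("on-disk %016llx, double-converted %016llx\n",
               (unsigned long long)disk_le, (unsigned long long)doubled);
        return 0;
}
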
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index e97b34842cfe..d03469f61801 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -709,7 +709,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
709 struct ocfs2_xattr_value_buf *vb, 709 struct ocfs2_xattr_value_buf *vb,
710 struct ocfs2_xattr_set_ctxt *ctxt) 710 struct ocfs2_xattr_set_ctxt *ctxt)
711{ 711{
712 int status = 0; 712 int status = 0, credits;
713 handle_t *handle = ctxt->handle; 713 handle_t *handle = ctxt->handle;
714 enum ocfs2_alloc_restarted why; 714 enum ocfs2_alloc_restarted why;
715 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); 715 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
@@ -719,38 +719,54 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
719 719
720 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb); 720 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
721 721
722 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh, 722 while (clusters_to_add) {
723 OCFS2_JOURNAL_ACCESS_WRITE); 723 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
724 if (status < 0) { 724 OCFS2_JOURNAL_ACCESS_WRITE);
725 mlog_errno(status); 725 if (status < 0) {
726 goto leave; 726 mlog_errno(status);
727 } 727 break;
728 }
728 729
729 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 730 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
730 status = ocfs2_add_clusters_in_btree(handle, 731 status = ocfs2_add_clusters_in_btree(handle,
731 &et, 732 &et,
732 &logical_start, 733 &logical_start,
733 clusters_to_add, 734 clusters_to_add,
734 0, 735 0,
735 ctxt->data_ac, 736 ctxt->data_ac,
736 ctxt->meta_ac, 737 ctxt->meta_ac,
737 &why); 738 &why);
738 if (status < 0) { 739 if ((status < 0) && (status != -EAGAIN)) {
739 mlog_errno(status); 740 if (status != -ENOSPC)
740 goto leave; 741 mlog_errno(status);
741 } 742 break;
743 }
742 744
743 ocfs2_journal_dirty(handle, vb->vb_bh); 745 ocfs2_journal_dirty(handle, vb->vb_bh);
744 746
745 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters; 747 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) -
748 prev_clusters;
746 749
747 /* 750 if (why != RESTART_NONE && clusters_to_add) {
748 * We should have already allocated enough space before the transaction, 751 /*
749 * so no need to restart. 752 * We can only fail in case the alloc file doesn't give
750 */ 753 * up enough clusters.
751 BUG_ON(why != RESTART_NONE || clusters_to_add); 754 */
752 755 BUG_ON(why == RESTART_META);
753leave: 756
757 mlog(0, "restarting xattr value extension for %u"
758 " clusters,.\n", clusters_to_add);
759 credits = ocfs2_calc_extend_credits(inode->i_sb,
760 &vb->vb_xv->xr_list,
761 clusters_to_add);
762 status = ocfs2_extend_trans(handle, credits);
763 if (status < 0) {
764 status = -ENOMEM;
765 mlog_errno(status);
766 break;
767 }
768 }
769 }
754 770
755 return status; 771 return status;
756} 772}
@@ -6788,16 +6804,15 @@ out:
6788 return ret; 6804 return ret;
6789} 6805}
6790 6806
6791static int ocfs2_reflink_xattr_buckets(handle_t *handle, 6807static int ocfs2_reflink_xattr_bucket(handle_t *handle,
6792 u64 blkno, u64 new_blkno, u32 clusters, 6808 u64 blkno, u64 new_blkno, u32 clusters,
6809 u32 *cpos, int num_buckets,
6793 struct ocfs2_alloc_context *meta_ac, 6810 struct ocfs2_alloc_context *meta_ac,
6794 struct ocfs2_alloc_context *data_ac, 6811 struct ocfs2_alloc_context *data_ac,
6795 struct ocfs2_reflink_xattr_tree_args *args) 6812 struct ocfs2_reflink_xattr_tree_args *args)
6796{ 6813{
6797 int i, j, ret = 0; 6814 int i, j, ret = 0;
6798 struct super_block *sb = args->reflink->old_inode->i_sb; 6815 struct super_block *sb = args->reflink->old_inode->i_sb;
6799 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
6800 u32 num_buckets = clusters * bpc;
6801 int bpb = args->old_bucket->bu_blocks; 6816 int bpb = args->old_bucket->bu_blocks;
6802 struct ocfs2_xattr_value_buf vb = { 6817 struct ocfs2_xattr_value_buf vb = {
6803 .vb_access = ocfs2_journal_access, 6818 .vb_access = ocfs2_journal_access,
@@ -6816,14 +6831,6 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6816 break; 6831 break;
6817 } 6832 }
6818 6833
6819 /*
6820 * The real bucket num in this series of blocks is stored
6821 * in the 1st bucket.
6822 */
6823 if (i == 0)
6824 num_buckets = le16_to_cpu(
6825 bucket_xh(args->old_bucket)->xh_num_buckets);
6826
6827 ret = ocfs2_xattr_bucket_journal_access(handle, 6834 ret = ocfs2_xattr_bucket_journal_access(handle,
6828 args->new_bucket, 6835 args->new_bucket,
6829 OCFS2_JOURNAL_ACCESS_CREATE); 6836 OCFS2_JOURNAL_ACCESS_CREATE);
@@ -6837,6 +6844,18 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6837 bucket_block(args->old_bucket, j), 6844 bucket_block(args->old_bucket, j),
6838 sb->s_blocksize); 6845 sb->s_blocksize);
6839 6846
6847 /*
6848 * Record the start cpos so that we can use it to initialize
6849 * our xattr tree we also set the xh_num_bucket for the new
6850 * bucket.
6851 */
6852 if (i == 0) {
6853 *cpos = le32_to_cpu(bucket_xh(args->new_bucket)->
6854 xh_entries[0].xe_name_hash);
6855 bucket_xh(args->new_bucket)->xh_num_buckets =
6856 cpu_to_le16(num_buckets);
6857 }
6858
6840 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6859 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6841 6860
6842 ret = ocfs2_reflink_xattr_header(handle, args->reflink, 6861 ret = ocfs2_reflink_xattr_header(handle, args->reflink,
@@ -6866,6 +6885,7 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6866 } 6885 }
6867 6886
6868 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket); 6887 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6888
6869 ocfs2_xattr_bucket_relse(args->old_bucket); 6889 ocfs2_xattr_bucket_relse(args->old_bucket);
6870 ocfs2_xattr_bucket_relse(args->new_bucket); 6890 ocfs2_xattr_bucket_relse(args->new_bucket);
6871 } 6891 }
@@ -6874,6 +6894,75 @@ static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6874 ocfs2_xattr_bucket_relse(args->new_bucket); 6894 ocfs2_xattr_bucket_relse(args->new_bucket);
6875 return ret; 6895 return ret;
6876} 6896}
6897
6898static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6899 struct inode *inode,
6900 struct ocfs2_reflink_xattr_tree_args *args,
6901 struct ocfs2_extent_tree *et,
6902 struct ocfs2_alloc_context *meta_ac,
6903 struct ocfs2_alloc_context *data_ac,
6904 u64 blkno, u32 cpos, u32 len)
6905{
6906 int ret, first_inserted = 0;
6907 u32 p_cluster, num_clusters, reflink_cpos = 0;
6908 u64 new_blkno;
6909 unsigned int num_buckets, reflink_buckets;
6910 unsigned int bpc =
6911 ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
6912
6913 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
6914 if (ret) {
6915 mlog_errno(ret);
6916 goto out;
6917 }
6918 num_buckets = le16_to_cpu(bucket_xh(args->old_bucket)->xh_num_buckets);
6919 ocfs2_xattr_bucket_relse(args->old_bucket);
6920
6921 while (len && num_buckets) {
6922 ret = ocfs2_claim_clusters(handle, data_ac,
6923 1, &p_cluster, &num_clusters);
6924 if (ret) {
6925 mlog_errno(ret);
6926 goto out;
6927 }
6928
6929 new_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
6930 reflink_buckets = min(num_buckets, bpc * num_clusters);
6931
6932 ret = ocfs2_reflink_xattr_bucket(handle, blkno,
6933 new_blkno, num_clusters,
6934 &reflink_cpos, reflink_buckets,
6935 meta_ac, data_ac, args);
6936 if (ret) {
6937 mlog_errno(ret);
6938 goto out;
6939 }
6940
6941 /*
6942 * For the 1st allocated cluster, we make it use the same cpos
6943 * so that the xattr tree looks the same as the original one
6944 * in the common case.
6945 */
6946 if (!first_inserted) {
6947 reflink_cpos = cpos;
6948 first_inserted = 1;
6949 }
6950 ret = ocfs2_insert_extent(handle, et, reflink_cpos, new_blkno,
6951 num_clusters, 0, meta_ac);
6952 if (ret)
6953 mlog_errno(ret);
6954
6955 mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
6956 (unsigned long long)new_blkno, num_clusters, reflink_cpos);
6957
6958 len -= num_clusters;
6959 blkno += ocfs2_clusters_to_blocks(inode->i_sb, num_clusters);
6960 num_buckets -= reflink_buckets;
6961 }
6962out:
6963 return ret;
6964}
6965
6877/* 6966/*
6878 * Create the same xattr extent record in the new inode's xattr tree. 6967 * Create the same xattr extent record in the new inode's xattr tree.
6879 */ 6968 */
@@ -6885,8 +6974,6 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
6885 void *para) 6974 void *para)
6886{ 6975{
6887 int ret, credits = 0; 6976 int ret, credits = 0;
6888 u32 p_cluster, num_clusters;
6889 u64 new_blkno;
6890 handle_t *handle; 6977 handle_t *handle;
6891 struct ocfs2_reflink_xattr_tree_args *args = 6978 struct ocfs2_reflink_xattr_tree_args *args =
6892 (struct ocfs2_reflink_xattr_tree_args *)para; 6979 (struct ocfs2_reflink_xattr_tree_args *)para;
@@ -6895,6 +6982,9 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
6895 struct ocfs2_alloc_context *data_ac = NULL; 6982 struct ocfs2_alloc_context *data_ac = NULL;
6896 struct ocfs2_extent_tree et; 6983 struct ocfs2_extent_tree et;
6897 6984
6985 mlog(0, "reflink xattr buckets %llu len %u\n",
6986 (unsigned long long)blkno, len);
6987
6898 ocfs2_init_xattr_tree_extent_tree(&et, 6988 ocfs2_init_xattr_tree_extent_tree(&et,
6899 INODE_CACHE(args->reflink->new_inode), 6989 INODE_CACHE(args->reflink->new_inode),
6900 args->new_blk_bh); 6990 args->new_blk_bh);
@@ -6914,32 +7004,12 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
6914 goto out; 7004 goto out;
6915 } 7005 }
6916 7006
6917 ret = ocfs2_claim_clusters(handle, data_ac, 7007 ret = ocfs2_reflink_xattr_buckets(handle, inode, args, &et,
6918 len, &p_cluster, &num_clusters); 7008 meta_ac, data_ac,
6919 if (ret) { 7009 blkno, cpos, len);
6920 mlog_errno(ret);
6921 goto out_commit;
6922 }
6923
6924 new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
6925
6926 mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
6927 (unsigned long long)blkno, (unsigned long long)new_blkno, len);
6928 ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
6929 meta_ac, data_ac, args);
6930 if (ret) {
6931 mlog_errno(ret);
6932 goto out_commit;
6933 }
6934
6935 mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
6936 (unsigned long long)new_blkno, len, cpos);
6937 ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
6938 len, 0, meta_ac);
6939 if (ret) 7010 if (ret)
6940 mlog_errno(ret); 7011 mlog_errno(ret);
6941 7012
6942out_commit:
6943 ocfs2_commit_trans(osb, handle); 7013 ocfs2_commit_trans(osb, handle);
6944 7014
6945out: 7015out:
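
[Editor's note] The first xattr.c hunk turns ocfs2_xattr_extend_allocation() into a retry loop: when ocfs2_add_clusters_in_btree() signals a restart instead of allocating everything, the transaction is extended by freshly computed credits and the loop goes around again (RESTART_META would mean a metadata shortfall and is a BUG). The loop shape, with all ocfs2 callees stubbed out:

/* All callees are stubs; only the restart structure is the point. */
#include <stdio.h>

enum restarted { RESTART_NONE, RESTART_TRANS, RESTART_META };

static int add_clusters(unsigned int want, unsigned int *got,
                        enum restarted *why)
{
        *got = want > 2 ? 2 : want;     /* pretend we never get more than 2 */
        *why = *got < want ? RESTART_TRANS : RESTART_NONE;
        return 0;
}

static int extend_trans(unsigned int credits)
{
        printf("extending transaction by %u credits\n", credits);
        return 0;
}

int main(void)
{
        unsigned int clusters_to_add = 5, got;
        enum restarted why;

        while (clusters_to_add) {
                if (add_clusters(clusters_to_add, &got, &why))
                        break;
                clusters_to_add -= got;
                if (why != RESTART_NONE && clusters_to_add &&
                    extend_trans(clusters_to_add))
                        break;
        }
        return 0;
}
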
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index 3e73de5967ff..fc8497643fd0 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -74,6 +74,7 @@ int ibm_partition(struct parsed_partitions *state)
74 } *label; 74 } *label;
75 unsigned char *data; 75 unsigned char *data;
76 Sector sect; 76 Sector sect;
77 sector_t labelsect;
77 78
78 res = 0; 79 res = 0;
79 blocksize = bdev_logical_block_size(bdev); 80 blocksize = bdev_logical_block_size(bdev);
@@ -98,10 +99,19 @@ int ibm_partition(struct parsed_partitions *state)
98 goto out_freeall; 99 goto out_freeall;
99 100
100 /* 101 /*
102 * Special case for FBA disks: label sector does not depend on
103 * blocksize.
104 */
105 if ((info->cu_type == 0x6310 && info->dev_type == 0x9336) ||
106 (info->cu_type == 0x3880 && info->dev_type == 0x3370))
107 labelsect = info->label_block;
108 else
109 labelsect = info->label_block * (blocksize >> 9);
110
111 /*
101 * Get volume label, extract name and type. 112 * Get volume label, extract name and type.
102 */ 113 */
103 data = read_part_sector(state, info->label_block*(blocksize/512), 114 data = read_part_sector(state, labelsect, &sect);
104 &sect);
105 if (data == NULL) 115 if (data == NULL)
106 goto out_readerr; 116 goto out_readerr;
107 117
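
[Editor's note] The ibm.c change reduces to one expression: FBA devices (the two cu/dev type pairs named in the hunk) address the volume label by logical block directly, while the others scale label_block into 512-byte sectors. As a stand-alone helper:

/* The cu/dev type pairs are the ones named in the hunk. */
#include <stdio.h>

static unsigned long long label_sector(unsigned int cu, unsigned int dev,
                                       unsigned long long label_block,
                                       unsigned int blocksize)
{
        if ((cu == 0x6310 && dev == 0x9336) ||
            (cu == 0x3880 && dev == 0x3370))
                return label_block;                     /* FBA: no scaling */
        return label_block * (blocksize >> 9);          /* 512-byte units */
}

int main(void)
{
        printf("FBA: %llu, ECKD: %llu\n",
               label_sector(0x6310, 0x9336, 2, 2048),
               label_sector(0x3990, 0x3390, 2, 2048));
        return 0;
}
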
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index ce94801f48ca..d9396a4fc7ff 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -209,6 +209,9 @@ void proc_device_tree_add_node(struct device_node *np,
209 for (pp = np->properties; pp != NULL; pp = pp->next) { 209 for (pp = np->properties; pp != NULL; pp = pp->next) {
210 p = pp->name; 210 p = pp->name;
211 211
212 if (strchr(p, '/'))
213 continue;
214
212 if (duplicate_name(de, p)) 215 if (duplicate_name(de, p))
213 p = fixup_name(np, de, p); 216 p = fixup_name(np, de, p);
214 217
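
[Editor's note] The proc_devtree.c check skips device-tree property names containing '/', which cannot be created as a single procfs entry. In miniature:

/* Property names are examples; the '/' rule is the one from the hunk. */
#include <stdio.h>
#include <string.h>

int main(void)
{
        const char *props[] = { "compatible", "linux,phandle", "foo/bar" };
        unsigned int i;

        for (i = 0; i < sizeof(props) / sizeof(props[0]); i++) {
                if (strchr(props[i], '/'))
                        continue;       /* cannot be a single proc entry */
                printf("create /proc/device-tree/.../%s\n", props[i]);
        }
        return 0;
}
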
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 46d4b5d72bd3..cb6306e63843 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -122,11 +122,20 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
122 return size; 122 return size;
123} 123}
124 124
125static void pad_len_spaces(struct seq_file *m, int len)
126{
127 len = 25 + sizeof(void*) * 6 - len;
128 if (len < 1)
129 len = 1;
130 seq_printf(m, "%*c", len, ' ');
131}
132
125/* 133/*
126 * display a single VMA to a sequenced file 134 * display a single VMA to a sequenced file
127 */ 135 */
128static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) 136static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
129{ 137{
138 struct mm_struct *mm = vma->vm_mm;
130 unsigned long ino = 0; 139 unsigned long ino = 0;
131 struct file *file; 140 struct file *file;
132 dev_t dev = 0; 141 dev_t dev = 0;
@@ -155,11 +164,14 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
155 MAJOR(dev), MINOR(dev), ino, &len); 164 MAJOR(dev), MINOR(dev), ino, &len);
156 165
157 if (file) { 166 if (file) {
158 len = 25 + sizeof(void *) * 6 - len; 167 pad_len_spaces(m, len);
159 if (len < 1)
160 len = 1;
161 seq_printf(m, "%*c", len, ' ');
162 seq_path(m, &file->f_path, ""); 168 seq_path(m, &file->f_path, "");
169 } else if (mm) {
170 if (vma->vm_start <= mm->start_stack &&
171 vma->vm_end >= mm->start_stack) {
172 pad_len_spaces(m, len);
173 seq_puts(m, "[stack]");
174 }
163 } 175 }
164 176
165 seq_putc(m, '\n'); 177 seq_putc(m, '\n');
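
[Editor's note] task_nommu.c factors the column padding into pad_len_spaces() and reuses it to tag the stack VMA: if a mapping has no backing file but covers mm->start_stack, the line gets a [stack] annotation, as in the MMU /proc/pid/maps. A user-space rendering with made-up addresses:

/* Addresses are made up; the padding formula matches the kernel's. */
#include <stdio.h>

static void pad_len_spaces(int len)
{
        len = 25 + (int)sizeof(void *) * 6 - len;
        if (len < 1)
                len = 1;
        printf("%*c", len, ' ');
}

int main(void)
{
        unsigned long start = 0x7f000000UL, end = 0x7f004000UL;
        unsigned long start_stack = 0x7f001000UL;       /* as if from mm */
        int len;

        len = printf("%08lx-%08lx rw-p", start, end);
        if (start <= start_stack && end >= start_stack) {
                pad_len_spaces(len);
                printf("[stack]");
        }
        printf("\n");
        return 0;
}
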
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 12c233da1b6b..437d2ca2de97 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -676,7 +676,7 @@ static void prune_dqcache(int count)
676 * This is called from kswapd when we think we need some 676 * This is called from kswapd when we think we need some
677 * more memory 677 * more memory
678 */ 678 */
679static int shrink_dqcache_memory(int nr, gfp_t gfp_mask) 679static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
680{ 680{
681 if (nr) { 681 if (nr) {
682 spin_lock(&dq_list_lock); 682 spin_lock(&dq_list_lock);
diff --git a/fs/splice.c b/fs/splice.c
index 740e6b9faf7a..efdbfece9932 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1282,7 +1282,8 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,
1282{ 1282{
1283 struct file *file = sd->u.file; 1283 struct file *file = sd->u.file;
1284 1284
1285 return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags); 1285 return do_splice_from(pipe, file, &file->f_pos, sd->total_len,
1286 sd->flags);
1286} 1287}
1287 1288
1288/** 1289/**
@@ -1371,8 +1372,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1371 if (off_in) 1372 if (off_in)
1372 return -ESPIPE; 1373 return -ESPIPE;
1373 if (off_out) { 1374 if (off_out) {
1374 if (!out->f_op || !out->f_op->llseek || 1375 if (!(out->f_mode & FMODE_PWRITE))
1375 out->f_op->llseek == no_llseek)
1376 return -EINVAL; 1376 return -EINVAL;
1377 if (copy_from_user(&offset, off_out, sizeof(loff_t))) 1377 if (copy_from_user(&offset, off_out, sizeof(loff_t)))
1378 return -EFAULT; 1378 return -EFAULT;
@@ -1392,8 +1392,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1392 if (off_out) 1392 if (off_out)
1393 return -ESPIPE; 1393 return -ESPIPE;
1394 if (off_in) { 1394 if (off_in) {
1395 if (!in->f_op || !in->f_op->llseek || 1395 if (!(in->f_mode & FMODE_PREAD))
1396 in->f_op->llseek == no_llseek)
1397 return -EINVAL; 1396 return -EINVAL;
1398 if (copy_from_user(&offset, off_in, sizeof(loff_t))) 1397 if (copy_from_user(&offset, off_in, sizeof(loff_t)))
1399 return -EFAULT; 1398 return -EFAULT;
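
[Editor's note] Besides pinning direct_splice_actor() to file->f_pos, do_splice() now gates offset splicing on FMODE_PREAD/FMODE_PWRITE rather than on a usable llseek method; what matters is positional I/O capability, not seekability. The check in isolation, with local stand-ins for the kernel flags:

/* FMODE_* values and the helper are local stand-ins, not the kernel's. */
#include <stdio.h>

#define FMODE_PREAD  0x1u
#define FMODE_PWRITE 0x2u

static int can_splice_at_offset(unsigned int f_mode, int writing)
{
        return writing ? !!(f_mode & FMODE_PWRITE)
                       : !!(f_mode & FMODE_PREAD);
}

int main(void)
{
        printf("no pread (would be -EINVAL): %d\n",
               can_splice_at_offset(0, 0));
        printf("regular file, writing: %d\n",
               can_splice_at_offset(FMODE_PREAD | FMODE_PWRITE, 1));
        return 0;
}
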
diff --git a/fs/super.c b/fs/super.c
index 5c35bc7a499e..938119ab8dcb 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -374,6 +374,8 @@ void sync_supers(void)
374 up_read(&sb->s_umount); 374 up_read(&sb->s_umount);
375 375
376 spin_lock(&sb_lock); 376 spin_lock(&sb_lock);
377 /* lock was dropped, must reset next */
378 list_safe_reset_next(sb, n, s_list);
377 __put_super(sb); 379 __put_super(sb);
378 } 380 }
379 } 381 }
@@ -405,6 +407,8 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
405 up_read(&sb->s_umount); 407 up_read(&sb->s_umount);
406 408
407 spin_lock(&sb_lock); 409 spin_lock(&sb_lock);
410 /* lock was dropped, must reset next */
411 list_safe_reset_next(sb, n, s_list);
408 __put_super(sb); 412 __put_super(sb);
409 } 413 }
410 spin_unlock(&sb_lock); 414 spin_unlock(&sb_lock);
@@ -585,6 +589,8 @@ static void do_emergency_remount(struct work_struct *work)
585 } 589 }
586 up_write(&sb->s_umount); 590 up_write(&sb->s_umount);
587 spin_lock(&sb_lock); 591 spin_lock(&sb_lock);
592 /* lock was dropped, must reset next */
593 list_safe_reset_next(sb, n, s_list);
588 __put_super(sb); 594 __put_super(sb);
589 } 595 }
590 spin_unlock(&sb_lock); 596 spin_unlock(&sb_lock);
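
[Editor's note] All three super.c hunks add list_safe_reset_next() after re-acquiring sb_lock: a _safe iterator caches the next element, and once the lock is dropped that cached pointer may refer to a deleted superblock, so it must be re-read before continuing. The hazard and fix on a toy list:

/* Toy singly linked list; the kernel's list.h is doubly linked, but the
 * stale-next hazard is the same. */
#include <stdio.h>

struct node { int v; struct node *next; };

int main(void)
{
        struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        struct node *pos = &a, *n;

        while (pos) {
                n = pos->next;  /* what a _safe iterator caches up front */
                /* ... lock dropped here, the list may be modified ... */
                n = pos->next;  /* list_safe_reset_next() analogue */
                printf("visit %d\n", pos->v);
                pos = n;
        }
        return 0;
}
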
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index bbd69bdb0fa8..fcc498ec9b33 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -25,6 +25,7 @@
25#include <linux/stat.h> 25#include <linux/stat.h>
26#include <linux/string.h> 26#include <linux/string.h>
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/writeback.h>
28#include "sysv.h" 29#include "sysv.h"
29 30
30/* We don't trust the value of 31/* We don't trust the value of
@@ -139,6 +140,9 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
139 struct inode *inode; 140 struct inode *inode;
140 sysv_ino_t ino; 141 sysv_ino_t ino;
141 unsigned count; 142 unsigned count;
143 struct writeback_control wbc = {
144 .sync_mode = WB_SYNC_NONE
145 };
142 146
143 inode = new_inode(sb); 147 inode = new_inode(sb);
144 if (!inode) 148 if (!inode)
@@ -168,7 +172,7 @@ struct inode * sysv_new_inode(const struct inode * dir, mode_t mode)
168 insert_inode_hash(inode); 172 insert_inode_hash(inode);
169 mark_inode_dirty(inode); 173 mark_inode_dirty(inode);
170 174
171 sysv_write_inode(inode, 0); /* ensure inode not allocated again */ 175 sysv_write_inode(inode, &wbc); /* ensure inode not allocated again */
172 mark_inode_dirty(inode); /* cleared by sysv_write_inode() */ 176 mark_inode_dirty(inode); /* cleared by sysv_write_inode() */
173 /* That's it. */ 177 /* That's it. */
174 unlock_super(sb); 178 unlock_super(sb);
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 076ca50e9933..c8ff0d1ae5d3 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -62,7 +62,9 @@
62 */ 62 */
63static void shrink_liability(struct ubifs_info *c, int nr_to_write) 63static void shrink_liability(struct ubifs_info *c, int nr_to_write)
64{ 64{
65 down_read(&c->vfs_sb->s_umount);
65 writeback_inodes_sb(c->vfs_sb); 66 writeback_inodes_sb(c->vfs_sb);
67 up_read(&c->vfs_sb->s_umount);
66} 68}
67 69
68/** 70/**
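
[Editor's note] The ubifs fix wraps writeback_inodes_sb() in down_read(&c->vfs_sb->s_umount)/up_read(): flushing a superblock's dirty inodes is only safe while unmount is excluded. The locking rule, with a pthread rwlock standing in for the kernel rw_semaphore:

/* pthread rwlock stands in for the rw_semaphore; the callee is a stub. */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t s_umount = PTHREAD_RWLOCK_INITIALIZER;

static void writeback_inodes_sb(void)
{
        printf("writing back dirty inodes\n");
}

static void shrink_liability(void)
{
        pthread_rwlock_rdlock(&s_umount);       /* down_read(&sb->s_umount) */
        writeback_inodes_sb();
        pthread_rwlock_unlock(&s_umount);       /* up_read(&sb->s_umount) */
}

int main(void)
{
        shrink_liability();
        return 0;
}
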
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index 02feb59cefca..0b201114a5ad 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -277,7 +277,7 @@ static int kick_a_thread(void)
277 return 0; 277 return 0;
278} 278}
279 279
280int ubifs_shrinker(int nr, gfp_t gfp_mask) 280int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask)
281{ 281{
282 int freed, contention = 0; 282 int freed, contention = 0;
283 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); 283 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 2eef553d50c8..04310878f449 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1575,7 +1575,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot);
1575int ubifs_tnc_end_commit(struct ubifs_info *c); 1575int ubifs_tnc_end_commit(struct ubifs_info *c);
1576 1576
1577/* shrinker.c */ 1577/* shrinker.c */
1578int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask); 1578int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
1579 1579
1580/* commit.c */ 1580/* commit.c */
1581int ubifs_bg_thread(void *info); 1581int ubifs_bg_thread(void *info);
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 649ade8ef598..2ee3f7a60163 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -45,7 +45,7 @@
45 45
46static kmem_zone_t *xfs_buf_zone; 46static kmem_zone_t *xfs_buf_zone;
47STATIC int xfsbufd(void *); 47STATIC int xfsbufd(void *);
48STATIC int xfsbufd_wakeup(int, gfp_t); 48STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
49STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); 49STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
50static struct shrinker xfs_buf_shake = { 50static struct shrinker xfs_buf_shake = {
51 .shrink = xfsbufd_wakeup, 51 .shrink = xfsbufd_wakeup,
@@ -340,7 +340,7 @@ _xfs_buf_lookup_pages(
340 __func__, gfp_mask); 340 __func__, gfp_mask);
341 341
342 XFS_STATS_INC(xb_page_retries); 342 XFS_STATS_INC(xb_page_retries);
343 xfsbufd_wakeup(0, gfp_mask); 343 xfsbufd_wakeup(NULL, 0, gfp_mask);
344 congestion_wait(BLK_RW_ASYNC, HZ/50); 344 congestion_wait(BLK_RW_ASYNC, HZ/50);
345 goto retry; 345 goto retry;
346 } 346 }
@@ -1762,6 +1762,7 @@ xfs_buf_runall_queues(
1762 1762
1763STATIC int 1763STATIC int
1764xfsbufd_wakeup( 1764xfsbufd_wakeup(
1765 struct shrinker *shrink,
1765 int priority, 1766 int priority,
1766 gfp_t mask) 1767 gfp_t mask)
1767{ 1768{
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 846b75aeb2ab..e7839ee49e43 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -128,13 +128,12 @@ xfs_nfs_get_inode(
128 return ERR_PTR(-ESTALE); 128 return ERR_PTR(-ESTALE);
129 129
130 /* 130 /*
131 * The XFS_IGET_BULKSTAT means that an invalid inode number is just 131 * The XFS_IGET_UNTRUSTED means that an invalid inode number is just
132 * fine and not an indication of a corrupted filesystem. Because 132 * fine and not an indication of a corrupted filesystem as clients can
133 * clients can send any kind of invalid file handle, e.g. after 133 * send invalid file handles and we have to handle it gracefully.
134 * a restore on the server we have to deal with this case gracefully.
135 */ 134 */
136 error = xfs_iget(mp, NULL, ino, XFS_IGET_BULKSTAT, 135 error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED,
137 XFS_ILOCK_SHARED, &ip, 0); 136 XFS_ILOCK_SHARED, &ip);
138 if (error) { 137 if (error) {
139 /* 138 /*
140 * EINVAL means the inode cluster doesn't exist anymore. 139 * EINVAL means the inode cluster doesn't exist anymore.
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 699b60cbab9c..e59a81062830 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -679,10 +679,9 @@ xfs_ioc_bulkstat(
679 error = xfs_bulkstat_single(mp, &inlast, 679 error = xfs_bulkstat_single(mp, &inlast,
680 bulkreq.ubuffer, &done); 680 bulkreq.ubuffer, &done);
681 else /* XFS_IOC_FSBULKSTAT */ 681 else /* XFS_IOC_FSBULKSTAT */
682 error = xfs_bulkstat(mp, &inlast, &count, 682 error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
683 (bulkstat_one_pf)xfs_bulkstat_one, NULL, 683 sizeof(xfs_bstat_t), bulkreq.ubuffer,
684 sizeof(xfs_bstat_t), bulkreq.ubuffer, 684 &done);
685 BULKSTAT_FG_QUICK, &done);
686 685
687 if (error) 686 if (error)
688 return -error; 687 return -error;
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 9287135e9bfc..52ed49e6465c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -237,15 +237,12 @@ xfs_bulkstat_one_compat(
237 xfs_ino_t ino, /* inode number to get data for */ 237 xfs_ino_t ino, /* inode number to get data for */
238 void __user *buffer, /* buffer to place output in */ 238 void __user *buffer, /* buffer to place output in */
239 int ubsize, /* size of buffer */ 239 int ubsize, /* size of buffer */
240 void *private_data, /* my private data */
241 xfs_daddr_t bno, /* starting bno of inode cluster */
242 int *ubused, /* bytes used by me */ 240 int *ubused, /* bytes used by me */
243 void *dibuff, /* on-disk inode buffer */
244 int *stat) /* BULKSTAT_RV_... */ 241 int *stat) /* BULKSTAT_RV_... */
245{ 242{
246 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, 243 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
247 xfs_bulkstat_one_fmt_compat, bno, 244 xfs_bulkstat_one_fmt_compat,
248 ubused, dibuff, stat); 245 ubused, stat);
249} 246}
250 247
251/* copied from xfs_ioctl.c */ 248/* copied from xfs_ioctl.c */
@@ -298,13 +295,11 @@ xfs_compat_ioc_bulkstat(
298 int res; 295 int res;
299 296
300 error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, 297 error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
301 sizeof(compat_xfs_bstat_t), 298 sizeof(compat_xfs_bstat_t), 0, &res);
302 NULL, 0, NULL, NULL, &res);
303 } else if (cmd == XFS_IOC_FSBULKSTAT_32) { 299 } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
304 error = xfs_bulkstat(mp, &inlast, &count, 300 error = xfs_bulkstat(mp, &inlast, &count,
305 xfs_bulkstat_one_compat, NULL, 301 xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
306 sizeof(compat_xfs_bstat_t), bulkreq.ubuffer, 302 bulkreq.ubuffer, &done);
307 BULKSTAT_FG_QUICK, &done);
308 } else 303 } else
309 error = XFS_ERROR(EINVAL); 304 error = XFS_ERROR(EINVAL);
310 if (error) 305 if (error)
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index f2d1718c9165..80938c736c27 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1883,7 +1883,6 @@ init_xfs_fs(void)
1883 goto out_cleanup_procfs; 1883 goto out_cleanup_procfs;
1884 1884
1885 vfs_initquota(); 1885 vfs_initquota();
1886 xfs_inode_shrinker_init();
1887 1886
1888 error = register_filesystem(&xfs_fs_type); 1887 error = register_filesystem(&xfs_fs_type);
1889 if (error) 1888 if (error)
@@ -1911,7 +1910,6 @@ exit_xfs_fs(void)
1911{ 1910{
1912 vfs_exitquota(); 1911 vfs_exitquota();
1913 unregister_filesystem(&xfs_fs_type); 1912 unregister_filesystem(&xfs_fs_type);
1914 xfs_inode_shrinker_destroy();
1915 xfs_sysctl_unregister(); 1913 xfs_sysctl_unregister();
1916 xfs_cleanup_procfs(); 1914 xfs_cleanup_procfs();
1917 xfs_buf_terminate(); 1915 xfs_buf_terminate();
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index ef7f0218bccb..a51a07c3a70c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -144,6 +144,41 @@ restart:
144 return last_error; 144 return last_error;
145} 145}
146 146
147/*
148 * Select the next per-ag structure to iterate during the walk. The reclaim
149 * walk is optimised only to walk AGs with reclaimable inodes in them.
150 */
151static struct xfs_perag *
152xfs_inode_ag_iter_next_pag(
153 struct xfs_mount *mp,
154 xfs_agnumber_t *first,
155 int tag)
156{
157 struct xfs_perag *pag = NULL;
158
159 if (tag == XFS_ICI_RECLAIM_TAG) {
160 int found;
161 int ref;
162
163 spin_lock(&mp->m_perag_lock);
164 found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
165 (void **)&pag, *first, 1, tag);
166 if (found <= 0) {
167 spin_unlock(&mp->m_perag_lock);
168 return NULL;
169 }
170 *first = pag->pag_agno + 1;
171 /* open coded pag reference increment */
172 ref = atomic_inc_return(&pag->pag_ref);
173 spin_unlock(&mp->m_perag_lock);
174 trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
175 } else {
176 pag = xfs_perag_get(mp, *first);
177 (*first)++;
178 }
179 return pag;
180}
181
147int 182int
148xfs_inode_ag_iterator( 183xfs_inode_ag_iterator(
149 struct xfs_mount *mp, 184 struct xfs_mount *mp,
@@ -154,16 +189,15 @@ xfs_inode_ag_iterator(
154 int exclusive, 189 int exclusive,
155 int *nr_to_scan) 190 int *nr_to_scan)
156{ 191{
192 struct xfs_perag *pag;
157 int error = 0; 193 int error = 0;
158 int last_error = 0; 194 int last_error = 0;
159 xfs_agnumber_t ag; 195 xfs_agnumber_t ag;
160 int nr; 196 int nr;
161 197
162 nr = nr_to_scan ? *nr_to_scan : INT_MAX; 198 nr = nr_to_scan ? *nr_to_scan : INT_MAX;
163 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 199 ag = 0;
164 struct xfs_perag *pag; 200 while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) {
165
166 pag = xfs_perag_get(mp, ag);
167 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, 201 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
168 exclusive, &nr); 202 exclusive, &nr);
169 xfs_perag_put(pag); 203 xfs_perag_put(pag);
@@ -640,6 +674,17 @@ __xfs_inode_set_reclaim_tag(
640 radix_tree_tag_set(&pag->pag_ici_root, 674 radix_tree_tag_set(&pag->pag_ici_root,
641 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), 675 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
642 XFS_ICI_RECLAIM_TAG); 676 XFS_ICI_RECLAIM_TAG);
677
678 if (!pag->pag_ici_reclaimable) {
679 /* propagate the reclaim tag up into the perag radix tree */
680 spin_lock(&ip->i_mount->m_perag_lock);
681 radix_tree_tag_set(&ip->i_mount->m_perag_tree,
682 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
683 XFS_ICI_RECLAIM_TAG);
684 spin_unlock(&ip->i_mount->m_perag_lock);
685 trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
686 -1, _RET_IP_);
687 }
643 pag->pag_ici_reclaimable++; 688 pag->pag_ici_reclaimable++;
644} 689}
645 690
@@ -674,6 +719,16 @@ __xfs_inode_clear_reclaim_tag(
674 radix_tree_tag_clear(&pag->pag_ici_root, 719 radix_tree_tag_clear(&pag->pag_ici_root,
675 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); 720 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
676 pag->pag_ici_reclaimable--; 721 pag->pag_ici_reclaimable--;
722 if (!pag->pag_ici_reclaimable) {
723 /* clear the reclaim tag from the perag radix tree */
724 spin_lock(&ip->i_mount->m_perag_lock);
725 radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
726 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
727 XFS_ICI_RECLAIM_TAG);
728 spin_unlock(&ip->i_mount->m_perag_lock);
729 trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
730 -1, _RET_IP_);
731 }
677} 732}
678 733
679/* 734/*
@@ -828,83 +883,52 @@ xfs_reclaim_inodes(
828 883
829/* 884/*
830 * Shrinker infrastructure. 885 * Shrinker infrastructure.
831 *
832 * This is all far more complex than it needs to be. It adds a global list of
833 * mounts because the shrinkers can only call a global context. We need to make
834 * the shrinkers pass a context to avoid the need for global state.
835 */ 886 */
836static LIST_HEAD(xfs_mount_list);
837static struct rw_semaphore xfs_mount_list_lock;
838
839static int 887static int
840xfs_reclaim_inode_shrink( 888xfs_reclaim_inode_shrink(
889 struct shrinker *shrink,
841 int nr_to_scan, 890 int nr_to_scan,
842 gfp_t gfp_mask) 891 gfp_t gfp_mask)
843{ 892{
844 struct xfs_mount *mp; 893 struct xfs_mount *mp;
845 struct xfs_perag *pag; 894 struct xfs_perag *pag;
846 xfs_agnumber_t ag; 895 xfs_agnumber_t ag;
847 int reclaimable = 0; 896 int reclaimable;
848 897
898 mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
849 if (nr_to_scan) { 899 if (nr_to_scan) {
850 if (!(gfp_mask & __GFP_FS)) 900 if (!(gfp_mask & __GFP_FS))
851 return -1; 901 return -1;
852 902
853 down_read(&xfs_mount_list_lock); 903 xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
854 list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
855 xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
856 XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); 904 XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
857 if (nr_to_scan <= 0) 905 /* if we don't exhaust the scan, don't bother coming back */
858 break; 906 if (nr_to_scan > 0)
859 } 907 return -1;
860 up_read(&xfs_mount_list_lock); 908 }
861 }
862 909
863 down_read(&xfs_mount_list_lock); 910 reclaimable = 0;
864 list_for_each_entry(mp, &xfs_mount_list, m_mplist) { 911 ag = 0;
865 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 912 while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag,
866 pag = xfs_perag_get(mp, ag); 913 XFS_ICI_RECLAIM_TAG))) {
867 reclaimable += pag->pag_ici_reclaimable; 914 reclaimable += pag->pag_ici_reclaimable;
868 xfs_perag_put(pag); 915 xfs_perag_put(pag);
869 }
870 } 916 }
871 up_read(&xfs_mount_list_lock);
872 return reclaimable; 917 return reclaimable;
873} 918}
874 919
875static struct shrinker xfs_inode_shrinker = {
876 .shrink = xfs_reclaim_inode_shrink,
877 .seeks = DEFAULT_SEEKS,
878};
879
880void __init
881xfs_inode_shrinker_init(void)
882{
883 init_rwsem(&xfs_mount_list_lock);
884 register_shrinker(&xfs_inode_shrinker);
885}
886
887void
888xfs_inode_shrinker_destroy(void)
889{
890 ASSERT(list_empty(&xfs_mount_list));
891 unregister_shrinker(&xfs_inode_shrinker);
892}
893
894void 920void
895xfs_inode_shrinker_register( 921xfs_inode_shrinker_register(
896 struct xfs_mount *mp) 922 struct xfs_mount *mp)
897{ 923{
898 down_write(&xfs_mount_list_lock); 924 mp->m_inode_shrink.shrink = xfs_reclaim_inode_shrink;
899 list_add_tail(&mp->m_mplist, &xfs_mount_list); 925 mp->m_inode_shrink.seeks = DEFAULT_SEEKS;
900 up_write(&xfs_mount_list_lock); 926 register_shrinker(&mp->m_inode_shrink);
901} 927}
902 928
903void 929void
904xfs_inode_shrinker_unregister( 930xfs_inode_shrinker_unregister(
905 struct xfs_mount *mp) 931 struct xfs_mount *mp)
906{ 932{
907 down_write(&xfs_mount_list_lock); 933 unregister_shrinker(&mp->m_inode_shrink);
908 list_del(&mp->m_mplist);
909 up_write(&xfs_mount_list_lock);
910} 934}
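
[Editor's note] The xfs_sync.c rewrite drops the global xfs_mount_list: each mount embeds its own struct shrinker (m_inode_shrink), and because the callback now receives the struct shrinker pointer, container_of() recovers the owning xfs_mount with no global state. The new perag radix-tree reclaim tag additionally lets the walk skip AGs with nothing reclaimable. The same pattern in user-space C:

/* Struct names echo the patch, but everything here is a stand-in. */
#include <stddef.h>
#include <stdio.h>

struct shrinker {
        int (*shrink)(struct shrinker *s, int nr_to_scan, unsigned int gfp);
};

struct mount {
        const char *name;
        int reclaimable;
        struct shrinker m_inode_shrink;         /* embedded, one per mount */
};

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

static int reclaim_inode_shrink(struct shrinker *s, int nr_to_scan,
                                unsigned int gfp)
{
        struct mount *mp = container_of(s, struct mount, m_inode_shrink);

        (void)nr_to_scan; (void)gfp;
        printf("shrinking %s: %d reclaimable\n", mp->name, mp->reclaimable);
        return mp->reclaimable;
}

int main(void)
{
        struct mount m = { "xfs0", 42, { reclaim_inode_shrink } };

        m.m_inode_shrink.shrink(&m.m_inode_shrink, 128, 0);
        return 0;
}
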
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index cdcbaaca9880..e28139aaa4aa 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -55,8 +55,6 @@ int xfs_inode_ag_iterator(struct xfs_mount *mp,
55 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 55 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
56 int flags, int tag, int write_lock, int *nr_to_scan); 56 int flags, int tag, int write_lock, int *nr_to_scan);
57 57
58void xfs_inode_shrinker_init(void);
59void xfs_inode_shrinker_destroy(void);
60void xfs_inode_shrinker_register(struct xfs_mount *mp); 58void xfs_inode_shrinker_register(struct xfs_mount *mp);
61void xfs_inode_shrinker_unregister(struct xfs_mount *mp); 59void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
62 60
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 73d5aa117384..302820690904 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -124,7 +124,10 @@ DEFINE_EVENT(xfs_perag_class, name, \
124 unsigned long caller_ip), \ 124 unsigned long caller_ip), \
125 TP_ARGS(mp, agno, refcount, caller_ip)) 125 TP_ARGS(mp, agno, refcount, caller_ip))
126DEFINE_PERAG_REF_EVENT(xfs_perag_get); 126DEFINE_PERAG_REF_EVENT(xfs_perag_get);
127DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim);
127DEFINE_PERAG_REF_EVENT(xfs_perag_put); 128DEFINE_PERAG_REF_EVENT(xfs_perag_put);
129DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
130DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
128 131
129TRACE_EVENT(xfs_attr_list_node_descend, 132TRACE_EVENT(xfs_attr_list_node_descend,
130 TP_PROTO(struct xfs_attr_list_context *ctx, 133 TP_PROTO(struct xfs_attr_list_context *ctx,
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 2d8b7bc792c9..67c018392d62 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -69,7 +69,7 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
69 69
70STATIC int xfs_qm_init_quotainos(xfs_mount_t *); 70STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
71STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); 71STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
72STATIC int xfs_qm_shake(int, gfp_t); 72STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t);
73 73
74static struct shrinker xfs_qm_shaker = { 74static struct shrinker xfs_qm_shaker = {
75 .shrink = xfs_qm_shake, 75 .shrink = xfs_qm_shake,
@@ -1632,10 +1632,7 @@ xfs_qm_dqusage_adjust(
1632 xfs_ino_t ino, /* inode number to get data for */ 1632 xfs_ino_t ino, /* inode number to get data for */
1633 void __user *buffer, /* not used */ 1633 void __user *buffer, /* not used */
1634 int ubsize, /* not used */ 1634 int ubsize, /* not used */
1635 void *private_data, /* not used */
1636 xfs_daddr_t bno, /* starting block of inode cluster */
1637 int *ubused, /* not used */ 1635 int *ubused, /* not used */
1638 void *dip, /* on-disk inode pointer (not used) */
1639 int *res) /* result code value */ 1636 int *res) /* result code value */
1640{ 1637{
1641 xfs_inode_t *ip; 1638 xfs_inode_t *ip;
@@ -1660,7 +1657,7 @@ xfs_qm_dqusage_adjust(
1660 * the case in all other instances. It's OK that we do this because 1657 * the case in all other instances. It's OK that we do this because
1661 * quotacheck is done only at mount time. 1658 * quotacheck is done only at mount time.
1662 */ 1659 */
1663 if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) { 1660 if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip))) {
1664 *res = BULKSTAT_RV_NOTHING; 1661 *res = BULKSTAT_RV_NOTHING;
1665 return error; 1662 return error;
1666 } 1663 }
@@ -1796,12 +1793,13 @@ xfs_qm_quotacheck(
1796 * Iterate thru all the inodes in the file system, 1793 * Iterate thru all the inodes in the file system,
1797 * adjusting the corresponding dquot counters in core. 1794 * adjusting the corresponding dquot counters in core.
1798 */ 1795 */
1799 if ((error = xfs_bulkstat(mp, &lastino, &count, 1796 error = xfs_bulkstat(mp, &lastino, &count,
1800 xfs_qm_dqusage_adjust, NULL, 1797 xfs_qm_dqusage_adjust,
1801 structsz, NULL, BULKSTAT_FG_IGET, &done))) 1798 structsz, NULL, &done);
1799 if (error)
1802 break; 1800 break;
1803 1801
1804 } while (! done); 1802 } while (!done);
1805 1803
1806 /* 1804 /*
1807 * We've made all the changes that we need to make incore. 1805 * We've made all the changes that we need to make incore.
@@ -1889,14 +1887,14 @@ xfs_qm_init_quotainos(
1889 mp->m_sb.sb_uquotino != NULLFSINO) { 1887 mp->m_sb.sb_uquotino != NULLFSINO) {
1890 ASSERT(mp->m_sb.sb_uquotino > 0); 1888 ASSERT(mp->m_sb.sb_uquotino > 0);
1891 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 1889 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
1892 0, 0, &uip, 0))) 1890 0, 0, &uip)))
1893 return XFS_ERROR(error); 1891 return XFS_ERROR(error);
1894 } 1892 }
1895 if (XFS_IS_OQUOTA_ON(mp) && 1893 if (XFS_IS_OQUOTA_ON(mp) &&
1896 mp->m_sb.sb_gquotino != NULLFSINO) { 1894 mp->m_sb.sb_gquotino != NULLFSINO) {
1897 ASSERT(mp->m_sb.sb_gquotino > 0); 1895 ASSERT(mp->m_sb.sb_gquotino > 0);
1898 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 1896 if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
1899 0, 0, &gip, 0))) { 1897 0, 0, &gip))) {
1900 if (uip) 1898 if (uip)
1901 IRELE(uip); 1899 IRELE(uip);
1902 return XFS_ERROR(error); 1900 return XFS_ERROR(error);
@@ -2119,7 +2117,10 @@ xfs_qm_shake_freelist(
2119 */ 2117 */
2120/* ARGSUSED */ 2118/* ARGSUSED */
2121STATIC int 2119STATIC int
2122xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask) 2120xfs_qm_shake(
2121 struct shrinker *shrink,
2122 int nr_to_scan,
2123 gfp_t gfp_mask)
2123{ 2124{
2124 int ndqused, nfree, n; 2125 int ndqused, nfree, n;
2125 2126
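The xfs_qm.c hunks track a generic shrinker API change: the ->shrink callback now receives the struct shrinker itself as its first argument, which is what the xfs_qm_shake() signature update reflects. A minimal sketch of a statically registered shrinker under the new signature; every example_* name is illustrative, not part of this patch:

#include <linux/mm.h>	/* struct shrinker, register_shrinker(), DEFAULT_SEEKS */

static int
example_shake(
	struct shrinker	*shrink,	/* newly added argument */
	int		nr_to_scan,
	gfp_t		gfp_mask)
{
	/*
	 * nr_to_scan == 0 is a query: report how many objects could be
	 * reclaimed. Otherwise free up to nr_to_scan objects and return
	 * the number still cached.
	 */
	return 0;
}

static struct shrinker example_shaker = {
	.shrink	= example_shake,
	.seeks	= DEFAULT_SEEKS,
};

/* at init time: register_shrinker(&example_shaker); */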
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 92b002f1805f..b4487764e923 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -262,7 +262,7 @@ xfs_qm_scall_trunc_qfiles(
262 } 262 }
263 263
264 if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) { 264 if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) {
265 error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0); 265 error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip);
266 if (!error) { 266 if (!error) {
267 error = xfs_truncate_file(mp, qip); 267 error = xfs_truncate_file(mp, qip);
268 IRELE(qip); 268 IRELE(qip);
@@ -271,7 +271,7 @@ xfs_qm_scall_trunc_qfiles(
271 271
272 if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) && 272 if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) &&
273 mp->m_sb.sb_gquotino != NULLFSINO) { 273 mp->m_sb.sb_gquotino != NULLFSINO) {
274 error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0); 274 error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip);
275 if (!error2) { 275 if (!error2) {
276 error2 = xfs_truncate_file(mp, qip); 276 error2 = xfs_truncate_file(mp, qip);
277 IRELE(qip); 277 IRELE(qip);
@@ -417,12 +417,12 @@ xfs_qm_scall_getqstat(
417 } 417 }
418 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { 418 if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
419 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 419 if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
420 0, 0, &uip, 0) == 0) 420 0, 0, &uip) == 0)
421 tempuqip = B_TRUE; 421 tempuqip = B_TRUE;
422 } 422 }
423 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) { 423 if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
424 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 424 if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
425 0, 0, &gip, 0) == 0) 425 0, 0, &gip) == 0)
426 tempgqip = B_TRUE; 426 tempgqip = B_TRUE;
427 } 427 }
428 if (uip) { 428 if (uip) {
@@ -1109,10 +1109,7 @@ xfs_qm_internalqcheck_adjust(
1109 xfs_ino_t ino, /* inode number to get data for */ 1109 xfs_ino_t ino, /* inode number to get data for */
1110 void __user *buffer, /* not used */ 1110 void __user *buffer, /* not used */
1111 int ubsize, /* not used */ 1111 int ubsize, /* not used */
1112 void *private_data, /* not used */
1113 xfs_daddr_t bno, /* starting block of inode cluster */
1114 int *ubused, /* not used */ 1112 int *ubused, /* not used */
1115 void *dip, /* not used */
1116 int *res) /* bulkstat result code */ 1113 int *res) /* bulkstat result code */
1117{ 1114{
1118 xfs_inode_t *ip; 1115 xfs_inode_t *ip;
@@ -1134,7 +1131,7 @@ xfs_qm_internalqcheck_adjust(
1134 ipreleased = B_FALSE; 1131 ipreleased = B_FALSE;
1135 again: 1132 again:
1136 lock_flags = XFS_ILOCK_SHARED; 1133 lock_flags = XFS_ILOCK_SHARED;
1137 if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip, bno))) { 1134 if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip))) {
1138 *res = BULKSTAT_RV_NOTHING; 1135 *res = BULKSTAT_RV_NOTHING;
1139 return (error); 1136 return (error);
1140 } 1137 }
@@ -1205,15 +1202,15 @@ xfs_qm_internalqcheck(
1205 * Iterate through all the inodes in the file system, 1202 * Iterate through all the inodes in the file system,
1206 * adjusting the corresponding dquot counters 1203 * adjusting the corresponding dquot counters
1207 */ 1204 */
1208 if ((error = xfs_bulkstat(mp, &lastino, &count, 1205 error = xfs_bulkstat(mp, &lastino, &count,
1209 xfs_qm_internalqcheck_adjust, NULL, 1206 xfs_qm_internalqcheck_adjust,
1210 0, NULL, BULKSTAT_FG_IGET, &done))) { 1207 0, NULL, &done);
1208 if (error) {
1209 cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error);
1211 break; 1210 break;
1212 } 1211 }
1213 } while (! done); 1212 } while (!done);
1214 if (error) { 1213
1215 cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error);
1216 }
1217 cmn_err(CE_DEBUG, "Checking results against system dquots"); 1214 cmn_err(CE_DEBUG, "Checking results against system dquots");
1218 for (i = 0; i < qmtest_hashmask; i++) { 1215 for (i = 0; i < qmtest_hashmask; i++) {
1219 xfs_dqtest_t *d, *n; 1216 xfs_dqtest_t *d, *n;
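With the private_data and flags arguments removed, both quota-check walks in this file reduce to the same loop shape. A hedged sketch of the caller pattern under the post-patch xfs_bulkstat() prototype; the batch size and callback name are illustrative:

	xfs_ino_t	lastino = 0;	/* resume cookie, advanced by xfs_bulkstat() */
	int		count;
	int		done;
	int		error;

	do {
		count = 64;	/* illustrative batch size, reset each pass */
		error = xfs_bulkstat(mp, &lastino, &count,
				     example_adjust,	/* a bulkstat_one_pf */
				     sizeof(xfs_bstat_t), NULL, &done);
		if (error)
			break;
	} while (!done);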
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 5bba29a07812..7f159d2a429a 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -69,7 +69,9 @@ xfs_swapext(
69 goto out; 69 goto out;
70 } 70 }
71 71
72 if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) { 72 if (!(file->f_mode & FMODE_WRITE) ||
73 !(file->f_mode & FMODE_READ) ||
74 (file->f_flags & O_APPEND)) {
73 error = XFS_ERROR(EBADF); 75 error = XFS_ERROR(EBADF);
74 goto out_put_file; 76 goto out_put_file;
75 } 77 }
@@ -81,6 +83,7 @@ xfs_swapext(
81 } 83 }
82 84
83 if (!(tmp_file->f_mode & FMODE_WRITE) || 85 if (!(tmp_file->f_mode & FMODE_WRITE) ||
86 !(tmp_file->f_mode & FMODE_READ) ||
84 (tmp_file->f_flags & O_APPEND)) { 87 (tmp_file->f_flags & O_APPEND)) {
85 error = XFS_ERROR(EBADF); 88 error = XFS_ERROR(EBADF);
86 goto out_put_tmp_file; 89 goto out_put_tmp_file;
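Both xfs_dfrag.c hunks tighten the same descriptor check: the target and the tmp file must now be open for reading as well as writing, and must not be append-only, before an extent swap is allowed. The guard, written once as a helper with a hypothetical name (the patch itself open-codes it at both call sites):

/*
 * Return 0 if this file may take part in an extent swap: readable,
 * writable, and not opened O_APPEND.
 */
static int
example_swapext_file_ok(struct file *file)
{
	if (!(file->f_mode & FMODE_WRITE) ||
	    !(file->f_mode & FMODE_READ) ||
	    (file->f_flags & O_APPEND))
		return XFS_ERROR(EBADF);
	return 0;
}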
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 9d884c127bb9..c7142a064c48 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -1203,6 +1203,63 @@ error0:
1203 return error; 1203 return error;
1204} 1204}
1205 1205
1206STATIC int
1207xfs_imap_lookup(
1208 struct xfs_mount *mp,
1209 struct xfs_trans *tp,
1210 xfs_agnumber_t agno,
1211 xfs_agino_t agino,
1212 xfs_agblock_t agbno,
1213 xfs_agblock_t *chunk_agbno,
1214 xfs_agblock_t *offset_agbno,
1215 int flags)
1216{
1217 struct xfs_inobt_rec_incore rec;
1218 struct xfs_btree_cur *cur;
1219 struct xfs_buf *agbp;
1220 xfs_agino_t startino;
1221 int error;
1222 int i;
1223
1224 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1225 if (error) {
1226 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1227 "xfs_ialloc_read_agi() returned "
1228 "error %d, agno %d",
1229 error, agno);
1230 return error;
1231 }
1232
1233 /*
1234 * Derive and look up the exact inode record for the given agino. If the
1235 * record cannot be found, then it's an invalid inode number and we
1236 * should abort.
1237 */
1238 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1239 startino = agino & ~(XFS_IALLOC_INODES(mp) - 1);
1240 error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i);
1241 if (!error) {
1242 if (i)
1243 error = xfs_inobt_get_rec(cur, &rec, &i);
1244 if (!error && i == 0)
1245 error = EINVAL;
1246 }
1247
1248 xfs_trans_brelse(tp, agbp);
1249 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1250 if (error)
1251 return error;
1252
1253 /* for untrusted inodes, check that the inode is allocated first */
1254 if ((flags & XFS_IGET_UNTRUSTED) &&
1255 (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
1256 return EINVAL;
1257
1258 *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino);
1259 *offset_agbno = agbno - *chunk_agbno;
1260 return 0;
1261}
1262
1206/* 1263/*
1207 * Return the location of the inode in imap, for mapping it into a buffer. 1264 * Return the location of the inode in imap, for mapping it into a buffer.
1208 */ 1265 */
@@ -1235,8 +1292,11 @@ xfs_imap(
1235 if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks || 1292 if (agno >= mp->m_sb.sb_agcount || agbno >= mp->m_sb.sb_agblocks ||
1236 ino != XFS_AGINO_TO_INO(mp, agno, agino)) { 1293 ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
1237#ifdef DEBUG 1294#ifdef DEBUG
1238 /* no diagnostics for bulkstat, ino comes from userspace */ 1295 /*
1239 if (flags & XFS_IGET_BULKSTAT) 1296 * Don't output diagnostic information for untrusted inodes
1297 * as they can be invalid without implying corruption.
1298 */
1299 if (flags & XFS_IGET_UNTRUSTED)
1240 return XFS_ERROR(EINVAL); 1300 return XFS_ERROR(EINVAL);
1241 if (agno >= mp->m_sb.sb_agcount) { 1301 if (agno >= mp->m_sb.sb_agcount) {
1242 xfs_fs_cmn_err(CE_ALERT, mp, 1302 xfs_fs_cmn_err(CE_ALERT, mp,
@@ -1263,6 +1323,23 @@ xfs_imap(
1263 return XFS_ERROR(EINVAL); 1323 return XFS_ERROR(EINVAL);
1264 } 1324 }
1265 1325
1326 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
1327
1328 /*
1329 * For bulkstat and handle lookups, we have an untrusted inode number
1330 * that we have to verify is valid. We cannot do this just by reading
1331 * the inode buffer as it may have been unlinked and removed, leaving
1332 * inodes in a stale state on disk. Hence we have to do a btree lookup
1333 * in all cases where an untrusted inode number is passed.
1334 */
1335 if (flags & XFS_IGET_UNTRUSTED) {
1336 error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
1337 &chunk_agbno, &offset_agbno, flags);
1338 if (error)
1339 return error;
1340 goto out_map;
1341 }
1342
1266 /* 1343 /*
1267 * If the inode cluster size is the same as the blocksize or 1344 * If the inode cluster size is the same as the blocksize or
1268 * smaller we get to the buffer by simple arithmetic. 1345 * smaller we get to the buffer by simple arithmetic.
@@ -1277,24 +1354,6 @@ xfs_imap(
1277 return 0; 1354 return 0;
1278 } 1355 }
1279 1356
1280 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
1281
1282 /*
1283 * If we get a block number passed from bulkstat we can use it to
1284 * find the buffer easily.
1285 */
1286 if (imap->im_blkno) {
1287 offset = XFS_INO_TO_OFFSET(mp, ino);
1288 ASSERT(offset < mp->m_sb.sb_inopblock);
1289
1290 cluster_agbno = xfs_daddr_to_agbno(mp, imap->im_blkno);
1291 offset += (agbno - cluster_agbno) * mp->m_sb.sb_inopblock;
1292
1293 imap->im_len = XFS_FSB_TO_BB(mp, blks_per_cluster);
1294 imap->im_boffset = (ushort)(offset << mp->m_sb.sb_inodelog);
1295 return 0;
1296 }
1297
1298 /* 1357 /*
1299 * If the inode chunks are aligned then use simple maths to 1358 * If the inode chunks are aligned then use simple maths to
1300 * find the location. Otherwise we have to do a btree 1359 * find the location. Otherwise we have to do a btree
@@ -1304,50 +1363,13 @@ xfs_imap(
1304 offset_agbno = agbno & mp->m_inoalign_mask; 1363 offset_agbno = agbno & mp->m_inoalign_mask;
1305 chunk_agbno = agbno - offset_agbno; 1364 chunk_agbno = agbno - offset_agbno;
1306 } else { 1365 } else {
1307 xfs_btree_cur_t *cur; /* inode btree cursor */ 1366 error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
1308 xfs_inobt_rec_incore_t chunk_rec; 1367 &chunk_agbno, &offset_agbno, flags);
1309 xfs_buf_t *agbp; /* agi buffer */
1310 int i; /* temp state */
1311
1312 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1313 if (error) {
1314 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1315 "xfs_ialloc_read_agi() returned "
1316 "error %d, agno %d",
1317 error, agno);
1318 return error;
1319 }
1320
1321 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1322 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
1323 if (error) {
1324 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1325 "xfs_inobt_lookup() failed");
1326 goto error0;
1327 }
1328
1329 error = xfs_inobt_get_rec(cur, &chunk_rec, &i);
1330 if (error) {
1331 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1332 "xfs_inobt_get_rec() failed");
1333 goto error0;
1334 }
1335 if (i == 0) {
1336#ifdef DEBUG
1337 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1338 "xfs_inobt_get_rec() failed");
1339#endif /* DEBUG */
1340 error = XFS_ERROR(EINVAL);
1341 }
1342 error0:
1343 xfs_trans_brelse(tp, agbp);
1344 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1345 if (error) 1368 if (error)
1346 return error; 1369 return error;
1347 chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_rec.ir_startino);
1348 offset_agbno = agbno - chunk_agbno;
1349 } 1370 }
1350 1371
1372out_map:
1351 ASSERT(agbno >= chunk_agbno); 1373 ASSERT(agbno >= chunk_agbno);
1352 cluster_agbno = chunk_agbno + 1374 cluster_agbno = chunk_agbno +
1353 ((offset_agbno / blks_per_cluster) * blks_per_cluster); 1375 ((offset_agbno / blks_per_cluster) * blks_per_cluster);
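The new xfs_imap_lookup() is what makes untrusted lookups safe: rather than trusting a caller-supplied cluster block number, it locates the chunk record in the inode btree. Two details from the added code are worth restating. Inode chunks hold XFS_IALLOC_INODES(mp) inodes and start on chunk-aligned inode numbers, so masking recovers the btree key; and for XFS_IGET_UNTRUSTED callers a set bit in ir_free means the slot is free on disk, so the number is rejected. Condensed from the hunks above:

	/* chunk-aligned first inode of the record that should contain agino */
	startino = agino & ~(XFS_IALLOC_INODES(mp) - 1);

	/* after the XFS_LOOKUP_EQ lookup of startino succeeds... */
	if ((flags & XFS_IGET_UNTRUSTED) &&
	    (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
		return EINVAL;	/* free slot: bogus inode number */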
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 75df75f43d48..8f8b91be2c99 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -259,7 +259,6 @@ xfs_iget_cache_miss(
259 xfs_trans_t *tp, 259 xfs_trans_t *tp,
260 xfs_ino_t ino, 260 xfs_ino_t ino,
261 struct xfs_inode **ipp, 261 struct xfs_inode **ipp,
262 xfs_daddr_t bno,
263 int flags, 262 int flags,
264 int lock_flags) 263 int lock_flags)
265{ 264{
@@ -272,7 +271,7 @@ xfs_iget_cache_miss(
272 if (!ip) 271 if (!ip)
273 return ENOMEM; 272 return ENOMEM;
274 273
275 error = xfs_iread(mp, tp, ip, bno, flags); 274 error = xfs_iread(mp, tp, ip, flags);
276 if (error) 275 if (error)
277 goto out_destroy; 276 goto out_destroy;
278 277
@@ -358,8 +357,6 @@ out_destroy:
358 * within the file system for the inode being requested. 357 * within the file system for the inode being requested.
359 * lock_flags -- flags indicating how to lock the inode. See the comment 358 * lock_flags -- flags indicating how to lock the inode. See the comment
360 * for xfs_ilock() for a list of valid values. 359 * for xfs_ilock() for a list of valid values.
361 * bno -- the block number starting the buffer containing the inode,
362 * if known (as by bulkstat), else 0.
363 */ 360 */
364int 361int
365xfs_iget( 362xfs_iget(
@@ -368,8 +365,7 @@ xfs_iget(
368 xfs_ino_t ino, 365 xfs_ino_t ino,
369 uint flags, 366 uint flags,
370 uint lock_flags, 367 uint lock_flags,
371 xfs_inode_t **ipp, 368 xfs_inode_t **ipp)
372 xfs_daddr_t bno)
373{ 369{
374 xfs_inode_t *ip; 370 xfs_inode_t *ip;
375 int error; 371 int error;
@@ -397,7 +393,7 @@ again:
397 read_unlock(&pag->pag_ici_lock); 393 read_unlock(&pag->pag_ici_lock);
398 XFS_STATS_INC(xs_ig_missed); 394 XFS_STATS_INC(xs_ig_missed);
399 395
400 error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, bno, 396 error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
401 flags, lock_flags); 397 flags, lock_flags);
402 if (error) 398 if (error)
403 goto out_error_or_again; 399 goto out_error_or_again;
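Every xfs_iget() call site in this patch collapses to the same six-argument shape. A representative lookup under the new prototype, as a bulkstat- or handle-style caller would issue it; the lock choice is illustrative:

	struct xfs_inode	*ip;
	int			error;

	error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED,
			 XFS_ILOCK_SHARED, &ip);
	if (error)
		return error;	/* EINVAL if the untrusted number is bogus */

	/* ... read what is needed from ip ... */
	xfs_iput(ip, XFS_ILOCK_SHARED);	/* drops lock and reference */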
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d53c39de7d05..b76a829d7e20 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -177,7 +177,7 @@ xfs_imap_to_bp(
177 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 177 if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
178 XFS_ERRTAG_ITOBP_INOTOBP, 178 XFS_ERRTAG_ITOBP_INOTOBP,
179 XFS_RANDOM_ITOBP_INOTOBP))) { 179 XFS_RANDOM_ITOBP_INOTOBP))) {
180 if (iget_flags & XFS_IGET_BULKSTAT) { 180 if (iget_flags & XFS_IGET_UNTRUSTED) {
181 xfs_trans_brelse(tp, bp); 181 xfs_trans_brelse(tp, bp);
182 return XFS_ERROR(EINVAL); 182 return XFS_ERROR(EINVAL);
183 } 183 }
@@ -787,7 +787,6 @@ xfs_iread(
787 xfs_mount_t *mp, 787 xfs_mount_t *mp,
788 xfs_trans_t *tp, 788 xfs_trans_t *tp,
789 xfs_inode_t *ip, 789 xfs_inode_t *ip,
790 xfs_daddr_t bno,
791 uint iget_flags) 790 uint iget_flags)
792{ 791{
793 xfs_buf_t *bp; 792 xfs_buf_t *bp;
@@ -797,11 +796,9 @@ xfs_iread(
797 /* 796 /*
798 * Fill in the location information in the in-core inode. 797 * Fill in the location information in the in-core inode.
799 */ 798 */
800 ip->i_imap.im_blkno = bno;
801 error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); 799 error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
802 if (error) 800 if (error)
803 return error; 801 return error;
804 ASSERT(bno == 0 || bno == ip->i_imap.im_blkno);
805 802
806 /* 803 /*
807 * Get pointers to the on-disk inode and the buffer containing it. 804 * Get pointers to the on-disk inode and the buffer containing it.
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 9965e40a4615..78550df13cd6 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -442,7 +442,7 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
442 * xfs_iget.c prototypes. 442 * xfs_iget.c prototypes.
443 */ 443 */
444int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, 444int xfs_iget(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
445 uint, uint, xfs_inode_t **, xfs_daddr_t); 445 uint, uint, xfs_inode_t **);
446void xfs_iput(xfs_inode_t *, uint); 446void xfs_iput(xfs_inode_t *, uint);
447void xfs_iput_new(xfs_inode_t *, uint); 447void xfs_iput_new(xfs_inode_t *, uint);
448void xfs_ilock(xfs_inode_t *, uint); 448void xfs_ilock(xfs_inode_t *, uint);
@@ -500,7 +500,7 @@ do { \
500 * Flags for xfs_iget() 500 * Flags for xfs_iget()
501 */ 501 */
502#define XFS_IGET_CREATE 0x1 502#define XFS_IGET_CREATE 0x1
503#define XFS_IGET_BULKSTAT 0x2 503#define XFS_IGET_UNTRUSTED 0x2
504 504
505int xfs_inotobp(struct xfs_mount *, struct xfs_trans *, 505int xfs_inotobp(struct xfs_mount *, struct xfs_trans *,
506 xfs_ino_t, struct xfs_dinode **, 506 xfs_ino_t, struct xfs_dinode **,
@@ -509,7 +509,7 @@ int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
509 struct xfs_inode *, struct xfs_dinode **, 509 struct xfs_inode *, struct xfs_dinode **,
510 struct xfs_buf **, uint); 510 struct xfs_buf **, uint);
511int xfs_iread(struct xfs_mount *, struct xfs_trans *, 511int xfs_iread(struct xfs_mount *, struct xfs_trans *,
512 struct xfs_inode *, xfs_daddr_t, uint); 512 struct xfs_inode *, uint);
513void xfs_dinode_to_disk(struct xfs_dinode *, 513void xfs_dinode_to_disk(struct xfs_dinode *,
514 struct xfs_icdinode *); 514 struct xfs_icdinode *);
515void xfs_idestroy_fork(struct xfs_inode *, int); 515void xfs_idestroy_fork(struct xfs_inode *, int);
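The flag rename is the semantic heart of the series: XFS_IGET_BULKSTAT named one caller, while XFS_IGET_UNTRUSTED covers any inode number that originates in userspace, bulkstat and handle lookups alike. Call sites use it as a quiet-failure gate, as in the xfs_imap() and xfs_imap_to_bp() hunks above:

	if (iget_flags & XFS_IGET_UNTRUSTED)
		return XFS_ERROR(EINVAL);	/* bad input, not corruption */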
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index b1b801e4a28e..2b86f8610512 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -49,24 +49,40 @@ xfs_internal_inum(
49 (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))); 49 (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino)));
50} 50}
51 51
52STATIC int 52/*
53xfs_bulkstat_one_iget( 53 * Return stat information for one inode.
54 xfs_mount_t *mp, /* mount point for filesystem */ 54 * Return 0 if ok, else errno.
55 xfs_ino_t ino, /* inode number to get data for */ 55 */
56 xfs_daddr_t bno, /* starting bno of inode cluster */ 56int
57 xfs_bstat_t *buf, /* return buffer */ 57xfs_bulkstat_one_int(
58 int *stat) /* BULKSTAT_RV_... */ 58 struct xfs_mount *mp, /* mount point for filesystem */
59 xfs_ino_t ino, /* inode to get data for */
60 void __user *buffer, /* buffer to place output in */
61 int ubsize, /* size of buffer */
62 bulkstat_one_fmt_pf formatter, /* formatter, copy to user */
63 int *ubused, /* bytes used by me */
64 int *stat) /* BULKSTAT_RV_... */
59{ 65{
60 xfs_icdinode_t *dic; /* dinode core info pointer */ 66 struct xfs_icdinode *dic; /* dinode core info pointer */
61 xfs_inode_t *ip; /* incore inode pointer */ 67 struct xfs_inode *ip; /* incore inode pointer */
62 struct inode *inode; 68 struct inode *inode;
63 int error; 69 struct xfs_bstat *buf; /* return buffer */
70 int error = 0; /* error value */
71
72 *stat = BULKSTAT_RV_NOTHING;
73
74 if (!buffer || xfs_internal_inum(mp, ino))
75 return XFS_ERROR(EINVAL);
76
77 buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL);
78 if (!buf)
79 return XFS_ERROR(ENOMEM);
64 80
65 error = xfs_iget(mp, NULL, ino, 81 error = xfs_iget(mp, NULL, ino,
66 XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno); 82 XFS_IGET_UNTRUSTED, XFS_ILOCK_SHARED, &ip);
67 if (error) { 83 if (error) {
68 *stat = BULKSTAT_RV_NOTHING; 84 *stat = BULKSTAT_RV_NOTHING;
69 return error; 85 goto out_free;
70 } 86 }
71 87
72 ASSERT(ip != NULL); 88 ASSERT(ip != NULL);
@@ -127,77 +143,16 @@ xfs_bulkstat_one_iget(
127 buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks; 143 buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks;
128 break; 144 break;
129 } 145 }
130
131 xfs_iput(ip, XFS_ILOCK_SHARED); 146 xfs_iput(ip, XFS_ILOCK_SHARED);
132 return error;
133}
134 147
135STATIC void 148 error = formatter(buffer, ubsize, ubused, buf);
136xfs_bulkstat_one_dinode(
137 xfs_mount_t *mp, /* mount point for filesystem */
138 xfs_ino_t ino, /* inode number to get data for */
139 xfs_dinode_t *dic, /* dinode inode pointer */
140 xfs_bstat_t *buf) /* return buffer */
141{
142 /*
143 * The inode format changed when we moved the link count and
144 * made it 32 bits long. If this is an old format inode,
145 * convert it in memory to look like a new one. If it gets
146 * flushed to disk we will convert back before flushing or
147 * logging it. We zero out the new projid field and the old link
148 * count field. We'll handle clearing the pad field (the remains
149 * of the old uuid field) when we actually convert the inode to
150 * the new format. We don't change the version number so that we
151 * can distinguish this from a real new format inode.
152 */
153 if (dic->di_version == 1) {
154 buf->bs_nlink = be16_to_cpu(dic->di_onlink);
155 buf->bs_projid = 0;
156 } else {
157 buf->bs_nlink = be32_to_cpu(dic->di_nlink);
158 buf->bs_projid = be16_to_cpu(dic->di_projid);
159 }
160 149
161 buf->bs_ino = ino; 150 if (!error)
162 buf->bs_mode = be16_to_cpu(dic->di_mode); 151 *stat = BULKSTAT_RV_DIDONE;
163 buf->bs_uid = be32_to_cpu(dic->di_uid);
164 buf->bs_gid = be32_to_cpu(dic->di_gid);
165 buf->bs_size = be64_to_cpu(dic->di_size);
166 buf->bs_atime.tv_sec = be32_to_cpu(dic->di_atime.t_sec);
167 buf->bs_atime.tv_nsec = be32_to_cpu(dic->di_atime.t_nsec);
168 buf->bs_mtime.tv_sec = be32_to_cpu(dic->di_mtime.t_sec);
169 buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec);
170 buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec);
171 buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec);
172 buf->bs_xflags = xfs_dic2xflags(dic);
173 buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog;
174 buf->bs_extents = be32_to_cpu(dic->di_nextents);
175 buf->bs_gen = be32_to_cpu(dic->di_gen);
176 memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
177 buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask);
178 buf->bs_dmstate = be16_to_cpu(dic->di_dmstate);
179 buf->bs_aextents = be16_to_cpu(dic->di_anextents);
180 buf->bs_forkoff = XFS_DFORK_BOFF(dic);
181 152
182 switch (dic->di_format) { 153 out_free:
183 case XFS_DINODE_FMT_DEV: 154 kmem_free(buf);
184 buf->bs_rdev = xfs_dinode_get_rdev(dic); 155 return error;
185 buf->bs_blksize = BLKDEV_IOSIZE;
186 buf->bs_blocks = 0;
187 break;
188 case XFS_DINODE_FMT_LOCAL:
189 case XFS_DINODE_FMT_UUID:
190 buf->bs_rdev = 0;
191 buf->bs_blksize = mp->m_sb.sb_blocksize;
192 buf->bs_blocks = 0;
193 break;
194 case XFS_DINODE_FMT_EXTENTS:
195 case XFS_DINODE_FMT_BTREE:
196 buf->bs_rdev = 0;
197 buf->bs_blksize = mp->m_sb.sb_blocksize;
198 buf->bs_blocks = be64_to_cpu(dic->di_nblocks);
199 break;
200 }
201} 156}
202 157
203/* Return 0 on success or positive error */ 158/* Return 0 on success or positive error */
@@ -217,118 +172,17 @@ xfs_bulkstat_one_fmt(
217 return 0; 172 return 0;
218} 173}
219 174
220/*
221 * Return stat information for one inode.
222 * Return 0 if ok, else errno.
223 */
224int /* error status */
225xfs_bulkstat_one_int(
226 xfs_mount_t *mp, /* mount point for filesystem */
227 xfs_ino_t ino, /* inode number to get data for */
228 void __user *buffer, /* buffer to place output in */
229 int ubsize, /* size of buffer */
230 bulkstat_one_fmt_pf formatter, /* formatter, copy to user */
231 xfs_daddr_t bno, /* starting bno of inode cluster */
232 int *ubused, /* bytes used by me */
233 void *dibuff, /* on-disk inode buffer */
234 int *stat) /* BULKSTAT_RV_... */
235{
236 xfs_bstat_t *buf; /* return buffer */
237 int error = 0; /* error value */
238 xfs_dinode_t *dip; /* dinode inode pointer */
239
240 dip = (xfs_dinode_t *)dibuff;
241 *stat = BULKSTAT_RV_NOTHING;
242
243 if (!buffer || xfs_internal_inum(mp, ino))
244 return XFS_ERROR(EINVAL);
245
246 buf = kmem_alloc(sizeof(*buf), KM_SLEEP);
247
248 if (dip == NULL) {
249 /* We're not being passed a pointer to a dinode. This happens
250 * if BULKSTAT_FG_IGET is selected. Do the iget.
251 */
252 error = xfs_bulkstat_one_iget(mp, ino, bno, buf, stat);
253 if (error)
254 goto out_free;
255 } else {
256 xfs_bulkstat_one_dinode(mp, ino, dip, buf);
257 }
258
259 error = formatter(buffer, ubsize, ubused, buf);
260 if (error)
261 goto out_free;
262
263 *stat = BULKSTAT_RV_DIDONE;
264
265 out_free:
266 kmem_free(buf);
267 return error;
268}
269
270int 175int
271xfs_bulkstat_one( 176xfs_bulkstat_one(
272 xfs_mount_t *mp, /* mount point for filesystem */ 177 xfs_mount_t *mp, /* mount point for filesystem */
273 xfs_ino_t ino, /* inode number to get data for */ 178 xfs_ino_t ino, /* inode number to get data for */
274 void __user *buffer, /* buffer to place output in */ 179 void __user *buffer, /* buffer to place output in */
275 int ubsize, /* size of buffer */ 180 int ubsize, /* size of buffer */
276 void *private_data, /* my private data */
277 xfs_daddr_t bno, /* starting bno of inode cluster */
278 int *ubused, /* bytes used by me */ 181 int *ubused, /* bytes used by me */
279 void *dibuff, /* on-disk inode buffer */
280 int *stat) /* BULKSTAT_RV_... */ 182 int *stat) /* BULKSTAT_RV_... */
281{ 183{
282 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, 184 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
283 xfs_bulkstat_one_fmt, bno, 185 xfs_bulkstat_one_fmt, ubused, stat);
284 ubused, dibuff, stat);
285}
286
287/*
288 * Test to see whether we can use the ondisk inode directly, based
289 * on the given bulkstat flags, filling in dipp accordingly.
290 * Returns zero if the inode is dodgey.
291 */
292STATIC int
293xfs_bulkstat_use_dinode(
294 xfs_mount_t *mp,
295 int flags,
296 xfs_buf_t *bp,
297 int clustidx,
298 xfs_dinode_t **dipp)
299{
300 xfs_dinode_t *dip;
301 unsigned int aformat;
302
303 *dipp = NULL;
304 if (!bp || (flags & BULKSTAT_FG_IGET))
305 return 1;
306 dip = (xfs_dinode_t *)
307 xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog);
308 /*
309 * Check the buffer containing the on-disk inode for di_mode == 0.
310 * This is to prevent xfs_bulkstat from picking up just reclaimed
311 * inodes that have their in-core state initialized but not flushed
312 * to disk yet. This is a temporary hack that would require a proper
313 * fix in the future.
314 */
315 if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC ||
316 !XFS_DINODE_GOOD_VERSION(dip->di_version) ||
317 !dip->di_mode)
318 return 0;
319 if (flags & BULKSTAT_FG_QUICK) {
320 *dipp = dip;
321 return 1;
322 }
323 /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */
324 aformat = dip->di_aformat;
325 if ((XFS_DFORK_Q(dip) == 0) ||
326 (aformat == XFS_DINODE_FMT_LOCAL) ||
327 (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_anextents)) {
328 *dipp = dip;
329 return 1;
330 }
331 return 1;
332} 186}
333 187
334#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) 188#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size)
@@ -342,10 +196,8 @@ xfs_bulkstat(
342 xfs_ino_t *lastinop, /* last inode returned */ 196 xfs_ino_t *lastinop, /* last inode returned */
343 int *ubcountp, /* size of buffer/count returned */ 197 int *ubcountp, /* size of buffer/count returned */
344 bulkstat_one_pf formatter, /* func that'd fill a single buf */ 198 bulkstat_one_pf formatter, /* func that'd fill a single buf */
345 void *private_data,/* private data for formatter */
346 size_t statstruct_size, /* sizeof struct filling */ 199 size_t statstruct_size, /* sizeof struct filling */
347 char __user *ubuffer, /* buffer with inode stats */ 200 char __user *ubuffer, /* buffer with inode stats */
348 int flags, /* defined in xfs_itable.h */
349 int *done) /* 1 if there are more stats to get */ 201 int *done) /* 1 if there are more stats to get */
350{ 202{
351 xfs_agblock_t agbno=0;/* allocation group block number */ 203 xfs_agblock_t agbno=0;/* allocation group block number */
@@ -380,14 +232,12 @@ xfs_bulkstat(
380 int ubelem; /* spaces used in user's buffer */ 232 int ubelem; /* spaces used in user's buffer */
381 int ubused; /* bytes used by formatter */ 233 int ubused; /* bytes used by formatter */
382 xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */ 234 xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */
383 xfs_dinode_t *dip; /* ptr into bp for specific inode */
384 235
385 /* 236 /*
386 * Get the last inode value, see if there's nothing to do. 237 * Get the last inode value, see if there's nothing to do.
387 */ 238 */
388 ino = (xfs_ino_t)*lastinop; 239 ino = (xfs_ino_t)*lastinop;
389 lastino = ino; 240 lastino = ino;
390 dip = NULL;
391 agno = XFS_INO_TO_AGNO(mp, ino); 241 agno = XFS_INO_TO_AGNO(mp, ino);
392 agino = XFS_INO_TO_AGINO(mp, ino); 242 agino = XFS_INO_TO_AGINO(mp, ino);
393 if (agno >= mp->m_sb.sb_agcount || 243 if (agno >= mp->m_sb.sb_agcount ||
@@ -612,37 +462,6 @@ xfs_bulkstat(
612 irbp->ir_startino) + 462 irbp->ir_startino) +
613 ((chunkidx & nimask) >> 463 ((chunkidx & nimask) >>
614 mp->m_sb.sb_inopblog); 464 mp->m_sb.sb_inopblog);
615
616 if (flags & (BULKSTAT_FG_QUICK |
617 BULKSTAT_FG_INLINE)) {
618 int offset;
619
620 ino = XFS_AGINO_TO_INO(mp, agno,
621 agino);
622 bno = XFS_AGB_TO_DADDR(mp, agno,
623 agbno);
624
625 /*
626 * Get the inode cluster buffer
627 */
628 if (bp)
629 xfs_buf_relse(bp);
630
631 error = xfs_inotobp(mp, NULL, ino, &dip,
632 &bp, &offset,
633 XFS_IGET_BULKSTAT);
634
635 if (!error)
636 clustidx = offset / mp->m_sb.sb_inodesize;
637 if (XFS_TEST_ERROR(error != 0,
638 mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK,
639 XFS_RANDOM_BULKSTAT_READ_CHUNK)) {
640 bp = NULL;
641 ubleft = 0;
642 rval = error;
643 break;
644 }
645 }
646 } 465 }
647 ino = XFS_AGINO_TO_INO(mp, agno, agino); 466 ino = XFS_AGINO_TO_INO(mp, agno, agino);
648 bno = XFS_AGB_TO_DADDR(mp, agno, agbno); 467 bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
@@ -658,35 +477,13 @@ xfs_bulkstat(
658 * when the chunk is used up. 477 * when the chunk is used up.
659 */ 478 */
660 irbp->ir_freecount++; 479 irbp->ir_freecount++;
661 if (!xfs_bulkstat_use_dinode(mp, flags, bp,
662 clustidx, &dip)) {
663 lastino = ino;
664 continue;
665 }
666 /*
667 * If we need to do an iget, cannot hold bp.
668 * Drop it, until starting the next cluster.
669 */
670 if ((flags & BULKSTAT_FG_INLINE) && !dip) {
671 if (bp)
672 xfs_buf_relse(bp);
673 bp = NULL;
674 }
675 480
676 /* 481 /*
677 * Get the inode and fill in a single buffer. 482 * Get the inode and fill in a single buffer.
678 * BULKSTAT_FG_QUICK uses dip to fill it in.
679 * BULKSTAT_FG_IGET uses igets.
680 * BULKSTAT_FG_INLINE uses dip if we have an
681 * inline attr fork, else igets.
682 * See: xfs_bulkstat_one & xfs_dm_bulkstat_one.
683 * This is also used to count inodes/blks, etc
684 * in xfs_qm_quotacheck.
685 */ 483 */
686 ubused = statstruct_size; 484 ubused = statstruct_size;
687 error = formatter(mp, ino, ubufp, 485 error = formatter(mp, ino, ubufp, ubleft,
688 ubleft, private_data, 486 &ubused, &fmterror);
689 bno, &ubused, dip, &fmterror);
690 if (fmterror == BULKSTAT_RV_NOTHING) { 487 if (fmterror == BULKSTAT_RV_NOTHING) {
691 if (error && error != ENOENT && 488 if (error && error != ENOENT &&
692 error != EINVAL) { 489 error != EINVAL) {
@@ -778,8 +575,7 @@ xfs_bulkstat_single(
778 */ 575 */
779 576
780 ino = (xfs_ino_t)*lastinop; 577 ino = (xfs_ino_t)*lastinop;
781 error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), 578 error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), 0, &res);
782 NULL, 0, NULL, NULL, &res);
783 if (error) { 579 if (error) {
784 /* 580 /*
785 * Special case way failed, do it the "long" way 581 * Special case way failed, do it the "long" way
@@ -788,8 +584,7 @@ xfs_bulkstat_single(
788 (*lastinop)--; 584 (*lastinop)--;
789 count = 1; 585 count = 1;
790 if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one, 586 if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one,
791 NULL, sizeof(xfs_bstat_t), buffer, 587 sizeof(xfs_bstat_t), buffer, done))
792 BULKSTAT_FG_IGET, done))
793 return error; 588 return error;
794 if (count == 0 || (xfs_ino_t)*lastinop != ino) 589 if (count == 0 || (xfs_ino_t)*lastinop != ino)
795 return error == EFSCORRUPTED ? 590 return error == EFSCORRUPTED ?
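After this rewrite a bulkstat_one_pf formatter receives neither private data, nor a cluster daddr, nor an on-disk inode pointer; every path goes through xfs_iget(). A skeletal formatter against the reduced signature, mirroring xfs_bulkstat_one() above; example_fmt stands in for some bulkstat_one_fmt_pf and is hypothetical:

STATIC int
example_bulkstat_one(
	struct xfs_mount	*mp,
	xfs_ino_t		ino,
	void __user		*buffer,
	int			ubsize,
	int			*ubused,
	int			*stat)
{
	/* reuse the common helper, supplying a custom copy-out step */
	return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
				    example_fmt, ubused, stat);
}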
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 20792bf45946..97295d91d170 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -27,10 +27,7 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
27 xfs_ino_t ino, 27 xfs_ino_t ino,
28 void __user *buffer, 28 void __user *buffer,
29 int ubsize, 29 int ubsize,
30 void *private_data,
31 xfs_daddr_t bno,
32 int *ubused, 30 int *ubused,
33 void *dip,
34 int *stat); 31 int *stat);
35 32
36/* 33/*
@@ -41,13 +38,6 @@ typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
41#define BULKSTAT_RV_GIVEUP 2 38#define BULKSTAT_RV_GIVEUP 2
42 39
43/* 40/*
44 * Values for bulkstat flag argument.
45 */
46#define BULKSTAT_FG_IGET 0x1 /* Go through the buffer cache */
47#define BULKSTAT_FG_QUICK 0x2 /* No iget, walk the dinode cluster */
48#define BULKSTAT_FG_INLINE 0x4 /* No iget if inline attrs */
49
50/*
51 * Return stat information in bulk (by-inode) for the filesystem. 41 * Return stat information in bulk (by-inode) for the filesystem.
52 */ 42 */
53int /* error status */ 43int /* error status */
@@ -56,10 +46,8 @@ xfs_bulkstat(
56 xfs_ino_t *lastino, /* last inode returned */ 46 xfs_ino_t *lastino, /* last inode returned */
57 int *count, /* size of buffer/count returned */ 47 int *count, /* size of buffer/count returned */
58 bulkstat_one_pf formatter, /* func that'd fill a single buf */ 48 bulkstat_one_pf formatter, /* func that'd fill a single buf */
59 void *private_data, /* private data for formatter */
60 size_t statstruct_size,/* sizeof struct that we're filling */ 49 size_t statstruct_size,/* sizeof struct that we're filling */
61 char __user *ubuffer,/* buffer with inode stats */ 50 char __user *ubuffer,/* buffer with inode stats */
62 int flags, /* flag to control access method */
63 int *done); /* 1 if there are more stats to get */ 51 int *done); /* 1 if there are more stats to get */
64 52
65int 53int
@@ -82,9 +70,7 @@ xfs_bulkstat_one_int(
82 void __user *buffer, 70 void __user *buffer,
83 int ubsize, 71 int ubsize,
84 bulkstat_one_fmt_pf formatter, 72 bulkstat_one_fmt_pf formatter,
85 xfs_daddr_t bno,
86 int *ubused, 73 int *ubused,
87 void *dibuff,
88 int *stat); 74 int *stat);
89 75
90int 76int
@@ -93,10 +79,7 @@ xfs_bulkstat_one(
93 xfs_ino_t ino, 79 xfs_ino_t ino,
94 void __user *buffer, 80 void __user *buffer,
95 int ubsize, 81 int ubsize,
96 void *private_data,
97 xfs_daddr_t bno,
98 int *ubused, 82 int *ubused,
99 void *dibuff,
100 int *stat); 83 int *stat);
101 84
102typedef int (*inumbers_fmt_pf)( 85typedef int (*inumbers_fmt_pf)(
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index ed0684cc50ee..9ac5cfab27b9 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3198,7 +3198,7 @@ xlog_recover_process_one_iunlink(
3198 int error; 3198 int error;
3199 3199
3200 ino = XFS_AGINO_TO_INO(mp, agno, agino); 3200 ino = XFS_AGINO_TO_INO(mp, agno, agino);
3201 error = xfs_iget(mp, NULL, ino, 0, 0, &ip, 0); 3201 error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
3202 if (error) 3202 if (error)
3203 goto fail; 3203 goto fail;
3204 3204
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d59f4e8bedcf..69f62d8b2816 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1300,7 +1300,7 @@ xfs_mountfs(
1300 * Get and sanity-check the root inode. 1300 * Get and sanity-check the root inode.
1301 * Save the pointer to it in the mount structure. 1301 * Save the pointer to it in the mount structure.
1302 */ 1302 */
1303 error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip, 0); 1303 error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip);
1304 if (error) { 1304 if (error) {
1305 cmn_err(CE_WARN, "XFS: failed to read root inode"); 1305 cmn_err(CE_WARN, "XFS: failed to read root inode");
1306 goto out_log_dealloc; 1306 goto out_log_dealloc;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1d2c7eed4eda..5761087ee8ea 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -259,7 +259,7 @@ typedef struct xfs_mount {
259 wait_queue_head_t m_wait_single_sync_task; 259 wait_queue_head_t m_wait_single_sync_task;
260 __int64_t m_update_flags; /* sb flags we need to update 260 __int64_t m_update_flags; /* sb flags we need to update
261 on the next remount,rw */ 261 on the next remount,rw */
262 struct list_head m_mplist; /* inode shrinker mount list */ 262 struct shrinker m_inode_shrink; /* inode reclaim shrinker */
263} xfs_mount_t; 263} xfs_mount_t;
264 264
265/* 265/*
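Replacing m_mplist with an embedded struct shrinker is the per-mount payoff of the callback change earlier in this series: the shrinker can recover its xfs_mount with container_of() instead of walking a global mount list. A sketch of the recovery idiom; the callback name is illustrative and its registration sits outside the hunks shown here:

static int
example_inode_shrink(
	struct shrinker	*shrink,
	int		nr_to_scan,
	gfp_t		gfp_mask)
{
	struct xfs_mount *mp = container_of(shrink, struct xfs_mount,
					    m_inode_shrink);

	/* ... reclaim at most nr_to_scan of mp's inodes ... */
	return 0;
}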
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 16445518506d..a2d32ce335aa 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -2277,12 +2277,12 @@ xfs_rtmount_inodes(
2277 sbp = &mp->m_sb; 2277 sbp = &mp->m_sb;
2278 if (sbp->sb_rbmino == NULLFSINO) 2278 if (sbp->sb_rbmino == NULLFSINO)
2279 return 0; 2279 return 0;
2280 error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip, 0); 2280 error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip);
2281 if (error) 2281 if (error)
2282 return error; 2282 return error;
2283 ASSERT(mp->m_rbmip != NULL); 2283 ASSERT(mp->m_rbmip != NULL);
2284 ASSERT(sbp->sb_rsumino != NULLFSINO); 2284 ASSERT(sbp->sb_rsumino != NULLFSINO);
2285 error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip, 0); 2285 error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip);
2286 if (error) { 2286 if (error) {
2287 IRELE(mp->m_rbmip); 2287 IRELE(mp->m_rbmip);
2288 return error; 2288 return error;
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 785ff101da0a..2559dfec946b 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -62,7 +62,7 @@ xfs_trans_iget(
62{ 62{
63 int error; 63 int error;
64 64
65 error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp, 0); 65 error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp);
66 if (!error && tp) 66 if (!error && tp)
67 xfs_trans_ijoin(tp, *ipp, lock_flags); 67 xfs_trans_ijoin(tp, *ipp, lock_flags);
68 return error; 68 return error;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index a06bd62504fc..c1646838898f 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1269,7 +1269,7 @@ xfs_lookup(
1269 if (error) 1269 if (error)
1270 goto out; 1270 goto out;
1271 1271
1272 error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0); 1272 error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
1273 if (error) 1273 if (error)
1274 goto out_free_name; 1274 goto out_free_name;
1275 1275